hackmyagent 0.16.5 → 0.16.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.integrity-manifest.json +1 -1
- package/dist/arp/crypto/hybrid-signing.d.ts +107 -0
- package/dist/arp/crypto/hybrid-signing.d.ts.map +1 -0
- package/dist/arp/crypto/hybrid-signing.js +321 -0
- package/dist/arp/crypto/hybrid-signing.js.map +1 -0
- package/dist/arp/crypto/index.d.ts +13 -0
- package/dist/arp/crypto/index.d.ts.map +1 -0
- package/dist/arp/crypto/index.js +33 -0
- package/dist/arp/crypto/index.js.map +1 -0
- package/dist/arp/crypto/manifest-loader.d.ts +117 -0
- package/dist/arp/crypto/manifest-loader.d.ts.map +1 -0
- package/dist/arp/crypto/manifest-loader.js +361 -0
- package/dist/arp/crypto/manifest-loader.js.map +1 -0
- package/dist/arp/crypto/types.d.ts +69 -0
- package/dist/arp/crypto/types.d.ts.map +1 -0
- package/dist/arp/crypto/types.js +11 -0
- package/dist/arp/crypto/types.js.map +1 -0
- package/dist/arp/index.d.ts +27 -0
- package/dist/arp/index.d.ts.map +1 -1
- package/dist/arp/index.js +94 -1
- package/dist/arp/index.js.map +1 -1
- package/dist/arp/intelligence/behavioral-risk-server.d.ts +82 -0
- package/dist/arp/intelligence/behavioral-risk-server.d.ts.map +1 -0
- package/dist/arp/intelligence/behavioral-risk-server.js +258 -0
- package/dist/arp/intelligence/behavioral-risk-server.js.map +1 -0
- package/dist/arp/intelligence/behavioral-risk.d.ts +217 -0
- package/dist/arp/intelligence/behavioral-risk.d.ts.map +1 -0
- package/dist/arp/intelligence/behavioral-risk.js +429 -0
- package/dist/arp/intelligence/behavioral-risk.js.map +1 -0
- package/dist/arp/intelligence/coordinator.d.ts +93 -2
- package/dist/arp/intelligence/coordinator.d.ts.map +1 -1
- package/dist/arp/intelligence/coordinator.js +281 -1
- package/dist/arp/intelligence/coordinator.js.map +1 -1
- package/dist/arp/intelligence/guard-anomaly.d.ts +349 -0
- package/dist/arp/intelligence/guard-anomaly.d.ts.map +1 -0
- package/dist/arp/intelligence/guard-anomaly.js +399 -0
- package/dist/arp/intelligence/guard-anomaly.js.map +1 -0
- package/dist/arp/intelligence/nanomind-l1.d.ts +37 -0
- package/dist/arp/intelligence/nanomind-l1.d.ts.map +1 -1
- package/dist/arp/intelligence/nanomind-l1.js +78 -0
- package/dist/arp/intelligence/nanomind-l1.js.map +1 -1
- package/dist/arp/intelligence/verify-classification.d.ts +124 -0
- package/dist/arp/intelligence/verify-classification.d.ts.map +1 -0
- package/dist/arp/intelligence/verify-classification.js +329 -0
- package/dist/arp/intelligence/verify-classification.js.map +1 -0
- package/dist/arp/proxy/server.d.ts +38 -8
- package/dist/arp/proxy/server.d.ts.map +1 -1
- package/dist/arp/proxy/server.js +89 -0
- package/dist/arp/proxy/server.js.map +1 -1
- package/dist/arp/types.d.ts +228 -1
- package/dist/arp/types.d.ts.map +1 -1
- package/dist/cli.js +85 -18
- package/dist/cli.js.map +1 -1
- package/dist/nanomind-core/compiler/semantic-compiler.d.ts.map +1 -1
- package/dist/nanomind-core/compiler/semantic-compiler.js +170 -10
- package/dist/nanomind-core/compiler/semantic-compiler.js.map +1 -1
- package/dist/nanomind-core/compiler/source-code-preprocessor.d.ts +64 -0
- package/dist/nanomind-core/compiler/source-code-preprocessor.d.ts.map +1 -0
- package/dist/nanomind-core/compiler/source-code-preprocessor.js +656 -0
- package/dist/nanomind-core/compiler/source-code-preprocessor.js.map +1 -0
- package/dist/nanomind-core/ingestion/artifact-parser.d.ts.map +1 -1
- package/dist/nanomind-core/ingestion/artifact-parser.js +15 -6
- package/dist/nanomind-core/ingestion/artifact-parser.js.map +1 -1
- package/package.json +3 -1
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Source Code Preprocessor
|
|
4
|
+
*
|
|
5
|
+
* The config-oriented detectors in the semantic compiler
|
|
6
|
+
* (`extractDataAccessPatterns`, `mapRiskSurfaces`) were designed for
|
|
7
|
+
* skills, agent configs, and system prompts, where the entire content is
|
|
8
|
+
* semantically meaningful. Running them against source code produces
|
|
9
|
+
* reflexive false positives: any file whose *purpose* is to scan for
|
|
10
|
+
* attack patterns (a credential regex, an `eval(` pattern, a typosquatting
|
|
11
|
+
* detector) contains the exact strings those detectors look for.
|
|
12
|
+
*
|
|
13
|
+
* This preprocessor produces a "stripped view" of source code with
|
|
14
|
+
* comments, import statements, and string literals replaced by whitespace.
|
|
15
|
+
* The stripped view preserves byte offsets so any index-based analysis
|
|
16
|
+
* downstream keeps working. The original content is left untouched for
|
|
17
|
+
* callers that need it (evidence extraction, capability parsing, etc.).
|
|
18
|
+
*
|
|
19
|
+
* What survives the strip:
|
|
20
|
+
* - Identifiers (variable, function, and type names)
|
|
21
|
+
* - Control flow and operators
|
|
22
|
+
* - Struct tags and field names (outside of string literals)
|
|
23
|
+
*
|
|
24
|
+
* What is removed:
|
|
25
|
+
* - `//` and `#` line comments (whichever the detected language uses)
|
|
26
|
+
* - `/* ... *\/`, `"""..."""`, `'''...'''` block comments
|
|
27
|
+
* - `"..."`, `'...'`, `` `...` ``, `r"..."`, etc. string literals
|
|
28
|
+
* - `import (...)` blocks and `import "..."` / `from X import Y` lines
|
|
29
|
+
*
|
|
30
|
+
* The net effect: code that references credential/eval/RCE patterns
|
|
31
|
+
* defensively (scanners, allowlists, documentation strings) no longer
|
|
32
|
+
* trips reflexive regex matches, while code that actually *does* those
|
|
33
|
+
* things (e.g. `exec(userInput)` where `exec` is a symbol call, not a
|
|
34
|
+
* string) still surfaces.
|
|
35
|
+
*
|
|
36
|
+
* Security note: stripping string literals trades off a narrow detection
|
|
37
|
+
* case — a hardcoded secret embedded in a string literal whose *exact*
|
|
38
|
+
* bytes match an AWS/Anthropic/GitHub key format — for eliminating the
|
|
39
|
+
* dominant false positive mode on source code. Canonical API key formats
|
|
40
|
+
* are still detected upstream of this path: `classifyArtifactType` routes
|
|
41
|
+
* files matching `sk-ant-`, `AKIA...`, `ghp_...`, or PEM headers into the
|
|
42
|
+
* `credential_file` artifact type before source code preprocessing runs,
|
|
43
|
+
* so those cases are preserved.
|
|
44
|
+
*/
|
|
45
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
|
+
exports.detectSourceLanguage = detectSourceLanguage;
|
|
47
|
+
exports.buildAnalysisView = buildAnalysisView;
|
|
48
|
+
exports.stripSourceCode = stripSourceCode;
|
|
49
|
+
/**
 * Detect the source language of a file from its extension.
 *
 * The match is case-insensitive and only looks at the end of the path.
 *
 * @param {string} path - File path or file name.
 * @returns {string} One of 'go', 'typescript', 'javascript', 'python',
 *   'rust', 'java', 'ruby', or 'unknown' when the path is missing, is not a
 *   string, or carries an unsupported extension.
 */
function detectSourceLanguage(path) {
    // A non-string value has no extension to inspect; report it as unknown
    // instead of throwing on `.toLowerCase()`.
    if (typeof path !== 'string' || path === '') {
        return 'unknown';
    }
    const lower = path.toLowerCase();
    const extensionsByLanguage = [
        ['go', ['.go']],
        // .mts / .cts are the TypeScript counterparts of .mjs / .cjs.
        ['typescript', ['.ts', '.tsx', '.mts', '.cts']],
        ['javascript', ['.js', '.jsx', '.mjs', '.cjs']],
        ['python', ['.py', '.pyi']],
        ['rust', ['.rs']],
        ['java', ['.java']],
        ['ruby', ['.rb']],
    ];
    for (const [language, extensions] of extensionsByLanguage) {
        if (extensions.some((ext) => lower.endsWith(ext))) {
            return language;
        }
    }
    return 'unknown';
}
|
|
74
|
+
// ============================================================================
|
|
75
|
+
// Public API
|
|
76
|
+
// ============================================================================
|
|
77
|
+
/**
 * Produce the text that the config-oriented detectors should scan.
 *
 * Only `source_code` artifacts whose language can be detected from `path`
 * are transformed; they are passed through `stripSourceCode`. Every other
 * input is returned untouched.
 *
 * @param {string} content - Raw artifact content.
 * @param {string} artifactType - Artifact classification; only
 *   'source_code' triggers stripping.
 * @param {string} [path] - File path used for language detection.
 * @returns {string} The stripped view, or `content` itself.
 */
function buildAnalysisView(content, artifactType, path) {
    const isSourceCode = artifactType === 'source_code';
    // Language detection is skipped entirely for non-source artifacts.
    const language = isSourceCode ? detectSourceLanguage(path) : 'unknown';
    return language === 'unknown' ? content : stripSourceCode(content, language);
}
|
|
97
|
+
/**
 * Run the language-specific stripper for `language` over `content`.
 *
 * Mainly exposed for tests; production callers normally go through
 * `buildAnalysisView`.
 *
 * @param {string} content - Source text.
 * @param {string} language - A value returned by `detectSourceLanguage`.
 * @returns {string} The stripped view, or `content` unchanged when the
 *   language has no stripper.
 */
function stripSourceCode(content, language) {
    if (language === 'go') {
        return stripGo(content);
    }
    // TypeScript and JavaScript share one stripper.
    if (language === 'typescript' || language === 'javascript') {
        return stripJsLike(content);
    }
    if (language === 'python') {
        return stripPython(content);
    }
    if (language === 'rust') {
        return stripRust(content);
    }
    if (language === 'java') {
        return stripJava(content);
    }
    if (language === 'ruby') {
        return stripRuby(content);
    }
    return content;
}
|
|
120
|
+
// ============================================================================
|
|
121
|
+
// Language-specific strippers
|
|
122
|
+
// ============================================================================
|
|
123
|
+
//
|
|
124
|
+
// Each stripper walks the content once and produces a same-length buffer
|
|
125
|
+
// where comments, imports, and string literals are replaced with spaces
|
|
126
|
+
// (newlines preserved so line numbers are stable).
|
|
127
|
+
//
|
|
128
|
+
// These are hand-rolled tokenizers. They are not language-complete parsers,
|
|
129
|
+
// but they are strict enough for our purpose: stripping signal that the
|
|
130
|
+
// config-oriented regex detectors should not see. A cosmetic miscount (e.g.
|
|
131
|
+
// an unterminated string) may leave a trailing region un-stripped, which is
|
|
132
|
+
// acceptable (worst case, we leave an FP in place; we never silence a real
|
|
133
|
+
// finding outside source code files).
|
|
134
|
+
/**
 * Build the same-length whitespace replacement for a stripped region.
 *
 * The result is always `n` characters long: `n - newlines` spaces followed
 * by `newlines` line feeds. Note that the line feeds are emitted at the END
 * of the region, not at their original positions, so the total length and
 * the line count of the file are preserved but columns on the region's last
 * line are not.
 *
 * @param {number} n - Length of the region being replaced.
 * @param {number} [newlines=0] - Number of '\n' characters the region held.
 *   Defaults to 0 so that `blank(n)` yields `n` spaces rather than an empty
 *   string (an omitted count previously collapsed the region to '').
 * @returns {string} Whitespace of length `n`.
 */
function blank(n, newlines = 0) {
    if (newlines === 0) {
        return ' '.repeat(n);
    }
    return ' '.repeat(n - newlines) + '\n'.repeat(newlines);
}
|
|
139
|
+
/**
 * Count the line-feed ('\n') characters in a string.
 *
 * @param {string} s - Text to scan.
 * @returns {number} Number of '\n' characters; carriage returns are not counted.
 */
function countNewlines(s) {
    let total = 0;
    // Hop from one line feed to the next instead of visiting every character.
    let at = s.indexOf('\n');
    while (at !== -1) {
        total++;
        at = s.indexOf('\n', at + 1);
    }
    return total;
}
|
|
146
|
+
// ----- Go -----
|
|
147
|
+
/**
 * Produce the stripped view of Go source.
 *
 * Comments, interpreted strings, raw strings, rune literals and import
 * declarations are replaced by whitespace of the same length (see `blank`);
 * everything else is copied through unchanged.
 *
 * @param {string} src - Go source text.
 * @returns {string} Stripped text with the same length as `src`.
 */
function stripGo(src) {
    const total = src.length;
    const pieces = [];
    // Replace src[from, to) with whitespace; `multiline` regions keep their newline count.
    const erase = (from, to, multiline) => {
        const breaks = multiline ? countNewlines(src.slice(from, to)) : 0;
        pieces.push(blank(to - from, breaks));
    };
    // Index just past a "..." or '...' literal opened at `open`. A backslash
    // skips the next character; the literal also ends (and consumes) at a newline.
    const endOfQuoted = (open, quote) => {
        let p = open + 1;
        while (p < total) {
            const c = src[p];
            if (c === '\\' && p + 1 < total) {
                p += 2;
                continue;
            }
            p++;
            if (c === quote || c === '\n') {
                break;
            }
        }
        return p;
    };
    let pos = 0;
    while (pos < total) {
        const ch = src[pos];
        let end;
        if (src.startsWith('//', pos)) {
            // Line comment: up to, but not including, the newline.
            const newline = src.indexOf('\n', pos);
            end = newline === -1 ? total : newline;
            erase(pos, end, false);
        }
        else if (src.startsWith('/*', pos)) {
            // Block comment; an unterminated one runs to end of input.
            const close = src.indexOf('*/', pos + 2);
            end = close === -1 ? total : close + 2;
            erase(pos, end, true);
        }
        else if (ch === '"' || ch === "'") {
            // Interpreted string or rune literal (same scanning rules).
            end = endOfQuoted(pos, ch);
            erase(pos, end, true);
        }
        else if (ch === '`') {
            // Raw string: no escapes, may span lines.
            const close = src.indexOf('`', pos + 1);
            end = close === -1 ? total : close + 1;
            erase(pos, end, true);
        }
        else if (ch === 'i' && isImportAtLineStart(src, pos, 'go')) {
            // Import declaration (single or grouped) at the start of a line.
            end = consumeGoImport(src, pos);
            erase(pos, end, true);
        }
        else {
            pieces.push(ch);
            end = pos + 1;
        }
        pos = end;
    }
    return pieces.join('');
}
|
|
230
|
+
/**
 * Report whether an `import` keyword begins at `src[i]` and is the first
 * token on its line (leading spaces and tabs are allowed).
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the candidate `i` of `import`.
 * @param {string} lang - Language tag. For 'java' the keyword must be
 *   followed by a space or tab; for any other value (this file calls it
 *   with 'go') a `(` or a newline is accepted as well.
 * @returns {boolean} True when an import declaration starts at `i`.
 */
function isImportAtLineStart(src, i, lang) {
    // Only spaces/tabs may sit between the previous newline (or start of input) and `i`.
    let j = i - 1;
    while (j >= 0 && (src[j] === ' ' || src[j] === '\t')) {
        j--;
    }
    if (j >= 0 && src[j] !== '\n') {
        return false;
    }
    if (!src.startsWith('import', i)) {
        return false;
    }
    // Inspect the character right after the bare keyword. The previous
    // implementation matched 'import ' (with the space) for Java and then
    // required ANOTHER separator, so `import java.util.List;` never matched.
    const after = src[i + 'import'.length];
    const isBlank = after === ' ' || after === '\t';
    if (lang === 'java') {
        return isBlank;
    }
    return isBlank || after === '(' || after === '\n';
}
|
|
245
|
+
/**
 * Find the end of the Go import declaration that starts at `src[i]`.
 *
 * Handles both forms:
 *   - grouped:  `import ( ... )`  -> index just past the closing `)`
 *   - single:   `import "path"`   -> index of the terminating newline
 *
 * Path strings and comments inside the declaration are skipped, so a `)`
 * inside `// :)` or inside a quoted path does not end a grouped import
 * early, and a `/* ... *\/` comment trailing a single import is consumed
 * whole instead of being cut at the first newline.
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the `import` keyword.
 * @returns {number} Exclusive end index; `src.length` when unterminated.
 */
function consumeGoImport(src, i) {
    const n = src.length;
    let j = i + 'import'.length;
    while (j < n && (src[j] === ' ' || src[j] === '\t')) {
        j++;
    }
    const grouped = src[j] === '(';
    if (grouped) {
        j++;
    }
    while (j < n) {
        const c = src[j];
        if (grouped ? c === ')' : c === '\n') {
            // The closing paren belongs to the import; the newline does not.
            return grouped ? j + 1 : j;
        }
        if (c === '"' || c === '`') {
            const close = src.indexOf(c, j + 1);
            // An interpreted string cannot span lines; if the "closing" quote is
            // on a later line the literal is malformed, so fall back to treating
            // the quote as an ordinary character.
            const crossesLine = close !== -1 && c === '"' && src.slice(j, close).includes('\n');
            if (close !== -1 && !crossesLine) {
                j = close + 1;
                continue;
            }
        }
        else if (c === '/' && src[j + 1] === '/') {
            const eol = src.indexOf('\n', j);
            if (eol === -1) {
                return n;
            }
            // Leave the newline for the next iteration (it ends a single import).
            j = eol;
            continue;
        }
        else if (c === '/' && src[j + 1] === '*') {
            const close = src.indexOf('*/', j + 2);
            if (close !== -1) {
                j = close + 2;
                continue;
            }
        }
        j++;
    }
    return n;
}
|
|
258
|
+
// ----- JavaScript / TypeScript -----
|
|
259
|
+
/**
 * Produce the stripped view of JavaScript / TypeScript source.
 *
 * Comments, quoted strings, the literal text of template literals and
 * static `import` statements are replaced by same-length whitespace (see
 * `blank`). `${...}` interpolations inside template literals are copied
 * through verbatim so that the code they contain stays visible.
 *
 * The output always has exactly `src.length` characters. The previous
 * template-literal handling dropped the text between an interpolation and
 * the closing backtick and re-blanked earlier text when a template held
 * more than one interpolation, which shifted every later offset.
 *
 * Known limitations (unchanged): interpolation braces are matched by simple
 * counting, so a `}` inside a string within `${...}` ends the interpolation
 * early; regular-expression literals are not recognised.
 *
 * @param {string} src - JavaScript or TypeScript source text.
 * @returns {string} Stripped text with the same length as `src`.
 */
function stripJsLike(src) {
    const out = [];
    const n = src.length;
    let i = 0;
    while (i < n) {
        const ch = src[i];
        const next = src[i + 1];
        // Line comment: blank up to (not including) the newline.
        if (ch === '/' && next === '/') {
            const end = src.indexOf('\n', i);
            const stop = end === -1 ? n : end;
            out.push(blank(stop - i, 0));
            i = stop;
            continue;
        }
        // Block comment: an unterminated one runs to end of input.
        if (ch === '/' && next === '*') {
            const end = src.indexOf('*/', i + 2);
            const stop = end === -1 ? n : end + 2;
            out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
            i = stop;
            continue;
        }
        // Quoted string: ends at the matching quote, or just before a newline
        // when unterminated (the newline itself is left in place).
        if (ch === '"' || ch === "'") {
            const quote = ch;
            const start = i;
            i++;
            while (i < n) {
                const c = src[i];
                if (c === '\\' && i + 1 < n) {
                    i += 2;
                    continue;
                }
                if (c === quote || c === '\n') {
                    if (c === quote) {
                        i++;
                    }
                    break;
                }
                i++;
            }
            out.push(blank(i - start, countNewlines(src.slice(start, i))));
            continue;
        }
        // Template literal: blank each literal segment, keep `${...}` verbatim.
        if (ch === '`') {
            // Start of the literal segment that has not been emitted yet.
            let segmentStart = i;
            i++;
            while (i < n) {
                const c = src[i];
                if (c === '\\' && i + 1 < n) {
                    i += 2;
                    continue;
                }
                if (c === '`') {
                    i++;
                    break;
                }
                if (c === '$' && src[i + 1] === '{') {
                    // Flush the literal text that precedes the interpolation.
                    out.push(blank(i - segmentStart, countNewlines(src.slice(segmentStart, i))));
                    // Locate the brace that closes this interpolation.
                    let depth = 1;
                    let k = i + 2;
                    while (k < n) {
                        if (src[k] === '{') {
                            depth++;
                        }
                        else if (src[k] === '}') {
                            depth--;
                            if (depth === 0) {
                                break;
                            }
                        }
                        k++;
                    }
                    // Unterminated interpolation: keep everything to end of input.
                    const exprEnd = Math.min(k + 1, n);
                    out.push(src.slice(i, exprEnd));
                    i = exprEnd;
                    segmentStart = i;
                    continue;
                }
                i++;
            }
            // Trailing literal segment, including the closing backtick if present.
            out.push(blank(i - segmentStart, countNewlines(src.slice(segmentStart, i))));
            continue;
        }
        // Static import statement at the start of a line.
        if (ch === 'i' && isImportAtJsLineStart(src, i)) {
            const end = consumeJsImport(src, i);
            out.push(blank(end - i, countNewlines(src.slice(i, end))));
            i = end;
            continue;
        }
        out.push(ch);
        i++;
    }
    return out.join('');
}
|
|
371
|
+
/**
 * Report whether a static `import` statement begins at `src[i]` and is the
 * first token on its line (leading spaces and tabs are allowed).
 *
 * The keyword must be followed by a space, a tab, `{`, `*`, or a quote, so
 * the side-effect form without a space (`import"./x.js"`) is recognised.
 * Dynamic `import(...)` calls and `import.meta` are deliberately NOT
 * treated as import statements.
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the candidate `i` of `import`.
 * @returns {boolean} True when a static import statement starts at `i`.
 */
function isImportAtJsLineStart(src, i) {
    let j = i - 1;
    while (j >= 0 && (src[j] === ' ' || src[j] === '\t')) {
        j--;
    }
    if (j >= 0 && src[j] !== '\n') {
        return false;
    }
    if (!src.startsWith('import', i)) {
        return false;
    }
    const after = src[i + 'import'.length];
    return (after === ' ' ||
        after === '\t' ||
        after === '{' ||
        after === '*' ||
        after === '"' ||
        after === "'");
}
|
|
383
|
+
/**
 * Find the end of the static `import` statement that starts at `src[i]`.
 *
 * The statement ends at the first `;` outside braces (the `;` is included)
 * or at the first newline outside braces, unless the next line continues
 * the statement with a `from` clause.
 *
 * The continuation test checks for the actual `from` keyword followed by
 * whitespace or a quote. Testing only the first letter treated any line
 * starting with `f` (`function`, `for`, `foo()`) as a continuation, so a
 * semicolon-less import swallowed the following statement.
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the `import` keyword.
 * @returns {number} Exclusive end index of the import statement.
 */
function consumeJsImport(src, i) {
    const n = src.length;
    let j = i;
    let depth = 0;
    while (j < n) {
        const c = src[j];
        if (c === '{') {
            depth++;
        }
        else if (c === '}') {
            depth--;
        }
        else if (c === ';' && depth === 0) {
            return j + 1;
        }
        else if (c === '\n' && depth === 0) {
            // Look at the first token of the next line.
            let k = j + 1;
            while (k < n && (src[k] === ' ' || src[k] === '\t')) {
                k++;
            }
            const follower = src[k + 'from'.length];
            const continuesWithFrom = src.startsWith('from', k) &&
                (follower === ' ' || follower === '\t' || follower === '"' || follower === "'");
            if (!continuesWithFrom) {
                return j;
            }
        }
        j++;
    }
    return j;
}
|
|
410
|
+
// ----- Python -----
|
|
411
|
+
function stripPython(src) {
|
|
412
|
+
const out = [];
|
|
413
|
+
let i = 0;
|
|
414
|
+
const n = src.length;
|
|
415
|
+
while (i < n) {
|
|
416
|
+
const ch = src[i];
|
|
417
|
+
// Comment
|
|
418
|
+
if (ch === '#') {
|
|
419
|
+
const end = src.indexOf('\n', i);
|
|
420
|
+
const stop = end === -1 ? n : end;
|
|
421
|
+
out.push(blank(stop - i, 0));
|
|
422
|
+
i = stop;
|
|
423
|
+
continue;
|
|
424
|
+
}
|
|
425
|
+
// Triple-quoted string
|
|
426
|
+
if ((ch === '"' || ch === "'") &&
|
|
427
|
+
src[i + 1] === ch &&
|
|
428
|
+
src[i + 2] === ch) {
|
|
429
|
+
const triple = ch + ch + ch;
|
|
430
|
+
const start = i;
|
|
431
|
+
i += 3;
|
|
432
|
+
const end = src.indexOf(triple, i);
|
|
433
|
+
i = end === -1 ? n : end + 3;
|
|
434
|
+
out.push(blank(i - start, countNewlines(src.slice(start, i))));
|
|
435
|
+
continue;
|
|
436
|
+
}
|
|
437
|
+
// Regular string (allow prefix like r, b, f)
|
|
438
|
+
if (ch === '"' || ch === "'") {
|
|
439
|
+
const quote = ch;
|
|
440
|
+
const start = i;
|
|
441
|
+
i++;
|
|
442
|
+
while (i < n) {
|
|
443
|
+
const c = src[i];
|
|
444
|
+
if (c === '\\' && i + 1 < n) {
|
|
445
|
+
i += 2;
|
|
446
|
+
continue;
|
|
447
|
+
}
|
|
448
|
+
if (c === quote || c === '\n') {
|
|
449
|
+
if (c === quote)
|
|
450
|
+
i++;
|
|
451
|
+
break;
|
|
452
|
+
}
|
|
453
|
+
i++;
|
|
454
|
+
}
|
|
455
|
+
out.push(blank(i - start, countNewlines(src.slice(start, i))));
|
|
456
|
+
continue;
|
|
457
|
+
}
|
|
458
|
+
// Import statement at start of line
|
|
459
|
+
if ((ch === 'i' || ch === 'f') && isImportAtPyLineStart(src, i)) {
|
|
460
|
+
const end = src.indexOf('\n', i);
|
|
461
|
+
const stop = end === -1 ? n : end;
|
|
462
|
+
out.push(blank(stop - i, 0));
|
|
463
|
+
i = stop;
|
|
464
|
+
continue;
|
|
465
|
+
}
|
|
466
|
+
out.push(ch);
|
|
467
|
+
i++;
|
|
468
|
+
}
|
|
469
|
+
return out.join('');
|
|
470
|
+
}
|
|
471
|
+
/**
 * Report whether a Python `import ...` or `from ...` statement begins at
 * `src[i]` as the first token on its line (leading spaces/tabs allowed).
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the candidate keyword's first character.
 * @returns {boolean} True when `import ` or `from ` starts the line at `i`.
 */
function isImportAtPyLineStart(src, i) {
    // Walk left across indentation to the first position of this line's content.
    let k = i;
    while (k > 0 && (src[k - 1] === ' ' || src[k - 1] === '\t')) {
        k--;
    }
    const startsLine = k === 0 || src[k - 1] === '\n';
    return startsLine && (src.startsWith('import ', i) || src.startsWith('from ', i));
}
|
|
483
|
+
// ----- Rust -----
|
|
484
|
+
/**
 * Produce the stripped view of Rust source.
 *
 * Replaced by same-length whitespace (see `blank`):
 *   - `//` line comments and `/* ... *\/` block comments (block comments
 *     nest in Rust, so nesting depth is tracked);
 *   - string literals `"..."` (may span lines);
 *   - raw strings `r"..."`, `r#"..."#`, `br"..."` — these have no escapes,
 *     so a trailing backslash must not be read as escaping the quote;
 *   - character literals such as `'"'` or `'\n'`, so a quote inside one
 *     does not open a bogus string;
 *   - `use ...;` and `extern crate ...;` declarations at line start.
 *
 * Lifetimes and loop labels (`'a`, `'outer:`) are left in place: an
 * apostrophe is treated as a char literal only when a closing apostrophe
 * follows one character (or one escape sequence) later.
 *
 * @param {string} src - Rust source text.
 * @returns {string} Stripped text with the same length as `src`.
 */
function stripRust(src) {
    const out = [];
    const n = src.length;
    const isIdentChar = (c) => c !== undefined && /[A-Za-z0-9_]/.test(c);
    const isHighSurrogate = (c) => c !== undefined && c.charCodeAt(0) >= 0xd800 && c.charCodeAt(0) <= 0xdbff;
    let i = 0;
    while (i < n) {
        const ch = src[i];
        const next = src[i + 1];
        // Line comment: blank up to (not including) the newline.
        if (ch === '/' && next === '/') {
            const end = src.indexOf('\n', i);
            const stop = end === -1 ? n : end;
            out.push(blank(stop - i, 0));
            i = stop;
            continue;
        }
        // Block comment, honouring nesting; unterminated runs to end of input.
        if (ch === '/' && next === '*') {
            let depth = 1;
            let stop = i + 2;
            while (stop < n && depth > 0) {
                if (src[stop] === '/' && src[stop + 1] === '*') {
                    depth++;
                    stop += 2;
                }
                else if (src[stop] === '*' && src[stop + 1] === '/') {
                    depth--;
                    stop += 2;
                }
                else {
                    stop++;
                }
            }
            out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
            i = stop;
            continue;
        }
        // Raw string: optional `b`, `r`, zero or more `#`, then `"`. Closed by
        // `"` followed by the same number of `#`. The prefix must not be the
        // tail of an identifier (e.g. the `r` in `bar`).
        if ((ch === 'r' || (ch === 'b' && next === 'r')) && !isIdentChar(src[i - 1])) {
            let open = i + (ch === 'b' ? 2 : 1);
            let hashes = 0;
            while (src[open] === '#') {
                hashes++;
                open++;
            }
            if (src[open] === '"') {
                const terminator = '"' + '#'.repeat(hashes);
                const end = src.indexOf(terminator, open + 1);
                const stop = end === -1 ? n : end + terminator.length;
                out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
                i = stop;
                continue;
            }
        }
        // Ordinary string literal.
        if (ch === '"') {
            const start = i;
            i++;
            while (i < n) {
                const c = src[i];
                if (c === '\\' && i + 1 < n) {
                    i += 2;
                    continue;
                }
                if (c === '"') {
                    i++;
                    break;
                }
                i++;
            }
            out.push(blank(i - start, countNewlines(src.slice(start, i))));
            continue;
        }
        // Character literal vs. lifetime / label.
        if (ch === "'") {
            let stop = -1;
            if (next === '\\') {
                // Escaped char; the longest form is '\u{10FFFF}' (12 characters).
                const close = src.indexOf("'", i + 3);
                if (close !== -1 && close - i <= 11) {
                    stop = close + 1;
                }
            }
            else if (next !== undefined && src[i + 2] === "'") {
                stop = i + 3;
            }
            else if (isHighSurrogate(next) && src[i + 3] === "'") {
                // One code point outside the BMP occupies two UTF-16 units.
                stop = i + 4;
            }
            if (stop !== -1) {
                out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
                i = stop;
                continue;
            }
            // Otherwise it is a lifetime or label: fall through and copy it.
        }
        // `use crate::foo;` and `extern crate foo;`
        if ((ch === 'u' || ch === 'e') && isRustUseAtLineStart(src, i)) {
            const end = src.indexOf(';', i);
            const stop = end === -1 ? n : end + 1;
            out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
            i = stop;
            continue;
        }
        out.push(ch);
        i++;
    }
    return out.join('');
}
|
|
536
|
+
/**
 * Report whether a Rust `use ` or `extern crate ` declaration begins at
 * `src[i]` as the first token on its line (leading spaces/tabs allowed).
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the candidate keyword's first character.
 * @returns {boolean} True when the declaration starts the line at `i`.
 */
function isRustUseAtLineStart(src, i) {
    // Everything between the previous line feed (or start of input) and `i`
    // must be indentation.
    const lineStart = i > 0 ? src.lastIndexOf('\n', i - 1) + 1 : 0;
    if (/[^ \t]/.test(src.slice(lineStart, i))) {
        return false;
    }
    return src.startsWith('use ', i) || src.startsWith('extern crate ', i);
}
|
|
544
|
+
// ----- Java -----
|
|
545
|
+
/**
 * Produce the stripped view of Java source.
 *
 * Replaced by same-length whitespace (see `blank`):
 *   - `//` line comments and `/* ... *\/` block comments;
 *   - text blocks `"""..."""` (may span lines);
 *   - string literals `"..."` and character literals `'...'` — handling
 *     char literals matters because `'"'` would otherwise open a bogus
 *     string that hides the rest of the line;
 *   - `import ...;` declarations at line start.
 *
 * @param {string} src - Java source text.
 * @returns {string} Stripped text with the same length as `src`.
 */
function stripJava(src) {
    const out = [];
    const n = src.length;
    let i = 0;
    while (i < n) {
        const ch = src[i];
        const next = src[i + 1];
        // Line comment: blank up to (not including) the newline.
        if (ch === '/' && next === '/') {
            const end = src.indexOf('\n', i);
            const stop = end === -1 ? n : end;
            out.push(blank(stop - i, 0));
            i = stop;
            continue;
        }
        // Block comment; an unterminated one runs to end of input.
        if (ch === '/' && next === '*') {
            const end = src.indexOf('*/', i + 2);
            const stop = end === -1 ? n : end + 2;
            out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
            i = stop;
            continue;
        }
        // Text block: opened and closed by three double quotes. Must be
        // checked before the single-quote-delimited string case below.
        if (ch === '"' && next === '"' && src[i + 2] === '"') {
            const start = i;
            i += 3;
            while (i < n) {
                if (src[i] === '\\' && i + 1 < n) {
                    i += 2;
                    continue;
                }
                if (src.startsWith('"""', i)) {
                    i += 3;
                    break;
                }
                i++;
            }
            out.push(blank(i - start, countNewlines(src.slice(start, i))));
            continue;
        }
        // String or character literal: ends at the matching quote, or just
        // before a newline when unterminated.
        if (ch === '"' || ch === "'") {
            const quote = ch;
            const start = i;
            i++;
            while (i < n) {
                const c = src[i];
                if (c === '\\' && i + 1 < n) {
                    i += 2;
                    continue;
                }
                if (c === quote || c === '\n') {
                    if (c === quote) {
                        i++;
                    }
                    break;
                }
                i++;
            }
            out.push(blank(i - start, countNewlines(src.slice(start, i))));
            continue;
        }
        // Import declaration: consumed through its terminating semicolon.
        if (ch === 'i' && isImportAtLineStart(src, i, 'java')) {
            const end = src.indexOf(';', i);
            const stop = end === -1 ? n : end + 1;
            out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
            i = stop;
            continue;
        }
        out.push(ch);
        i++;
    }
    return out.join('');
}
|
|
599
|
+
// ----- Ruby -----
|
|
600
|
+
/**
 * Produce the stripped view of Ruby source.
 *
 * Replaced by same-length whitespace (see `blank`):
 *   - `#` line comments;
 *   - `=begin` ... `=end` block comments (both markers at column 0);
 *   - single-line `"..."` and `'...'` strings;
 *   - `require`, `require_relative` and `load` statements at line start.
 *
 * A require/load statement ends at the first `;` outside quotes, so in
 * `require 'open3'; Open3.popen3(cmd)` the call after the semicolon stays
 * visible. The statement check is also run for lines starting with `l`;
 * testing only `r` meant `load` was never reached.
 *
 * Heredocs, `%q()`-style literals and regex literals are not recognised.
 *
 * @param {string} src - Ruby source text.
 * @returns {string} Stripped text with the same length as `src`.
 */
function stripRuby(src) {
    const out = [];
    const n = src.length;
    let i = 0;
    while (i < n) {
        const ch = src[i];
        // Block comment: `=begin` at column 0 through the line holding `=end`.
        if (ch === '=' && (i === 0 || src[i - 1] === '\n') && src.startsWith('=begin', i)) {
            const after = src[i + '=begin'.length];
            if (after === undefined || after === ' ' || after === '\t' || after === '\r' || after === '\n') {
                const close = src.indexOf('\n=end', i);
                let stop = n;
                if (close !== -1) {
                    const eol = src.indexOf('\n', close + 1);
                    stop = eol === -1 ? n : eol;
                }
                out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
                i = stop;
                continue;
            }
        }
        // Line comment: blank up to (not including) the newline.
        if (ch === '#') {
            const end = src.indexOf('\n', i);
            const stop = end === -1 ? n : end;
            out.push(blank(stop - i, 0));
            i = stop;
            continue;
        }
        // String: ends at the matching quote, or just before a newline when
        // unterminated.
        if (ch === '"' || ch === "'") {
            const quote = ch;
            const start = i;
            i++;
            while (i < n) {
                const c = src[i];
                if (c === '\\' && i + 1 < n) {
                    i += 2;
                    continue;
                }
                if (c === quote || c === '\n') {
                    if (c === quote) {
                        i++;
                    }
                    break;
                }
                i++;
            }
            out.push(blank(i - start, countNewlines(src.slice(start, i))));
            continue;
        }
        // `require`, `require_relative`, `load` at start of line
        if ((ch === 'r' || ch === 'l') && isRubyRequireAtLineStart(src, i)) {
            let stop = i;
            while (stop < n) {
                const c = src[stop];
                if (c === '\n' || c === ';') {
                    break;
                }
                if (c === '#') {
                    // Trailing comment: part of the blanked region, to end of line.
                    const eol = src.indexOf('\n', stop);
                    stop = eol === -1 ? n : eol;
                    break;
                }
                if (c === '"' || c === "'") {
                    // Skip the quoted path so a `;` or `#` inside it is ignored.
                    let m = stop + 1;
                    while (m < n && src[m] !== c && src[m] !== '\n') {
                        const escapes = src[m] === '\\' && m + 1 < n && src[m + 1] !== '\n';
                        m += escapes ? 2 : 1;
                    }
                    stop = m < n && src[m] === c ? m + 1 : m;
                    continue;
                }
                stop++;
            }
            out.push(blank(stop - i, countNewlines(src.slice(i, stop))));
            i = stop;
            continue;
        }
        out.push(ch);
        i++;
    }
    return out.join('');
}
|
|
646
|
+
/**
 * Report whether a Ruby `require`, `require_relative` or `load` call begins
 * at `src[i]` as the first token on its line (leading spaces/tabs allowed).
 *
 * The keyword must be followed by a space, a tab, `(`, or a quote, so the
 * parenthesised form `require('json')` and the compact form
 * `require'json'` are recognised along with the usual `require 'json'`.
 * Identifiers that merely start with a keyword (`required`, `loader`) are
 * rejected.
 *
 * @param {string} src - Full source text.
 * @param {number} i - Index of the candidate keyword's first character.
 * @returns {boolean} True when such a call starts the line at `i`.
 */
function isRubyRequireAtLineStart(src, i) {
    let j = i - 1;
    while (j >= 0 && (src[j] === ' ' || src[j] === '\t')) {
        j--;
    }
    if (j >= 0 && src[j] !== '\n') {
        return false;
    }
    for (const keyword of ['require_relative', 'require', 'load']) {
        if (!src.startsWith(keyword, i)) {
            continue;
        }
        const after = src[i + keyword.length];
        if (after === ' ' || after === '\t' || after === '(' || after === '"' || after === "'") {
            return true;
        }
    }
    return false;
}
|
|
656
|
+
//# sourceMappingURL=source-code-preprocessor.js.map
|