@massu/core 1.5.8 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,561 @@
1
+ // ../adapter-go-chi/dist/index.js
2
+ import { Parser as Parser2 } from "web-tree-sitter";
3
+
4
+ // src/detect/adapters/query-helpers.ts
5
+ import { Query } from "web-tree-sitter";
6
/**
 * Error raised when a Tree-sitter query cannot be compiled or executed.
 *
 * The instance keeps the query's logical name, its full source text and the
 * underlying failure so a caller can report exactly which query broke.
 */
var InvalidQueryError = class extends Error {
  /** Logical name of the query that failed. */
  queryName;
  /** Full text of the query that failed. */
  querySource;
  /** The value that was originally thrown (not necessarily an Error). */
  cause;
  /**
   * @param queryName Logical name used to identify the query in messages.
   * @param querySource Full query text, echoed in the message.
   * @param cause The underlying thrown value; non-Error values are stringified.
   */
  constructor(queryName, querySource, cause) {
    let reason;
    if (cause instanceof Error) {
      reason = cause.message;
    } else {
      reason = String(cause);
    }
    super(
      `[query-helpers] Invalid Tree-sitter query "${queryName}": ${reason}
Query source:
${querySource}`
    );
    Object.assign(this, {
      name: "InvalidQueryError",
      queryName,
      querySource,
      cause
    });
  }
};
23
/**
 * Compiled-query cache: language object -> (query source text -> Query).
 * The outer WeakMap does not keep a language object alive on its own.
 */
var queryCache = /* @__PURE__ */ new WeakMap();

/**
 * Return the compiled Query for `source` under `language`, compiling it on
 * first use and reusing the cached instance afterwards.
 *
 * @param language Tree-sitter language the query is compiled against.
 * @param source Query text; also the cache key within a language.
 * @param queryName Logical name, used only for error reporting.
 * @returns The compiled (possibly cached) Query.
 * @throws InvalidQueryError when the Query constructor throws.
 */
function compileQuery(language, source, queryName) {
  let bySource = queryCache.get(language);
  if (!bySource) {
    bySource = /* @__PURE__ */ new Map();
    queryCache.set(language, bySource);
  }

  const existing = bySource.get(source);
  if (existing) {
    return existing;
  }

  let compiled;
  try {
    compiled = new Query(language, source);
  } catch (err) {
    throw new InvalidQueryError(queryName, source, err);
  }
  bySource.set(source, compiled);
  return compiled;
}
41
/**
 * Parse `source` with `parser`, run one Tree-sitter query over the tree and
 * return a flat list of hits.
 *
 * Each hit is `{ captures, file, line, queryName }`, where `captures` maps a
 * capture name to the captured node's text (a repeated name keeps the last
 * node's text) and `line` is the 1-based line of the earliest capture in the
 * match. Matches without captures are skipped.
 *
 * The parsed tree is always released, including when the query or the
 * capture processing throws.
 *
 * @param parser Parser with a language already assigned.
 * @param source Source text to parse.
 * @param queryText Tree-sitter query text.
 * @param queryName Logical query name, copied to every hit and to errors.
 * @param filePath Path copied to every hit's `file` field.
 * @returns Array of hits; empty when the parser returns no tree.
 * @throws InvalidQueryError when the parser has no language, or the query
 *   fails to compile or execute.
 */
function runQuery(parser, source, queryText, queryName, filePath) {
  const language = parser.language;
  if (!language) {
    throw new InvalidQueryError(
      queryName,
      queryText,
      new Error("Parser has no language assigned")
    );
  }
  const query = compileQuery(language, queryText, queryName);
  const tree = parser.parse(source);
  if (!tree) return [];

  try {
    let matches;
    try {
      matches = query.matches(tree.rootNode);
    } catch (e) {
      throw new InvalidQueryError(queryName, queryText, e);
    }

    const out = [];
    for (const match of matches) {
      if (!match.captures || match.captures.length === 0) continue;
      const captures = {};
      let earliestLine = Number.POSITIVE_INFINITY;
      for (const cap of match.captures) {
        const node = cap.node;
        captures[cap.name] = node.text;
        // Tree-sitter rows are 0-based; hits report 1-based lines.
        earliestLine = Math.min(earliestLine, node.startPosition.row + 1);
      }
      out.push({
        captures,
        file: filePath,
        line: Number.isFinite(earliestLine) ? earliestLine : 1,
        queryName
      });
    }
    return out;
  } finally {
    // Best-effort release; a failing delete() must not mask the real result.
    try {
      tree.delete();
    } catch {
    }
  }
}
84
+
85
+ // src/detect/adapters/tree-sitter-loader.ts
86
+ import { createHash } from "crypto";
87
+ import {
88
+ mkdirSync,
89
+ readdirSync,
90
+ readFileSync,
91
+ writeFileSync,
92
+ renameSync,
93
+ unlinkSync,
94
+ lstatSync,
95
+ chmodSync,
96
+ utimesSync
97
+ } from "fs";
98
+ import { homedir } from "os";
99
+ import { dirname, join } from "path";
100
+ import { Language, Parser } from "web-tree-sitter";
101
/**
 * Raised when a grammar's SHA-256 digest differs from the expected digest.
 * Both digests are kept on the instance for reporting.
 */
var GrammarSHAMismatchError = class extends Error {
  /** Language identifier of the grammar. */
  language;
  /** Digest that was expected. */
  expected;
  /** Digest that was actually computed. */
  actual;
  /**
   * @param language Language identifier of the grammar.
   * @param expected Expected hex digest.
   * @param actual Computed hex digest.
   */
  constructor(language, expected, actual) {
    super(
      `[tree-sitter-loader] SHA-256 mismatch for grammar "${language}". Expected ${expected}, got ${actual}. REFUSING to load \u2014 see Phase 3.5 audit attack vector #3.`
    );
    Object.assign(this, {
      name: "GrammarSHAMismatchError",
      language,
      expected,
      actual
    });
  }
};
115
/**
 * Raised when a grammar cannot be obtained. The message tells the reader
 * that regex introspection is used instead for that language.
 */
var GrammarUnavailableError = class extends Error {
  /** Language identifier of the grammar. */
  language;
  /** Underlying failure, if any. */
  cause;
  /**
   * @param language Language identifier of the grammar.
   * @param cause Underlying failure; a falsy value yields a generic reason.
   */
  constructor(language, cause) {
    let reason;
    if (cause instanceof Error) {
      reason = cause.message;
    } else if (cause) {
      reason = String(cause);
    } else {
      reason = "no cached grammar and download failed";
    }
    super(
      `[tree-sitter-loader] Grammar for "${language}" is unavailable: ${reason}. Falling back to regex introspection for files in ${language}.`
    );
    Object.assign(this, {
      name: "GrammarUnavailableError",
      language,
      cause
    });
  }
};
128
/**
 * Raised when the grammar cache path exists but is a symlink or is not a
 * regular file; the loader refuses to read it.
 */
var GrammarCacheSymlinkError = class extends Error {
  /** The offending cache path. */
  cachePath;
  /**
   * @param cachePath Path that failed the regular-file check.
   */
  constructor(cachePath) {
    super(
      `[tree-sitter-loader] Refusing to load grammar \u2014 cache path "${cachePath}" is a symlink or non-regular file. (Phase 3.5 finding #3 \u2014 symlink attack vector.)`
    );
    Object.assign(this, {
      name: "GrammarCacheSymlinkError",
      cachePath
    });
  }
};
138
/**
 * Raised when a grammar's download URL does not use the https:// scheme.
 */
var GrammarUrlNotHttpsError = class extends Error {
  /** The rejected URL. */
  url;
  /**
   * @param url URL that was rejected.
   */
  constructor(url) {
    super(
      `[tree-sitter-loader] Refusing to download grammar from non-HTTPS URL: ${url}. Only https:// URLs are accepted. (Phase 3.5 finding #3.)`
    );
    Object.assign(this, {
      name: "GrammarUrlNotHttpsError",
      url
    });
  }
};
148
/**
 * Pinned grammar manifest: language identifier -> { url, sha256, version }.
 *
 * Every entry points at the same pinned tree-sitter-wasms release (0.1.13)
 * on unpkg, so the dependency surface stays single-source.
 *
 * The first four entries (python, typescript, javascript, swift) are the v1
 * set. The remaining six were added in the Plan 3c Phase 7 expansion
 * (2026-05-07) to support the registry-verified framework adapters (go-chi,
 * rails, aspnet, spring, ktor, phoenix) plus the bundled adapters in the same
 * language families (gin/echo/fiber, sinatra, etc.). Their SHA-256 digests
 * were computed 2026-05-07 via:
 *   curl -fsSL <url> | shasum -a 256
 *
 * The map key is the TreeSitterLanguage identifier and the URL is the storage
 * path; the two do NOT need to match. C# is the visible case: the key is
 * `csharp` while the unpkg file is `tree-sitter-c_sharp.wasm`. This is
 * intentional and validated by the manifest shape test in
 * tree-sitter-loader-manifest.test.ts.
 */
var GRAMMAR_MANIFEST = /* @__PURE__ */ (() => {
  const version = "0.1.13";
  // Builds one manifest entry from the wasm file stem and its pinned digest.
  const entry = (wasmStem, digest) => ({
    url: `https://unpkg.com/tree-sitter-wasms@${version}/out/tree-sitter-${wasmStem}.wasm`,
    sha256: digest,
    version
  });
  return {
    // v1 grammars
    python: entry("python", "9056d0fb0c337810d019fae350e8167786119da98f0f282aceae7ab89ee8253b"),
    typescript: entry("typescript", "8515404dceed38e1ed86aa34b09fcf3379fff1b4ff9dd3967bcd6d1eb5ac3d8f"),
    javascript: entry("javascript", "63812b9e275d26851264734868d27a1656bd44a2ef6eb3e85e6b03728c595ab5"),
    swift: entry("swift", "41c4fdb2249a3aa6d87eed0d383081ff09725c2248b4977043a43825980ffcc7"),
    // Plan 3c Phase 7 expansion
    go: entry("go", "9963ca89b616eaf04b08a43bc1fb0f07b85395bec313330851f1f1ead2f755b6"),
    ruby: entry("ruby", "93a5022855314cdb45458c7bb026a24a0ebc3a5ff6439e542e881f14dfa13a39"),
    csharp: entry("c_sharp", "6266a7e32d68a3459104d994dc848df15d5672b0ea8e86d327274b694f8e6991"),
    java: entry("java", "637aac4415fb39a211a4f4292d63c66b5ce9c32fa2cd35464af4f681d91b9a1f"),
    kotlin: entry("kotlin", "b5cb00c8d06ed0f10f1dbe497205b437809d7e87db1f638721a8cfb30e044449"),
    elixir: entry("elixir", "82e91b9759ddca30d8978ebbfa8e347b4451b64c931f9ae62112e6db9b8fac20")
  };
})();
220
/**
 * Resolve the directory that holds cached grammar .wasm files.
 *
 * Uses `MASSU_WASM_CACHE_DIR` when it is set to a non-empty value, otherwise
 * `<home>/.massu/wasm-cache`. An empty value is treated as unset: an empty
 * directory would make cache paths relative to the working directory, which
 * the cache-write step then creates and chmods.
 *
 * @returns The cache directory path.
 */
function getCacheDir() {
  const override = process.env.MASSU_WASM_CACHE_DIR;
  if (override !== undefined && override !== "") {
    return override;
  }
  return join(homedir(), ".massu", "wasm-cache");
}
223
/**
 * Build the cache file path for a grammar. The file name embeds both the
 * language identifier and the digest: `<language>-<sha>.wasm`.
 *
 * @param language Language identifier.
 * @param sha Hex digest used in the file name.
 * @returns Path inside the cache directory.
 */
function getCachedPath(language, sha) {
  const fileName = `${language}-${sha}.wasm`;
  return join(getCacheDir(), fileName);
}
226
+ var DEFAULT_CACHE_RETAIN_COUNT = 16;
227
+ function getCacheRetainCount() {
228
+ const env = process.env.MASSU_WASM_CACHE_RETAIN;
229
+ if (env) {
230
+ const n = Number(env);
231
+ if (Number.isFinite(n) && n >= 1 && n <= 1024) return Math.floor(n);
232
+ }
233
+ return DEFAULT_CACHE_RETAIN_COUNT;
234
+ }
235
/**
 * Set a file's access and modification times to "now". Failures are ignored
 * on purpose: the touch is best-effort bookkeeping.
 *
 * @param path File to touch.
 */
function touchCacheFile(path) {
  const stamp = /* @__PURE__ */ new Date();
  try {
    utimesSync(path, stamp, stamp);
  } catch {
    // Best-effort: a missing or read-only file is not an error here.
  }
}
242
/**
 * Trim the grammar cache directory to at most `retain` regular `.wasm`
 * files, deleting the ones with the oldest modification times.
 *
 * Symlinks and other non-regular entries are never deleted or counted; they
 * are reported on stderr and skipped. All filesystem errors are swallowed.
 *
 * @param retain Number of newest files to keep; defaults to the configured
 *   retain count.
 */
function evictBeyondRetainCount(retain = getCacheRetainCount()) {
  const cacheDir = getCacheDir();

  let names;
  try {
    names = readdirSync(cacheDir);
  } catch {
    // No readable cache directory means there is nothing to evict.
    return;
  }

  const regularFiles = [];
  for (const fileName of names) {
    if (!fileName.endsWith(".wasm")) continue;
    const path = join(cacheDir, fileName);

    let info;
    try {
      // lstat, not stat: a symlink must be seen as a symlink.
      info = lstatSync(path);
    } catch {
      continue;
    }

    const isRegular = !info.isSymbolicLink() && info.isFile();
    if (!isRegular) {
      console.error(
        `[tree-sitter-loader] cache eviction skipped non-regular file: ${path} (possible symlink attack \u2014 see Phase 3.5 finding F-008).`
      );
      continue;
    }
    regularFiles.push({ path, mtimeMs: info.mtimeMs });
  }

  if (regularFiles.length <= retain) return;

  // Newest first, so everything past index `retain` is the eviction set.
  regularFiles.sort((left, right) => right.mtimeMs - left.mtimeMs);
  for (const stale of regularFiles.slice(retain)) {
    try {
      unlinkSync(stale.path);
    } catch {
      // Best-effort removal.
    }
  }
}
277
/**
 * Compute the SHA-256 digest of `bytes`.
 *
 * @param bytes Data to hash.
 * @returns Lowercase hex digest.
 */
function sha256(bytes) {
  const hasher = createHash("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
280
/** Memoised `Parser.init()` promise; null until the first call. */
var parserInitPromise = null;

/**
 * Run `Parser.init()` once and share the resulting promise between callers.
 *
 * A failed initialisation is not memoised: the stored promise is cleared on
 * rejection so a later call can try again instead of receiving the same
 * rejection forever.
 *
 * @returns Promise that settles when `Parser.init()` settles.
 */
async function ensureParserInitialized() {
  if (!parserInitPromise) {
    const attempt = Parser.init().catch((err) => {
      // Only clear the slot if it still holds this attempt.
      if (parserInitPromise === attempt) {
        parserInitPromise = null;
      }
      throw err;
    });
    parserInitPromise = attempt;
  }
  return parserInitPromise;
}
286
/** Grammars already loaded in this process, keyed by language identifier. */
var loadedGrammars = /* @__PURE__ */ new Map();

/**
 * Read the on-disk cache file for a grammar and verify its digest.
 *
 * @returns The file bytes, or null when the file is missing, unreadable or
 *   empty (the caller then downloads instead).
 * @throws GrammarCacheSymlinkError when the path is a symlink or not a
 *   regular file.
 * @throws GrammarSHAMismatchError when the bytes do not hash to `expectedSha`.
 */
function readVerifiedCacheBytes(language, expectedSha, cachePath) {
  let info;
  try {
    // lstat, not stat: a symlink must be seen as a symlink.
    info = lstatSync(cachePath);
  } catch {
    return null;
  }
  if (info.isSymbolicLink() || !info.isFile()) {
    throw new GrammarCacheSymlinkError(cachePath);
  }
  let bytes;
  try {
    bytes = readFileSync(cachePath);
  } catch {
    return null;
  }
  if (bytes.byteLength === 0) {
    return null;
  }
  const actualSha = sha256(bytes);
  if (actualSha !== expectedSha) {
    throw new GrammarSHAMismatchError(language, expectedSha, actualSha);
  }
  return bytes;
}

/**
 * Download a grammar and verify its digest against the manifest entry.
 *
 * @returns The downloaded bytes.
 * @throws GrammarUnavailableError when the request fails or is not `ok`.
 * @throws GrammarSHAMismatchError when the body does not hash to
 *   `manifest.sha256`.
 */
async function downloadVerifiedGrammar(language, manifest, fetchImpl) {
  let body;
  try {
    const res = await fetchImpl(manifest.url);
    if (!res.ok) {
      throw new Error(`HTTP ${res.status ?? "unknown"} from ${manifest.url}`);
    }
    body = new Uint8Array(await res.arrayBuffer());
  } catch (e) {
    throw new GrammarUnavailableError(language, e);
  }
  const downloadedSha = sha256(body);
  if (downloadedSha !== manifest.sha256) {
    throw new GrammarSHAMismatchError(language, manifest.sha256, downloadedSha);
  }
  return body;
}

/**
 * Best-effort write of verified grammar bytes into the cache: write to a
 * per-process temp file, rename into place, then trim the cache. Any failure
 * is logged to stderr and otherwise ignored; the temp file is removed when
 * either the write or the rename fails.
 */
function persistGrammarToCache(language, cachePath, body) {
  try {
    const cacheDir = dirname(cachePath);
    mkdirSync(cacheDir, { recursive: true, mode: 0o700 });
    try {
      // mkdir does not change the mode of an existing directory.
      chmodSync(cacheDir, 0o700);
    } catch {
    }
    const tmpPath = `${cachePath}.tmp.${process.pid}`;
    try {
      writeFileSync(tmpPath, body, { mode: 0o600 });
      try {
        chmodSync(tmpPath, 0o600);
      } catch {
      }
      renameSync(tmpPath, cachePath);
    } catch (e) {
      // Eviction only considers "*.wasm", so a stray temp file would
      // never be cleaned up later.
      try {
        unlinkSync(tmpPath);
      } catch {
      }
      throw e;
    }
    try {
      chmodSync(cachePath, 0o600);
    } catch {
    }
    evictBeyondRetainCount();
  } catch (e) {
    console.error(
      `[tree-sitter-loader] cache write failed for ${language}: ${e instanceof Error ? e.message : String(e)} \u2014 loading directly from memory.`
    );
  }
}

/**
 * Load the Tree-sitter grammar for `language`.
 *
 * Resolution order: the in-process map, then the digest-verified cache file,
 * then an HTTPS download that is digest-verified before use and written back
 * to the cache on a best-effort basis.
 *
 * @param language Language identifier (a manifest key).
 * @param options Optional `manifestOverride` (per-language manifest entries
 *   that take precedence over the built-in manifest) and `fetchImpl`
 *   (replacement for `globalThis.fetch`).
 * @returns The loaded language object.
 * @throws GrammarUnavailableError when there is no manifest entry, no fetch
 *   implementation, or the download fails.
 * @throws GrammarCacheSymlinkError when the cache path is not a regular file.
 * @throws GrammarSHAMismatchError when cached or downloaded bytes fail the
 *   digest check.
 * @throws GrammarUrlNotHttpsError when the manifest URL is not https://.
 */
async function loadGrammar(language, options = {}) {
  await ensureParserInitialized();

  const alreadyLoaded = loadedGrammars.get(language);
  if (alreadyLoaded) return alreadyLoaded;

  const manifest = options.manifestOverride?.[language] ?? GRAMMAR_MANIFEST[language];
  if (!manifest) {
    throw new GrammarUnavailableError(
      language,
      new Error(`No manifest entry for language "${language}". v1 supports: ${Object.keys(GRAMMAR_MANIFEST).join(", ")}.`)
    );
  }

  const cachePath = getCachedPath(language, manifest.sha256);
  const cachedBytes = readVerifiedCacheBytes(language, manifest.sha256, cachePath);
  if (cachedBytes) {
    const fromCache = await Language.load(cachedBytes);
    loadedGrammars.set(language, fromCache);
    // Refresh mtime so eviction treats this file as recently used.
    touchCacheFile(cachePath);
    return fromCache;
  }

  if (!/^https:\/\//i.test(manifest.url)) {
    throw new GrammarUrlNotHttpsError(manifest.url);
  }
  const fetchImpl = options.fetchImpl ?? globalThis.fetch;
  if (!fetchImpl) {
    throw new GrammarUnavailableError(
      language,
      new Error("No fetch implementation available (Node < 18?)")
    );
  }

  const body = await downloadVerifiedGrammar(language, manifest, fetchImpl);
  persistGrammarToCache(language, cachePath, body);

  const lang = await Language.load(body);
  loadedGrammars.set(language, lang);
  return lang;
}
385
+
386
+ // src/detect/adapters/parse-guard.ts
387
/** Largest file, in bytes, that is handed to the parser (1 MiB). */
var MAX_AST_FILE_BYTES = 1 * 1024 * 1024;
/** Deepest bracket nesting accepted by the pre-parse scan. */
var MAX_AST_PARSE_DEPTH = 5e3;

/**
 * Static pre-parse gate: decide whether `source` may be handed to the parser.
 *
 * Checks, in order: the byte size against `MAX_AST_FILE_BYTES`, then a single
 * pass over the text that rejects any NUL character and any point where the
 * count of unclosed `(`, `[`, `{` exceeds `MAX_AST_PARSE_DEPTH`. The scan is
 * purely textual: brackets inside strings or comments count too, and
 * unmatched closers are ignored.
 *
 * @param source Source text to check.
 * @param sizeBytes Optional pre-computed byte size; when omitted the UTF-8
 *   length of `source` is used.
 * @returns `null` when the source is accepted, otherwise `{ reason, detail }`
 *   with reason `"size-cap"`, `"control-bytes"` or `"depth-cap"`.
 */
function isParsableSource(source, sizeBytes) {
  const bytes = sizeBytes ?? Buffer.byteLength(source, "utf-8");
  if (bytes > MAX_AST_FILE_BYTES) {
    return {
      reason: "size-cap",
      detail: `${bytes} bytes > ${MAX_AST_FILE_BYTES} cap`
    };
  }
  let depth = 0;
  for (let i = 0; i < source.length; i++) {
    const c = source.charCodeAt(i);
    if (c === 0) {
      return { reason: "control-bytes", detail: "NUL byte at offset " + i };
    }
    // 40 '(' , 91 '[' , 123 '{'
    if (c === 40 || c === 91 || c === 123) {
      depth++;
      if (depth > MAX_AST_PARSE_DEPTH) {
        return {
          reason: "depth-cap",
          detail: `nesting depth exceeded ${MAX_AST_PARSE_DEPTH}`
        };
      }
    // 41 ')' , 93 ']' , 125 '}'
    } else if (c === 41 || c === 93 || c === 125) {
      if (depth > 0) depth--;
    }
  }
  return null;
}
419
+
420
+ // ../adapter-go-chi/dist/index.js
421
/**
 * Matches a call whose selector field is one of the HTTP-verb method names
 * and whose first argument is a string literal. Both interpreted ("...") and
 * raw (`...`) Go string literals are accepted.
 * Captures: @method (the verb name), @route_path (the literal, with quotes).
 */
var ROUTE_METHOD_QUERY = `
(call_expression
  function: (selector_expression
    field: (field_identifier) @method (#match? @method "^(Get|Post|Put|Delete|Patch|Head|Options|Connect|Trace)$"))
  arguments: (argument_list
    .
    [(interpreted_string_literal) (raw_string_literal)] @route_path))
`;

/**
 * Matches a call whose selector field is exactly "Mount" and whose first
 * argument is a string literal (interpreted or raw). The consumer of
 * @mount_path strips either quote style, so both literal kinds are matched.
 * Captures: @mount_path (the literal, with quotes), @_field (helper).
 */
var MOUNT_PREFIX_QUERY = `
(call_expression
  function: (selector_expression
    field: (field_identifier) @_field (#eq? @_field "Mount"))
  arguments: (argument_list
    .
    [(interpreted_string_literal) (raw_string_literal)] @mount_path))
`;

/**
 * Matches a call whose selector field is exactly "Use" and whose first
 * argument is a selector on the identifier "middleware".
 * Captures: @middleware_name (the selected field), @_use and @_pkg (helpers).
 */
var MIDDLEWARE_USE_QUERY = `
(call_expression
  function: (selector_expression
    field: (field_identifier) @_use (#eq? @_use "Use"))
  arguments: (argument_list
    .
    (selector_expression
      operand: (identifier) @_pkg (#eq? @_pkg "middleware")
      field: (field_identifier) @middleware_name)))
`;
447
/**
 * Framework adapter for Go projects that use the go-chi router.
 *
 * `matches` decides applicability from the go.mod text; `introspect` runs
 * three Tree-sitter queries over the sample files and reports the first
 * route method, mount prefix base and middleware name it sees.
 */
var goChiAdapter = {
  id: "go-chi",
  languages: ["go"],

  /**
   * @param signals Detection signals; only `signals.goMod` (go.mod text) is read.
   * @returns true when go.mod mentions `github.com/go-chi/chi` (case-insensitive).
   */
  matches(signals) {
    if (!signals.goMod) return false;
    return /github\.com\/go-chi\/chi/i.test(signals.goMod);
  },

  /**
   * Inspect sample files for chi conventions.
   *
   * Files rejected by the pre-parse gate are skipped with a stderr warning.
   * A per-file failure skips that file, except an InvalidQueryError, which
   * propagates because it indicates a broken query rather than a bad file.
   *
   * Confidence: "none" when nothing was found; "high" when exactly one
   * distinct route method was seen; "low" when two or more were seen;
   * "medium" when only mount/middleware conventions were found.
   *
   * @param files Sample files; `path`, `content` and `size` are read.
   * @param _rootDir Unused.
   * @returns `{ conventions, provenance, confidence }`.
   */
  async introspect(files, _rootDir) {
    const emptyResult = () => ({ conventions: {}, provenance: [], confidence: "none" });
    if (files.length === 0) {
      return emptyResult();
    }

    let language;
    try {
      language = await loadGrammar("go");
    } catch {
      // Any grammar-loading failure yields an empty result.
      return emptyResult();
    }

    const parser = new Parser2();
    const routeMethods = /* @__PURE__ */ new Map();
    const mountBases = /* @__PURE__ */ new Map();
    const middlewareNames = /* @__PURE__ */ new Map();
    // Keep only the first sighting of each distinct value.
    const recordFirst = (seen, key, hit, file) => {
      if (key && !seen.has(key)) {
        seen.set(key, { line: hit.line, file: file.path });
      }
    };

    try {
      // Inside the try so the parser is released even if this throws.
      parser.setLanguage(language);
      for (const file of files) {
        const skip = isParsableSource(file.content, file.size);
        if (skip) {
          process.stderr.write(`[massu/ast] WARN: go-chi skipping ${file.path}: ${skip.reason} (${skip.detail}). Cap=${MAX_AST_FILE_BYTES}. (Phase 3.5 mitigation)\n`);
          continue;
        }
        try {
          for (const hit of runQuery(parser, file.content, ROUTE_METHOD_QUERY, "chi-route-method", file.path)) {
            recordFirst(routeMethods, hit.captures.method, hit, file);
          }
          for (const hit of runQuery(parser, file.content, MOUNT_PREFIX_QUERY, "chi-mount-prefix", file.path)) {
            const raw = hit.captures.mount_path;
            if (!raw) continue;
            // Strip the surrounding quote characters from the literal text.
            const literal = raw.replace(/^["`]/, "").replace(/["`]$/, "");
            recordFirst(mountBases, extractPrefixBase(literal), hit, file);
          }
          for (const hit of runQuery(parser, file.content, MIDDLEWARE_USE_QUERY, "chi-middleware-use", file.path)) {
            recordFirst(middlewareNames, hit.captures.middleware_name, hit, file);
          }
        } catch (e) {
          if (e instanceof InvalidQueryError) {
            throw e;
          }
          // Any other failure: give up on this file only.
        }
      }
    } finally {
      try {
        parser.delete();
      } catch {
      }
    }

    const conventions = {};
    const provenance = [];
    // Adopt the first-seen value of a map as the convention for `field`.
    const adoptFirst = (field, seen, query) => {
      if (seen.size === 0) return;
      const [value, { line, file }] = seen.entries().next().value;
      conventions[field] = value;
      provenance.push({ field, sourceFile: file, line, query });
    };
    adoptFirst("route_method", routeMethods, "chi-route-method");
    adoptFirst("mount_prefix_base", mountBases, "chi-mount-prefix");
    adoptFirst("middleware_name", middlewareNames, "chi-middleware-use");

    let confidence;
    if (Object.keys(conventions).length === 0) {
      confidence = "none";
    } else if (routeMethods.size === 1) {
      confidence = "high";
    } else if (routeMethods.size >= 2) {
      confidence = "low";
    } else {
      confidence = "medium";
    }
    return { conventions, provenance, confidence };
  }
};
550
/**
 * Reduce a mount path to its first segment, e.g. "/api/v1" -> "/api".
 *
 * @param prefix Mount path text.
 * @returns "/" plus the first non-empty segment, or null when the path does
 *   not start with "/" or has no segment after its leading slashes.
 */
function extractPrefixBase(prefix) {
  if (!prefix.startsWith("/")) {
    return null;
  }
  // One or more leading slashes, then the run of characters up to the next slash.
  const found = /^\/+([^/]+)/.exec(prefix);
  return found === null ? null : `/${found[1]}`;
}
559
+ export {
560
+ goChiAdapter
561
+ };
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Plan 3b — Phase 3.5 (security audit): centralized AST parse-time safety.
3
+ *
4
+ * Adapters consume oversized / pathological user source. Tree-sitter is
5
+ * fast but synchronous — there is no native timeout, no native size cap.
6
+ * This module provides:
7
+ *
8
+ * 1. `MAX_AST_FILE_BYTES` — 1MB hard cap per file (covers DoS vector
9
+ * "oversized file"). Plan §1 cited 5MB; we apply a tighter bound at
10
+ * the adapter tier because adapter sample size is small (≤3 files
11
+ * per adapter) and 1MB is already an order of magnitude beyond
12
+ * reasonable convention-defining files.
13
+ * 2. `MAX_AST_PARSE_DEPTH` — 5K-deep nested-structure rejection — a
14
+ * static text scan for runaway open-paren / open-brace runs that
15
+ * would push Tree-sitter into deep recursion. Cheap pre-check.
16
+ * 3. `withParseDeadline(fn, filePath, budgetMs?)` — wraps a synchronous Tree-sitter call in
17
+ * a deadline guard. Tree-sitter's pure-JS path can't be interrupted
18
+ * mid-parse, but we record the elapsed time AFTER the call returns
19
+ * and emit a stderr warning when budget is exceeded so daemon ops
20
+ * see the abuse signal.
21
+ * 4. `isParsableSource(source, sizeBytes?)` — static gate combining size, NUL-byte, and depth checks.
22
+ * Returns `null` if accepted, or a `{ reason, detail }` object when
23
+ * rejected so the adapter can record provenance and skip the file.
24
+ *
25
+ * Library purity: never terminates the process, no DB calls, no network.
26
+ * Pure helper module.
27
+ */
28
+ /** Hard size cap for an individual file fed to Tree-sitter. */
29
+ export declare const MAX_AST_FILE_BYTES: number;
30
+ /**
31
+ * Maximum nested-bracket depth allowed in a file. Pathological inputs
32
+ * with 10K-deep nesting can push Tree-sitter into runaway recursion on
33
+ * some grammar versions. Cheap O(n) text scan picks them off before parse.
34
+ */
35
+ export declare const MAX_AST_PARSE_DEPTH = 5000;
36
+ /**
37
+ * Per-file Tree-sitter parse budget (ms). Tree-sitter parses are
38
+ * synchronous in JS, so this is enforced as a post-call elapsed-time
39
+ * check rather than a hard timer. The check still serves the purpose of
40
+ * giving operators visibility into adversarial files.
41
+ */
42
+ export declare const MAX_AST_PARSE_MS = 2000;
43
+ export type ParseSkipReason = 'size-cap' | 'depth-cap' | 'control-bytes' | 'utf8-validation';
44
+ export interface ParseSkip {
45
+ reason: ParseSkipReason;
46
+ detail: string;
47
+ }
48
+ /**
49
+ * Static safety gate. Call BEFORE invoking `parser.parse()`. Returns
50
+ * `null` if the source is acceptable, or a `ParseSkip` describing why
51
+ * the file is rejected.
52
+ *
53
+ * Cheap to evaluate — single linear scan + size check.
54
+ */
55
+ export declare function isParsableSource(source: string, sizeBytes?: number): ParseSkip | null;
56
+ /**
57
+ * Wrap a synchronous Tree-sitter call and emit a warning when the call
58
+ * exceeds the budget. Returns the call's result regardless — callers may
59
+ * decide to discard it based on `elapsed`.
60
+ *
61
+ * Note: Tree-sitter's WASM path has no co-operative cancellation, so
62
+ * this is observability rather than a hard kill. The size + depth caps
63
+ * are the load-bearing mitigations; this is the third belt.
64
+ */
65
+ export declare function withParseDeadline<T>(fn: () => T, filePath: string, budgetMs?: number): {
66
+ value: T;
67
+ elapsedMs: number;
68
+ overBudget: boolean;
69
+ };
@@ -0,0 +1,54 @@
1
+ // src/detect/adapters/parse-guard.ts
2
/** Largest file, in bytes, that is handed to the parser (1 MiB). */
var MAX_AST_FILE_BYTES = 1 * 1024 * 1024;
/** Deepest bracket nesting accepted by the pre-parse scan. */
var MAX_AST_PARSE_DEPTH = 5e3;
/** Default per-file parse budget in milliseconds. */
var MAX_AST_PARSE_MS = 2e3;

/**
 * Static pre-parse gate: decide whether `source` may be handed to the parser.
 *
 * Checks, in order: the byte size against `MAX_AST_FILE_BYTES`, then a single
 * pass over the text that rejects any NUL character and any point where the
 * count of unclosed `(`, `[`, `{` exceeds `MAX_AST_PARSE_DEPTH`. The scan is
 * purely textual: brackets inside strings or comments count too, and
 * unmatched closers are ignored.
 *
 * @param source Source text to check.
 * @param sizeBytes Optional pre-computed byte size; when omitted the UTF-8
 *   length of `source` is used.
 * @returns `null` when the source is accepted, otherwise `{ reason, detail }`
 *   with reason `"size-cap"`, `"control-bytes"` or `"depth-cap"`.
 */
function isParsableSource(source, sizeBytes) {
  const bytes = sizeBytes ?? Buffer.byteLength(source, "utf-8");
  if (bytes > MAX_AST_FILE_BYTES) {
    return {
      reason: "size-cap",
      detail: `${bytes} bytes > ${MAX_AST_FILE_BYTES} cap`
    };
  }
  let depth = 0;
  for (let i = 0; i < source.length; i++) {
    const c = source.charCodeAt(i);
    if (c === 0) {
      return { reason: "control-bytes", detail: "NUL byte at offset " + i };
    }
    // 40 '(' , 91 '[' , 123 '{'
    if (c === 40 || c === 91 || c === 123) {
      depth++;
      if (depth > MAX_AST_PARSE_DEPTH) {
        return {
          reason: "depth-cap",
          detail: `nesting depth exceeded ${MAX_AST_PARSE_DEPTH}`
        };
      }
    // 41 ')' , 93 ']' , 125 '}'
    } else if (c === 41 || c === 93 || c === 125) {
      if (depth > 0) depth--;
    }
  }
  return null;
}
35
/**
 * Run a synchronous call and measure how long it took.
 *
 * The call is never interrupted; when it takes longer than the budget a
 * warning naming the file is written to stderr after it returns.
 *
 * @param fn Synchronous function to run.
 * @param filePath Path used in the warning message.
 * @param budgetMs Time budget in milliseconds; defaults to `MAX_AST_PARSE_MS`.
 * @returns `{ value, elapsedMs, overBudget }` where `value` is `fn`'s result.
 */
function withParseDeadline(fn, filePath, budgetMs = MAX_AST_PARSE_MS) {
  const startedAt = Date.now();
  const value = fn();
  const finishedAt = Date.now();

  const elapsedMs = finishedAt - startedAt;
  const outcome = { value, elapsedMs, overBudget: elapsedMs > budgetMs };
  if (!outcome.overBudget) {
    return outcome;
  }
  process.stderr.write(
    `[massu/ast] WARN: parse of ${filePath} took ${elapsedMs}ms (budget ${budgetMs}ms) \u2014 file may be adversarial. (Phase 3.5 mitigation)\n`
  );
  return outcome;
}
48
+ export {
49
+ MAX_AST_FILE_BYTES,
50
+ MAX_AST_PARSE_DEPTH,
51
+ MAX_AST_PARSE_MS,
52
+ isParsableSource,
53
+ withParseDeadline
54
+ };