@massu/core 1.5.8 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ /**
2
+ * `@massu/core/adapter` — public adapter authoring SDK (Plan 3c gap-31, gap-35).
3
+ *
4
+ * Adapter authors import from this subpath ONLY; everything inside @massu/core
5
+ * outside this entry point is implementation detail subject to change without
6
+ * a major version bump. The contract surface here is part of the SemVer-
7
+ * stable API.
8
+ *
9
+ * Usage (adapter package author):
10
+ *
11
+ * import { defineAdapter, type CodebaseAdapter } from '@massu/core/adapter';
12
+ *
13
+ * export default defineAdapter({
14
+ * id: 'rails-active-record',
15
+ * languages: ['ruby'],
16
+ * matches(signals) {
17
+ * return Boolean(signals.gemfile?.includes('rails'));
18
+ * },
19
+ * async introspect(files, rootDir) {
20
+ * // Tree-sitter queries, AST walks, file sampling…
21
+ * return {
22
+ * conventions: { router: 'rails' },
23
+ * provenance: [{ field: 'router', value: 'config/routes.rb:1', query: 'gemfile' }],
24
+ * confidence: 'high',
25
+ * };
26
+ * },
27
+ * });
28
+ *
29
+ * Then in the adapter package's package.json:
30
+ * {
31
+ * "name": "@massu/adapter-rails",
32
+ * "version": "0.1.0",
33
+ * "main": "dist/index.js",
34
+ * "type": "module",
35
+ * "massu-adapter": true,
36
+ * "massu-adapter-api-version": "1",
37
+ * "peerDependencies": { "@massu/core": ">=1.5.0 <2.0.0" }
38
+ * }
39
+ *
40
+ * The adapter loader (Plan 3b runner.ts) does
41
+ * const mod = await import(`<package-dir>/${main}`);
42
+ * const adapter = (mod.default ?? mod) as CodebaseAdapter;
43
+ * and dispatches matches() + introspect() accordingly.
44
+ *
45
+ * defineAdapter is a NO-OP at runtime — it's an identity function that
46
+ * exists for compile-time type narrowing (so adapter authors get IDE
47
+ * autocomplete + type errors for missing fields). The factory's only job
48
+ * is to anchor the type contract; it does NOT validate at runtime, register
49
+ * anywhere, or mutate state. Runtime validation happens at the loader
50
+ * (Plan 3b runner.ts) which checks the dispatched object shape before
51
+ * invoking matches/introspect.
52
+ *
53
+ * Stability: every export here is part of @massu/core's public SemVer
54
+ * surface. Breaking changes to the CodebaseAdapter shape (renamed fields,
55
+ * removed methods) require a major version bump per the
56
+ * massu-adapter-api-version contract. Adapter packages declare
57
+ * `"massu-adapter-api-version": "1"` so the loader refuses incompatible
58
+ * majors at startup.
59
+ */
60
+ export { type CodebaseAdapter, type DetectionSignals, type SourceFile, type TreeSitterLanguage, type AdapterResult, type Provenance, type AdapterResolved, type MergedAdapterOutput, } from './detect/adapters/types.js';
61
+ import type { CodebaseAdapter } from './detect/adapters/types.js';
62
+ export { runQuery, InvalidQueryError, type RunQueryHit } from './detect/adapters/query-helpers.js';
63
+ export { loadGrammar } from './detect/adapters/tree-sitter-loader.js';
64
+ export { isParsableSource, MAX_AST_FILE_BYTES, MAX_AST_PARSE_DEPTH, MAX_AST_PARSE_MS, type ParseSkip, type ParseSkipReason, } from './detect/adapters/parse-guard.js';
65
+ /**
66
+ * Identity factory — narrows the input's type to `CodebaseAdapter` so
67
+ * authors get compile-time checking + IDE autocomplete for missing /
68
+ * mistyped fields. Runtime: returns the input unchanged. Use this in
69
+ * place of an inline `const adapter: CodebaseAdapter = { ... }`
70
+ * annotation.
71
+ *
72
+ * Returning the input (instead of `void`) means adapter packages can do
73
+ * `export default defineAdapter({ ... })` and the loader's
74
+ * `mod.default` destructuring just works.
75
+ */
76
+ export declare function defineAdapter(spec: CodebaseAdapter): CodebaseAdapter;
@@ -0,0 +1,431 @@
1
+ // src/detect/adapters/query-helpers.ts
2
+ import { Query } from "web-tree-sitter";
3
/**
 * Error raised when a Tree-sitter query cannot be compiled or executed.
 *
 * Carries the logical query name, the full query text, and the underlying
 * failure so callers can report exactly which query was rejected.
 */
var InvalidQueryError = class extends Error {
  queryName;
  querySource;
  cause;
  /**
   * @param queryName   Human-readable identifier of the failing query.
   * @param querySource The query text that was rejected.
   * @param cause       The original failure (an Error or any thrown value).
   */
  constructor(queryName, querySource, cause) {
    // Non-Error throwables are stringified so the message is always readable.
    let reason;
    if (cause instanceof Error) {
      reason = cause.message;
    } else {
      reason = String(cause);
    }
    const headline = `[query-helpers] Invalid Tree-sitter query "${queryName}": ${reason}`;
    super(`${headline}\nQuery source:\n${querySource}`);
    this.name = "InvalidQueryError";
    Object.assign(this, { queryName, querySource, cause });
  }
};
20
/**
 * Compiled-query cache: Language object (held weakly) -> Map of query source
 * text -> compiled Query.
 */
var queryCache = /* @__PURE__ */ new WeakMap();

/**
 * Returns the compiled Query for `source` against `language`, compiling and
 * caching it on first use.
 *
 * @param language  Language object the query is compiled against.
 * @param source    Tree-sitter query text; also the cache key.
 * @param queryName Name used only for error reporting.
 * @throws InvalidQueryError when the Query constructor rejects `source`.
 */
function compileQuery(language, source, queryName) {
  if (!queryCache.has(language)) {
    queryCache.set(language, /* @__PURE__ */ new Map());
  }
  const compiledBySource = queryCache.get(language);
  const hit = compiledBySource.get(source);
  if (hit) return hit;
  try {
    const compiled = new Query(language, source);
    compiledBySource.set(source, compiled);
    return compiled;
  } catch (err) {
    // Failed compilations are never cached; every retry recompiles.
    throw new InvalidQueryError(queryName, source, err);
  }
}
38
/**
 * Parses `source` with `parser`, runs the Tree-sitter query `queryText`
 * against the resulting tree and returns one hit per match that has at least
 * one capture.
 *
 * Each hit has the shape `{ captures, file, line, queryName }` where
 * `captures` maps capture name -> captured node text (a later capture with
 * the same name overwrites an earlier one) and `line` is the smallest
 * `startPosition.row + 1` among the match's captures.
 *
 * @param parser    Parser with a language already assigned.
 * @param source    Source text to parse.
 * @param queryText Tree-sitter query text.
 * @param queryName Name attached to hits and to any error raised.
 * @param filePath  Path copied verbatim into each hit's `file` field.
 * @returns Array of hits; empty when the parser yields no tree.
 * @throws InvalidQueryError when the parser has no language, the query fails
 *         to compile, or executing the query throws.
 */
function runQuery(parser, source, queryText, queryName, filePath) {
  const language = parser.language;
  if (!language) {
    throw new InvalidQueryError(
      queryName,
      queryText,
      new Error("Parser has no language assigned")
    );
  }
  const query = compileQuery(language, queryText, queryName);
  const tree = parser.parse(source);
  if (!tree) return [];
  try {
    let matches;
    try {
      matches = query.matches(tree.rootNode);
    } catch (e) {
      throw new InvalidQueryError(queryName, queryText, e);
    }
    const out = [];
    for (const match of matches) {
      if (!match.captures || match.captures.length === 0) continue;
      const captures = {};
      let earliestLine = Number.POSITIVE_INFINITY;
      for (const cap of match.captures) {
        const node = cap.node;
        // Text is copied out here, while the tree is still alive.
        captures[cap.name] = node.text;
        const line = node.startPosition.row + 1;
        if (line < earliestLine) {
          earliestLine = line;
        }
      }
      out.push({
        captures,
        file: filePath,
        line: Number.isFinite(earliestLine) ? earliestLine : 1,
        queryName
      });
    }
    return out;
  } finally {
    // Release the tree on every exit path, including when query execution
    // or capture extraction throws. Deletion itself stays best-effort.
    try {
      tree.delete();
    } catch {
    }
  }
}
81
+
82
+ // src/detect/adapters/tree-sitter-loader.ts
83
+ import { createHash } from "crypto";
84
+ import {
85
+ mkdirSync,
86
+ readdirSync,
87
+ readFileSync,
88
+ writeFileSync,
89
+ renameSync,
90
+ unlinkSync,
91
+ lstatSync,
92
+ chmodSync,
93
+ utimesSync
94
+ } from "fs";
95
+ import { homedir } from "os";
96
+ import { dirname, join } from "path";
97
+ import { Language, Parser } from "web-tree-sitter";
98
/**
 * Raised when grammar bytes do not hash to the SHA-256 pinned for that
 * language. Exposes the language plus the expected and actual digests.
 */
var GrammarSHAMismatchError = class extends Error {
  language;
  expected;
  actual;
  /**
   * @param language Grammar identifier whose bytes failed verification.
   * @param expected Pinned SHA-256 hex digest.
   * @param actual   SHA-256 hex digest of the bytes actually seen.
   */
  constructor(language, expected, actual) {
    const text = `[tree-sitter-loader] SHA-256 mismatch for grammar "${language}". Expected ${expected}, got ${actual}. REFUSING to load \u2014 see Phase 3.5 audit attack vector #3.`;
    super(text);
    this.name = "GrammarSHAMismatchError";
    Object.assign(this, { language, expected, actual });
  }
};
112
/**
 * Raised when a grammar cannot be obtained for a language. The message tells
 * the reader that regex introspection is the fallback for that language.
 */
var GrammarUnavailableError = class extends Error {
  language;
  cause;
  /**
   * @param language Grammar identifier that could not be loaded.
   * @param cause    Underlying failure; may be an Error, any other value, or
   *                 absent (a generic explanation is used when falsy).
   */
  constructor(language, cause) {
    let reason = "no cached grammar and download failed";
    if (cause instanceof Error) {
      reason = cause.message;
    } else if (cause) {
      reason = String(cause);
    }
    const text = `[tree-sitter-loader] Grammar for "${language}" is unavailable: ${reason}. Falling back to regex introspection for files in ${language}.`;
    super(text);
    this.name = "GrammarUnavailableError";
    Object.assign(this, { language, cause });
  }
};
125
/**
 * Raised when the on-disk cache entry for a grammar is a symlink or is not a
 * regular file; the offending path is exposed as `cachePath`.
 */
var GrammarCacheSymlinkError = class extends Error {
  cachePath;
  /**
   * @param cachePath Cache file path that failed the regular-file check.
   */
  constructor(cachePath) {
    const text = `[tree-sitter-loader] Refusing to load grammar \u2014 cache path "${cachePath}" is a symlink or non-regular file. (Phase 3.5 finding #3 \u2014 symlink attack vector.)`;
    super(text);
    this.name = "GrammarCacheSymlinkError";
    Object.assign(this, { cachePath });
  }
};
135
/**
 * Raised when a grammar manifest entry points at a URL that is not
 * `https://`; the rejected URL is exposed as `url`.
 */
var GrammarUrlNotHttpsError = class extends Error {
  url;
  /**
   * @param url The rejected download URL.
   */
  constructor(url) {
    const text = `[tree-sitter-loader] Refusing to download grammar from non-HTTPS URL: ${url}. Only https:// URLs are accepted. (Phase 3.5 finding #3.)`;
    super(text);
    this.name = "GrammarUrlNotHttpsError";
    Object.assign(this, { url });
  }
};
145
+ var GRAMMAR_MANIFEST = {
146
+ python: {
147
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-python.wasm",
148
+ sha256: "9056d0fb0c337810d019fae350e8167786119da98f0f282aceae7ab89ee8253b",
149
+ version: "0.1.13"
150
+ },
151
+ typescript: {
152
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-typescript.wasm",
153
+ sha256: "8515404dceed38e1ed86aa34b09fcf3379fff1b4ff9dd3967bcd6d1eb5ac3d8f",
154
+ version: "0.1.13"
155
+ },
156
+ javascript: {
157
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-javascript.wasm",
158
+ sha256: "63812b9e275d26851264734868d27a1656bd44a2ef6eb3e85e6b03728c595ab5",
159
+ version: "0.1.13"
160
+ },
161
+ swift: {
162
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-swift.wasm",
163
+ sha256: "41c4fdb2249a3aa6d87eed0d383081ff09725c2248b4977043a43825980ffcc7",
164
+ version: "0.1.13"
165
+ },
166
+ // ----------------------------------------------------------------
167
+ // Plan 3c Phase 7 expansion (2026-05-07):
168
+ //
169
+ // Six additional grammars to support the registry-verified framework
170
+ // adapters (go-chi, rails, aspnet, spring, ktor, phoenix) plus the
171
+ // bundled adapters in the same language families (gin/echo/fiber,
172
+ // sinatra, etc.). All entries use the SAME pinned tree-sitter-wasms
173
+ // version (0.1.13) as the v1 four to keep the dependency surface
174
+ // single-source.
175
+ //
176
+ // SHA-256s computed 2026-05-07 via:
177
+ // curl -fsSL <url> | shasum -a 256
178
+ //
179
+ // The unpkg filename for C# uses an underscore (`c_sharp`) while the
180
+ // TreeSitterLanguage identifier uses no separator (`csharp`); the map
181
+ // key is the type identifier, the URL is the storage path — they do
182
+ // NOT need to match, the same as how `python` maps to `tree-sitter-
183
+ // python.wasm`. This is intentional and validated by the manifest
184
+ // shape test in tree-sitter-loader-manifest.test.ts.
185
+ // ----------------------------------------------------------------
186
+ go: {
187
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-go.wasm",
188
+ sha256: "9963ca89b616eaf04b08a43bc1fb0f07b85395bec313330851f1f1ead2f755b6",
189
+ version: "0.1.13"
190
+ },
191
+ ruby: {
192
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-ruby.wasm",
193
+ sha256: "93a5022855314cdb45458c7bb026a24a0ebc3a5ff6439e542e881f14dfa13a39",
194
+ version: "0.1.13"
195
+ },
196
+ csharp: {
197
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-c_sharp.wasm",
198
+ sha256: "6266a7e32d68a3459104d994dc848df15d5672b0ea8e86d327274b694f8e6991",
199
+ version: "0.1.13"
200
+ },
201
+ java: {
202
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-java.wasm",
203
+ sha256: "637aac4415fb39a211a4f4292d63c66b5ce9c32fa2cd35464af4f681d91b9a1f",
204
+ version: "0.1.13"
205
+ },
206
+ kotlin: {
207
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-kotlin.wasm",
208
+ sha256: "b5cb00c8d06ed0f10f1dbe497205b437809d7e87db1f638721a8cfb30e044449",
209
+ version: "0.1.13"
210
+ },
211
+ elixir: {
212
+ url: "https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-elixir.wasm",
213
+ sha256: "82e91b9759ddca30d8978ebbfa8e347b4451b64c931f9ae62112e6db9b8fac20",
214
+ version: "0.1.13"
215
+ }
216
+ };
217
/**
 * Resolves the directory used for cached grammar `.wasm` files.
 *
 * Uses `MASSU_WASM_CACHE_DIR` when it is set to a non-empty value, otherwise
 * `<home>/.massu/wasm-cache`.
 *
 * @returns The cache directory path.
 */
function getCacheDir() {
  const override = process.env.MASSU_WASM_CACHE_DIR;
  // An empty override is treated as "unset": joining onto "" would make every
  // cache path resolve relative to the process working directory.
  if (override) return override;
  return join(homedir(), ".massu", "wasm-cache");
}

/**
 * Builds the cache file path for a grammar: `<cacheDir>/<language>-<sha>.wasm`.
 *
 * @param language Grammar identifier used as the file-name prefix.
 * @param sha      SHA-256 hex digest embedded in the file name.
 * @returns The cache file path.
 */
function getCachedPath(language, sha) {
  return join(getCacheDir(), `${language}-${sha}.wasm`);
}
223
+ var DEFAULT_CACHE_RETAIN_COUNT = 16;
224
+ function getCacheRetainCount() {
225
+ const env = process.env.MASSU_WASM_CACHE_RETAIN;
226
+ if (env) {
227
+ const n = Number(env);
228
+ if (Number.isFinite(n) && n >= 1 && n <= 1024) return Math.floor(n);
229
+ }
230
+ return DEFAULT_CACHE_RETAIN_COUNT;
231
+ }
232
/**
 * Sets the access and modification times of `path` to the current time.
 * Best-effort: any failure is ignored.
 *
 * @param path File whose timestamps should be refreshed.
 */
function touchCacheFile(path) {
  const stamp = /* @__PURE__ */ new Date();
  try {
    utimesSync(path, stamp, stamp);
  } catch {
    // Deliberately ignored: a failed touch must not fail the caller.
  }
}
239
/**
 * Trims the grammar cache directory to at most `retain` files by deleting
 * the regular `.wasm` files with the oldest modification times.
 *
 * Symlinks and other non-regular entries are never deleted; they are
 * reported on stderr and skipped. An unreadable directory, an entry that
 * cannot be stat'ed, or a failed deletion is silently ignored.
 *
 * @param retain Maximum number of cache files to keep (defaults to the
 *               configured retain count).
 */
function evictBeyondRetainCount(retain = getCacheRetainCount()) {
  const dir = getCacheDir();
  let names;
  try {
    names = readdirSync(dir);
  } catch {
    return;
  }
  const regularWasmFiles = [];
  for (const name of names) {
    if (!name.endsWith(".wasm")) continue;
    const path = join(dir, name);
    let info;
    try {
      info = lstatSync(path);
    } catch {
      continue;
    }
    const isPlainFile = !info.isSymbolicLink() && info.isFile();
    if (!isPlainFile) {
      console.error(
        `[tree-sitter-loader] cache eviction skipped non-regular file: ${path} (possible symlink attack \u2014 see Phase 3.5 finding F-008).`
      );
      continue;
    }
    regularWasmFiles.push({ path, mtimeMs: info.mtimeMs });
  }
  if (regularWasmFiles.length <= retain) return;
  // Newest first, so everything past index `retain` is the oldest surplus.
  regularWasmFiles.sort((left, right) => right.mtimeMs - left.mtimeMs);
  const surplus = regularWasmFiles.slice(retain);
  for (const { path } of surplus) {
    try {
      unlinkSync(path);
    } catch {
    }
  }
}
274
/**
 * Computes the SHA-256 digest of `bytes`.
 *
 * @param bytes Data to hash (anything `Hash.update` accepts).
 * @returns The digest as a lowercase hex string.
 */
function sha256(bytes) {
  const hasher = createHash("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
277
/** Memoised result of `Parser.init()`; `null` until the first call. */
var parserInitPromise = null;

/**
 * Ensures `Parser.init()` is invoked at most once per module instance.
 * Every caller awaits the same promise, so a rejection is also shared by
 * all later callers.
 *
 * @returns The (shared) initialisation promise.
 */
async function ensureParserInitialized() {
  parserInitPromise ??= Parser.init();
  return parserInitPromise;
}
283
/** In-process cache of loaded Language objects, keyed by language identifier. */
var loadedGrammars = /* @__PURE__ */ new Map();

/**
 * Loads the Tree-sitter grammar for `language`, trying in order:
 *
 *   1. the in-process cache (`loadedGrammars`),
 *   2. the on-disk cache file, after verifying it is a regular file whose
 *      SHA-256 matches the manifest,
 *   3. an HTTPS download, verified against the manifest SHA-256 and then
 *      written to the on-disk cache on a best-effort basis.
 *
 * Note: the in-process cache is consulted before the manifest is resolved,
 * so `options.manifestOverride` has no effect for a language that has
 * already been loaded.
 *
 * @param language Grammar identifier (a key of the manifest).
 * @param options  Optional settings:
 *                 `manifestOverride` — per-language manifest entries that
 *                 take precedence over GRAMMAR_MANIFEST;
 *                 `fetchImpl` — fetch replacement (defaults to
 *                 `globalThis.fetch`).
 * @returns The loaded Language object.
 * @throws GrammarUnavailableError  no manifest entry, no fetch
 *         implementation, or the download failed / returned a non-OK status.
 * @throws GrammarCacheSymlinkError the cache path exists but is a symlink
 *         or not a regular file.
 * @throws GrammarSHAMismatchError  cached or downloaded bytes do not match
 *         the pinned digest.
 * @throws GrammarUrlNotHttpsError  the manifest URL is not `https://`.
 */
async function loadGrammar(language, options = {}) {
  await ensureParserInitialized();
  const cached = loadedGrammars.get(language);
  if (cached) return cached;
  const manifest = options.manifestOverride?.[language] ?? GRAMMAR_MANIFEST[language];
  if (!manifest) {
    throw new GrammarUnavailableError(
      language,
      new Error(`No manifest entry for language "${language}". v1 supports: ${Object.keys(GRAMMAR_MANIFEST).join(", ")}.`)
    );
  }
  const cachePath = getCachedPath(language, manifest.sha256);

  // --- On-disk cache -------------------------------------------------
  // lstat (not stat) so a symlink is seen as a symlink, not its target.
  let cacheLstat;
  try {
    cacheLstat = lstatSync(cachePath);
  } catch {
    cacheLstat = null;
  }
  if (cacheLstat) {
    if (cacheLstat.isSymbolicLink() || !cacheLstat.isFile()) {
      throw new GrammarCacheSymlinkError(cachePath);
    }
    let bytes;
    try {
      bytes = readFileSync(cachePath);
    } catch {
      // Unreadable cache file: treat as empty and fall through to download.
      bytes = new Uint8Array(0);
    }
    if (bytes.byteLength > 0) {
      const actualSha = sha256(bytes);
      if (actualSha !== manifest.sha256) {
        throw new GrammarSHAMismatchError(language, manifest.sha256, actualSha);
      }
      const lang2 = await Language.load(bytes);
      loadedGrammars.set(language, lang2);
      // Refresh the mtime so eviction treats this file as recently used.
      touchCacheFile(cachePath);
      return lang2;
    }
  }

  // --- Download ------------------------------------------------------
  if (!/^https:\/\//i.test(manifest.url)) {
    throw new GrammarUrlNotHttpsError(manifest.url);
  }
  const fetchImpl = options.fetchImpl ?? globalThis.fetch;
  if (!fetchImpl) {
    throw new GrammarUnavailableError(
      language,
      new Error("No fetch implementation available (Node < 18?)")
    );
  }
  let body;
  try {
    const res = await fetchImpl(manifest.url);
    if (!res.ok) {
      throw new Error(`HTTP ${res.status ?? "unknown"} from ${manifest.url}`);
    }
    body = new Uint8Array(await res.arrayBuffer());
  } catch (e) {
    throw new GrammarUnavailableError(language, e);
  }
  const downloadedSha = sha256(body);
  if (downloadedSha !== manifest.sha256) {
    throw new GrammarSHAMismatchError(language, manifest.sha256, downloadedSha);
  }

  // --- Persist to cache (best-effort) --------------------------------
  // Any failure here is logged and the grammar is loaded from memory.
  try {
    mkdirSync(dirname(cachePath), { recursive: true, mode: 0o700 });
    try {
      // mkdir's mode does not apply to a pre-existing directory.
      chmodSync(dirname(cachePath), 0o700);
    } catch {
    }
    // Write to a per-process temp name, then rename into place.
    const tmpPath = `${cachePath}.tmp.${process.pid}`;
    try {
      writeFileSync(tmpPath, body, { mode: 0o600 });
      try {
        chmodSync(tmpPath, 0o600);
      } catch {
      }
      renameSync(tmpPath, cachePath);
    } catch (e) {
      // Remove the temp file whether the write or the rename failed; its
      // name does not end in ".wasm", so eviction would never clean it up.
      try {
        unlinkSync(tmpPath);
      } catch {
      }
      throw e;
    }
    try {
      chmodSync(cachePath, 0o600);
    } catch {
    }
    evictBeyondRetainCount();
  } catch (e) {
    console.error(
      `[tree-sitter-loader] cache write failed for ${language}: ${e instanceof Error ? e.message : String(e)} \u2014 loading directly from memory.`
    );
  }
  const lang = await Language.load(body);
  loadedGrammars.set(language, lang);
  return lang;
}
382
+
383
+ // src/detect/adapters/parse-guard.ts
384
/** Largest source size, in bytes, accepted for AST parsing (1 MiB). */
var MAX_AST_FILE_BYTES = 1 * 1024 * 1024;
/** Largest bracket-nesting depth accepted by the pre-parse scan. */
var MAX_AST_PARSE_DEPTH = 5e3;
/**
 * Exported alongside the other limits but not used by isParsableSource.
 * NOTE(review): presumably a parse time budget in milliseconds enforced by
 * callers — confirm.
 */
var MAX_AST_PARSE_MS = 2e3;

/**
 * Cheap pre-parse guard deciding whether `source` should be handed to the
 * AST parser.
 *
 * Despite the `is…` name, the return value is inverted: `null` means the
 * source passed every check, while a `{ reason, detail }` object explains
 * why parsing should be skipped.
 *
 * Checks, in order:
 *   1. size      — byte length above MAX_AST_FILE_BYTES -> "size-cap";
 *   2. per-char scan — a NUL character -> "control-bytes"; running bracket
 *      depth above MAX_AST_PARSE_DEPTH -> "depth-cap".
 *
 * The depth scan counts every `(`, `[`, `{` regardless of context (strings
 * and comments included), and unmatched closers never push the depth below
 * zero.
 *
 * @param source    Source text to inspect.
 * @param sizeBytes Optional precomputed byte size; when null/undefined the
 *                  UTF-8 byte length of `source` is computed.
 * @returns `null` when parsable, otherwise `{ reason, detail }`.
 */
function isParsableSource(source, sizeBytes) {
  const bytes = sizeBytes ?? Buffer.byteLength(source, "utf-8");
  if (bytes > MAX_AST_FILE_BYTES) {
    return {
      reason: "size-cap",
      detail: `${bytes} bytes > ${MAX_AST_FILE_BYTES} cap`
    };
  }
  let depth = 0;
  for (let i = 0; i < source.length; i++) {
    const c = source.charCodeAt(i);
    if (c === 0) {
      // `i` is an index into the string, not a byte offset.
      return { reason: "control-bytes", detail: "NUL byte at offset " + i };
    }
    if (c === 40 || c === 91 || c === 123) {
      // 40 "(", 91 "[", 123 "{"
      depth++;
      if (depth > MAX_AST_PARSE_DEPTH) {
        return {
          reason: "depth-cap",
          detail: `nesting depth exceeded ${MAX_AST_PARSE_DEPTH}`
        };
      }
    } else if (c === 41 || c === 93 || c === 125) {
      // 41 ")", 93 "]", 125 "}" — clamp at zero for unbalanced closers.
      depth = depth > 0 ? depth - 1 : 0;
    }
  }
  return null;
}
417
+
418
+ // src/adapter.ts
419
/**
 * Identity helper for adapter authors: hands back the very object it was
 * given, without copying, validating, registering, or mutating it.
 *
 * @param spec Adapter definition object.
 * @returns The same `spec` reference.
 */
function defineAdapter(spec) {
  return spec;
}
422
+ export {
423
+ InvalidQueryError,
424
+ MAX_AST_FILE_BYTES,
425
+ MAX_AST_PARSE_DEPTH,
426
+ MAX_AST_PARSE_MS,
427
+ defineAdapter,
428
+ isParsableSource,
429
+ loadGrammar,
430
+ runQuery
431
+ };