@massu/core 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/commands/README.md +23 -11
  2. package/commands/massu-deploy.python-docker.md +170 -0
  3. package/commands/massu-deploy.python-fly.md +189 -0
  4. package/commands/massu-deploy.python-launchd.md +144 -0
  5. package/commands/massu-deploy.python-systemd.md +163 -0
  6. package/commands/massu-scaffold-page.swift.md +10 -10
  7. package/commands/massu-scaffold-router.python-django.md +153 -0
  8. package/commands/massu-scaffold-router.python-fastapi.md +145 -0
  9. package/dist/cli.js +9914 -4133
  10. package/dist/hooks/auto-learning-pipeline.js +45 -2
  11. package/dist/hooks/classify-failure.js +45 -2
  12. package/dist/hooks/cost-tracker.js +45 -2
  13. package/dist/hooks/fix-detector.js +45 -2
  14. package/dist/hooks/incident-pipeline.js +45 -2
  15. package/dist/hooks/post-edit-context.js +45 -2
  16. package/dist/hooks/post-tool-use.js +45 -2
  17. package/dist/hooks/pre-compact.js +45 -2
  18. package/dist/hooks/pre-delete-check.js +45 -2
  19. package/dist/hooks/quality-event.js +45 -2
  20. package/dist/hooks/rule-enforcement-pipeline.js +45 -2
  21. package/dist/hooks/session-end.js +45 -2
  22. package/dist/hooks/session-start.js +4790 -406
  23. package/dist/hooks/user-prompt.js +45 -2
  24. package/package.json +13 -4
  25. package/src/cli.ts +22 -2
  26. package/src/commands/config-refresh.ts +91 -23
  27. package/src/commands/init.ts +131 -24
  28. package/src/commands/install-commands.ts +142 -26
  29. package/src/commands/refresh-log.ts +37 -0
  30. package/src/commands/template-engine.ts +260 -0
  31. package/src/commands/watch.ts +430 -0
  32. package/src/config.ts +71 -0
  33. package/src/detect/adapters/nextjs-trpc.ts +166 -0
  34. package/src/detect/adapters/parse-guard.ts +133 -0
  35. package/src/detect/adapters/python-django.ts +208 -0
  36. package/src/detect/adapters/python-fastapi.ts +223 -0
  37. package/src/detect/adapters/query-helpers.ts +170 -0
  38. package/src/detect/adapters/runner.ts +252 -0
  39. package/src/detect/adapters/swift-swiftui.ts +171 -0
  40. package/src/detect/adapters/tree-sitter-loader.ts +467 -0
  41. package/src/detect/adapters/types.ts +173 -0
  42. package/src/detect/codebase-introspector.ts +190 -0
  43. package/src/detect/index.ts +28 -2
  44. package/src/detect/migrate.ts +4 -4
  45. package/src/detect/regex-fallback.ts +449 -0
  46. package/src/hooks/session-start.ts +94 -3
  47. package/src/lib/gitToplevel.ts +22 -0
  48. package/src/lib/installLock.ts +179 -0
  49. package/src/lib/pidLiveness.ts +67 -0
  50. package/src/lsp/auto-detect.ts +98 -0
  51. package/src/lsp/client.ts +776 -0
  52. package/src/lsp/enrich.ts +127 -0
  53. package/src/lsp/types.ts +221 -0
  54. package/src/watch/daemon.ts +385 -0
  55. package/src/watch/lockfile-detector.ts +65 -0
  56. package/src/watch/paths.ts +279 -0
  57. package/src/watch/state.ts +178 -0
@@ -0,0 +1,467 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: Tree-sitter WASM grammar loader (Strategy A).
6
+ *
7
+ * Strategy A: grammars are NOT bundled in the npm tarball. The loader downloads
8
+ * each requested grammar at first use from a pinned URL, verifies SHA-256
9
+ * against a hardcoded manifest, caches under `~/.massu/wasm-cache/`.
10
+ *
11
+ * Security model (Phase 3.5 #3):
12
+ * - SHA-256 manifest hardcoded HERE — never network-fetched.
13
+ * - Mismatch → throw `GrammarSHAMismatchError`. NO silent fallback.
14
+ * - Atomic cache write: `<lang>-<sha>.wasm.tmp.<pid>` → rename → final.
15
+ * - Offline + no-cache → throw `GrammarUnavailableError` so the runner can
16
+ * translate to a regex-fallback path with a stderr note.
17
+ *
18
+ * Phase 1 shipped the CODE PATH with placeholder SHA-256 values; the
19
+ * manifest below now carries the real hashes computed during Phase 9
20
+ * release-prep (`curl <url> | shasum -a 256`). Tests exercise the mismatch
21
+ * branch by injecting manifest entries that intentionally differ.
22
+ */
23
+
24
+ import { createHash } from 'crypto';
25
+ import {
26
+ mkdirSync,
27
+ readdirSync,
28
+ readFileSync,
29
+ writeFileSync,
30
+ renameSync,
31
+ unlinkSync,
32
+ lstatSync,
33
+ chmodSync,
34
+ utimesSync,
35
+ } from 'fs';
36
+ import { homedir } from 'os';
37
+ import { dirname, join } from 'path';
38
+ import { Language, Parser } from 'web-tree-sitter';
39
+ import type { TreeSitterLanguage } from './types.ts';
40
+
41
+ // ============================================================
42
+ // Typed errors
43
+ // ============================================================
44
+
45
/**
 * Raised when the SHA-256 of grammar bytes (freshly downloaded or read back
 * from the disk cache) differs from the digest pinned in the manifest.
 * Carries the language plus both digests so callers can report them.
 */
export class GrammarSHAMismatchError extends Error {
  public readonly language: TreeSitterLanguage;
  public readonly expected: string;
  public readonly actual: string;

  /**
   * @param language - Grammar whose bytes failed verification.
   * @param expected - Digest pinned in the manifest.
   * @param actual - Digest computed from the bytes at hand.
   */
  constructor(language: TreeSitterLanguage, expected: string, actual: string) {
    const message = [
      `[tree-sitter-loader] SHA-256 mismatch for grammar "${language}". `,
      `Expected ${expected}, got ${actual}. `,
      'REFUSING to load — see Phase 3.5 audit attack vector #3.',
    ].join('');
    super(message);
    this.name = 'GrammarSHAMismatchError';
    this.language = language;
    this.expected = expected;
    this.actual = actual;
  }
}
62
+
63
/**
 * Raised when no usable grammar can be produced for a language. The message
 * embeds a reason derived from `cause` and tells the reader that regex
 * introspection is the fallback for that language.
 */
export class GrammarUnavailableError extends Error {
  public readonly language: TreeSitterLanguage;
  public readonly cause?: unknown;

  /**
   * @param language - Grammar that could not be obtained.
   * @param cause - Underlying failure. An `Error` contributes its message,
   *   any other truthy value is stringified, and a falsy value yields a
   *   generic "no cached grammar and download failed" reason.
   */
  constructor(language: TreeSitterLanguage, cause?: unknown) {
    let reason: string;
    if (cause instanceof Error) {
      reason = cause.message;
    } else if (cause) {
      reason = String(cause);
    } else {
      reason = 'no cached grammar and download failed';
    }
    const message = [
      `[tree-sitter-loader] Grammar for "${language}" is unavailable: ${reason}. `,
      `Falling back to regex introspection for files in ${language}.`,
    ].join('');
    super(message);
    this.name = 'GrammarUnavailableError';
    this.language = language;
    this.cause = cause;
  }
}
79
+
80
/**
 * Raised when the expected cache path exists but is a symlink or any other
 * non-regular file. A pre-planted symlink at that path could redirect reads
 * or writes elsewhere on the filesystem, so the loader refuses to touch it.
 * (Phase 3.5 finding #3 — symlink attack on cache dir.)
 */
export class GrammarCacheSymlinkError extends Error {
  public readonly cachePath: string;

  /** @param cachePath - The offending cache path, kept for diagnostics. */
  constructor(cachePath: string) {
    const message = [
      `[tree-sitter-loader] Refusing to load grammar — cache path "${cachePath}" is a symlink `,
      'or non-regular file. (Phase 3.5 finding #3 — symlink attack vector.)',
    ].join('');
    super(message);
    this.name = 'GrammarCacheSymlinkError';
    this.cachePath = cachePath;
  }
}
97
+
98
/**
 * Raised when a manifest URL does not use the https scheme. The manifest is
 * hardcoded, so this is defense in depth: an edit that introduces a
 * non-HTTPS URL fails at load time rather than relying on code review.
 * (Phase 3.5 finding #3 — MITM on download.)
 */
export class GrammarUrlNotHttpsError extends Error {
  public readonly url: string;

  /** @param url - The rejected URL, kept for diagnostics. */
  constructor(url: string) {
    const message = [
      `[tree-sitter-loader] Refusing to download grammar from non-HTTPS URL: ${url}. `,
      'Only https:// URLs are accepted. (Phase 3.5 finding #3.)',
    ].join('');
    super(message);
    this.name = 'GrammarUrlNotHttpsError';
    this.url = url;
  }
}
115
+
116
+ // ============================================================
117
+ // Pinned manifest
118
+ // ============================================================
119
+
120
/** One pinned grammar artifact: where to fetch it and what it must hash to. */
interface ManifestEntry {
  /** Download location of the `.wasm` grammar. */
  url: string;
  /**
   * Expected SHA-256 of the file, as hex. It is compared with strict string
   * equality against a lowercase hex digest, so it must be lowercase too.
   */
  sha256: string;
  /** Version label of the pinned artifact. */
  version: string;
}
125
+
126
/** Version of the `tree-sitter-wasms` package every pinned grammar comes from. */
const PINNED_WASMS_VERSION = '0.1.13';

/** Build the manifest entry for one grammar published in the pinned package. */
function pinnedEntry(grammar: string, sha256: string): ManifestEntry {
  return {
    url: `https://unpkg.com/tree-sitter-wasms@${PINNED_WASMS_VERSION}/out/tree-sitter-${grammar}.wasm`,
    sha256,
    version: PINNED_WASMS_VERSION,
  };
}

/**
 * Hardcoded grammar manifest. It lives in source, so changing a URL or a
 * digest requires shipping a release.
 *
 * Artifacts come from the `tree-sitter-wasms` npm package
 * (https://npm.im/tree-sitter-wasms), which publishes pre-built WASM parsers.
 * It is deliberately NOT a dependency (plan §Phase 0 bans bundling); files
 * are fetched from unpkg on first use. The per-language `tree-sitter-<lang>`
 * npm packages ship C source and native `.node` prebuilds but no `.wasm`,
 * as confirmed from unpkg `?meta` listings during Phase 9 release-prep.
 *
 * Digests were computed on 2026-04-28 with:
 *   curl -fsSL <url> | shasum -a 256
 *
 * `tree-sitter-loader.test.ts` exercises the verification path by injecting
 * manifest entries that intentionally mismatch.
 */
export const GRAMMAR_MANIFEST: Partial<Record<TreeSitterLanguage, ManifestEntry>> = {
  python: pinnedEntry('python', '9056d0fb0c337810d019fae350e8167786119da98f0f282aceae7ab89ee8253b'),
  typescript: pinnedEntry('typescript', '8515404dceed38e1ed86aa34b09fcf3379fff1b4ff9dd3967bcd6d1eb5ac3d8f'),
  javascript: pinnedEntry('javascript', '63812b9e275d26851264734868d27a1656bd44a2ef6eb3e85e6b03728c595ab5'),
  swift: pinnedEntry('swift', '41c4fdb2249a3aa6d87eed0d383081ff09725c2248b4977043a43825980ffcc7'),
};
165
+
166
+ // ============================================================
167
+ // Cache + Parser init
168
+ // ============================================================
169
+
170
/**
 * Resolve the directory that holds cached grammar files.
 *
 * `MASSU_WASM_CACHE_DIR` overrides the default of `~/.massu/wasm-cache`.
 * A blank (empty or whitespace-only) override is treated as unset: honouring
 * it would make every cache path relative to the current working directory.
 *
 * @returns The cache directory path; the directory is not created here.
 */
function getCacheDir(): string {
  const override = process.env.MASSU_WASM_CACHE_DIR;
  if (override !== undefined && override.trim() !== '') return override;
  return join(homedir(), '.massu', 'wasm-cache');
}
173
+
174
/**
 * Build the on-disk cache path for a grammar. The pinned digest is part of
 * the file name (`<language>-<sha>.wasm`), so a different digest maps to a
 * different file.
 */
function getCachedPath(language: TreeSitterLanguage, sha: string): string {
  const fileName = [language, sha].join('-') + '.wasm';
  return join(getCacheDir(), fileName);
}
177
+
178
+ // ============================================================
179
+ // LRU cache eviction (Phase 3.5 audit F-011 — closed 2026-05-06)
180
+ // ============================================================
181
+ //
182
+ // F-011 was deferred at v1 ("at ~3MB per grammar, full cache footprint is
183
+ // <100MB — not an attack vector"). The 2026-05-06 audit-leak retrospective
184
+ // elevated it: now that the cache path + naming convention are publicly
185
+ // known (the security audit doc was visible for 9 days), opportunistic
186
+ // disk-fill attacks become slightly less hypothetical, AND the cost of
187
+ // retrofitting LRU once Plan 3c expands the supported grammar set is
188
+ // strictly higher than doing it now while only 4 grammars exist.
189
+ //
190
+ // Eviction rule: keep the N most-recently-USED entries (mtime, updated by
191
+ // the cache-hit path on every read). Default cap = 16 — leaves headroom
192
+ // for Plan 3c's 31-grammar expansion plus dev-time version churn, while
193
+ // bounding total cache to ~50MB at 3MB/grammar.
194
+
195
/** Retain count used when `MASSU_WASM_CACHE_RETAIN` is unset or unusable. */
const DEFAULT_CACHE_RETAIN_COUNT = 16;

/**
 * Number of cached grammar files to keep when evicting.
 *
 * Reads `MASSU_WASM_CACHE_RETAIN`; a finite value in [1, 1024] is accepted
 * and floored to an integer. Anything else (unset, empty, non-numeric, out
 * of range) yields the default.
 */
function getCacheRetainCount(): number {
  const raw = process.env.MASSU_WASM_CACHE_RETAIN;
  if (!raw) return DEFAULT_CACHE_RETAIN_COUNT;

  const parsed = Number(raw);
  const usable = Number.isFinite(parsed) && parsed >= 1 && parsed <= 1024;
  return usable ? Math.floor(parsed) : DEFAULT_CACHE_RETAIN_COUNT;
}
205
+
206
/**
 * Mark a cache file as "most recently used" by setting both its atime and
 * mtime to the current time via `utimesSync`. Eviction orders files by
 * mtime, so this is what keeps a frequently used grammar alive.
 *
 * Best-effort: any failure is swallowed, because the timestamp is only an
 * eviction hint and must never block a grammar load.
 */
function touchCacheFile(path: string): void {
  const stamp = new Date();
  try {
    utimesSync(path, stamp, stamp);
  } catch {
    // Deliberately ignored — see the best-effort note above.
  }
}
230
+
231
/**
 * Prune the grammar cache, keeping only the most recently used files (ranked
 * by mtime, newest first) and deleting the rest. Best-effort throughout: an
 * unreadable directory, a failed stat, or a failed delete is skipped
 * silently so eviction can never block a grammar load.
 *
 * Only names ending in `.wasm` are considered. Symlinks and other
 * non-regular files are detected with `lstatSync` (never followed), reported
 * on stderr, and left in place — the loader does not act automatically on
 * paths an attacker may have planted.
 *
 * @param retain - How many files to keep. Defaults to the configured retain
 *   count. Fractions are floored and negatives are clamped to 0; NaN falls
 *   back to the configured count. Without this normalization a negative
 *   value would keep the newest files and delete only the oldest, and NaN
 *   would delete every file.
 */
function evictBeyondRetainCount(retain: number = getCacheRetainCount()): void {
  const keep = Number.isNaN(retain) ? getCacheRetainCount() : Math.max(0, Math.floor(retain));

  const dir = getCacheDir();
  let names: string[];
  try {
    names = readdirSync(dir);
  } catch {
    return; // Dir doesn't exist yet; nothing to evict.
  }

  const candidates: { path: string; mtimeMs: number }[] = [];
  for (const name of names) {
    if (!name.endsWith('.wasm')) continue; // Don't touch non-grammar files.
    const path = join(dir, name);
    let stat;
    try {
      stat = lstatSync(path);
    } catch {
      continue; // Vanished or unreadable — nothing to rank.
    }
    if (stat.isSymbolicLink() || !stat.isFile()) {
      // Never delete what could be an attacker-placed symlink; surface it
      // instead so the user knows the cache dir is suspect.
      console.error(
        `[tree-sitter-loader] cache eviction skipped non-regular file: ${path} ` +
          `(possible symlink attack — see Phase 3.5 finding F-008).`,
      );
      continue;
    }
    candidates.push({ path, mtimeMs: stat.mtimeMs });
  }

  if (candidates.length <= keep) return;

  // Newest first; everything past index `keep` is evictable.
  candidates.sort((a, b) => b.mtimeMs - a.mtimeMs);
  for (const victim of candidates.slice(keep)) {
    try {
      unlinkSync(victim.path);
    } catch {
      // best-effort
    }
  }
}
284
+
285
/**
 * Test-only entry point: runs cache eviction on demand so tests do not need
 * to write a new grammar to trigger it.
 *
 * @param retain - Optional retain count; when omitted the configured count
 *   is used.
 */
export function _evictCacheForTest(retain?: number): void {
  evictBeyondRetainCount(retain);
}
289
+
290
/** Compute the SHA-256 digest of `bytes` as a lowercase hex string. */
function sha256(bytes: Uint8Array): string {
  const hasher = createHash('sha256');
  hasher.update(bytes);
  return hasher.digest('hex');
}
293
+
294
/** Memoized `Parser.init()` promise; null until first use or after a failed attempt. */
let parserInitPromise: Promise<void> | null = null;

/**
 * `Parser.init()` is async and must be called once before any `new Parser()`.
 * This function is idempotent: while initialization is pending or has
 * succeeded, every call awaits the same underlying promise.
 *
 * A failed initialization is NOT memoized. The cached promise is cleared on
 * rejection so a later call can retry instead of replaying the same
 * rejection for the rest of the process lifetime.
 *
 * Test harnesses can mock this by stubbing `Parser.init`.
 *
 * @throws Whatever `Parser.init()` rejects with.
 */
export async function ensureParserInitialized(): Promise<void> {
  if (!parserInitPromise) {
    parserInitPromise = Parser.init().catch((err: unknown) => {
      // Forget the failed attempt so the next caller starts a fresh one.
      parserInitPromise = null;
      throw err;
    });
  }
  return parserInitPromise;
}
307
+
308
+ // ============================================================
309
+ // Loader (the main entry point)
310
+ // ============================================================
311
+
312
/** Optional knobs for `loadGrammar`; both exist for test injection. */
interface LoaderOptions {
  /**
   * Replaces the built-in manifest entry for the languages it lists.
   * Production callers leave it unset; tests use it to drive the
   * SHA-mismatch and download-failure paths.
   */
  manifestOverride?: Partial<Record<TreeSitterLanguage, ManifestEntry>>;
  /**
   * Replaces the fetch implementation (default: global `fetch`). Tests pass
   * a mock that resolves to a fixed body or throws.
   */
  fetchImpl?: (url: string) => Promise<{
    ok: boolean;
    arrayBuffer: () => Promise<ArrayBuffer>;
    status?: number;
  }>;
}
325
+
326
/** Grammars already loaded in this process, keyed by language. */
const loadedGrammars = new Map<TreeSitterLanguage, Language>();

/**
 * Write verified grammar bytes to `cachePath` atomically (temp file, then
 * rename) with owner-only permissions: 0o700 on the directory, 0o600 on files.
 *
 * The temp name is predictable (`<cachePath>.tmp.<pid>`), so the write uses
 * exclusive create (`wx`): a symlink planted at that path makes the open fail
 * instead of redirecting the write. Any stale entry at the temp path is
 * unlinked first (this removes a symlink itself, not its target), and the
 * temp file is removed again if the write or the rename fails.
 *
 * @throws The underlying fs error when the directory, write, or rename fails.
 */
function writeGrammarCacheFile(cachePath: string, body: Uint8Array): void {
  const cacheDir = dirname(cachePath);
  mkdirSync(cacheDir, { recursive: true, mode: 0o700 });
  // mkdir's mode is ignored for a pre-existing dir, so tighten explicitly.
  try { chmodSync(cacheDir, 0o700); } catch { /* best effort */ }

  const tmpPath = `${cachePath}.tmp.${process.pid}`;
  try { unlinkSync(tmpPath); } catch { /* nothing stale to clear */ }
  try {
    writeFileSync(tmpPath, body, { mode: 0o600, flag: 'wx' });
    try { chmodSync(tmpPath, 0o600); } catch { /* best effort */ }
    renameSync(tmpPath, cachePath);
  } catch (e) {
    // Never leave a partial temp file behind.
    try { unlinkSync(tmpPath); } catch { /* ignore */ }
    throw e;
  }
  try { chmodSync(cachePath, 0o600); } catch { /* best effort */ }
}

/**
 * Lazy-load a Tree-sitter grammar. Only fetches/caches the grammar for
 * `language`; other languages are unaffected.
 *
 * Order:
 *   1. In-memory hit → return. The in-memory map is keyed by language only,
 *      so a grammar loaded earlier is returned even if `manifestOverride`
 *      now names a different entry.
 *   2. Disk cache hit + SHA verify pass → load from disk, mark as recently used.
 *   3. Disk cache hit + SHA mismatch → throw (never silently re-download;
 *      that would mask tampering with the on-disk cache).
 *   4. Cache miss, unreadable, or empty cache file → fetch from the pinned
 *      URL → SHA verify → atomic cache write → evict old entries → load.
 *      A failed cache write is logged and the grammar is loaded from memory.
 *   5. Fetch fails → throw `GrammarUnavailableError`.
 *
 * @param language - Grammar to load.
 * @param options - Test-injection overrides for the manifest and fetch.
 * @returns The loaded grammar.
 * @throws GrammarUnavailableError when there is no manifest entry, no fetch
 *   implementation, or the download fails.
 * @throws GrammarSHAMismatchError when cached or downloaded bytes do not
 *   match the pinned digest.
 * @throws GrammarCacheSymlinkError when the cache path is a symlink or any
 *   other non-regular file.
 * @throws GrammarUrlNotHttpsError when the manifest URL is not https.
 */
export async function loadGrammar(
  language: TreeSitterLanguage,
  options: LoaderOptions = {},
): Promise<Language> {
  await ensureParserInitialized();

  const cached = loadedGrammars.get(language);
  if (cached) return cached;

  const manifest = options.manifestOverride?.[language] ?? GRAMMAR_MANIFEST[language];
  if (!manifest) {
    throw new GrammarUnavailableError(
      language,
      new Error(`No manifest entry for language "${language}". v1 supports: ${Object.keys(GRAMMAR_MANIFEST).join(', ')}.`),
    );
  }

  const cachePath = getCachedPath(language, manifest.sha256);

  // 2/3: disk cache check. Use lstatSync (NOT statSync) so a symlink at the
  // cache path is detected and rejected — never followed.
  // (Phase 3.5 finding #3 — symlink attack on cache dir.)
  let cacheLstat;
  try {
    cacheLstat = lstatSync(cachePath);
  } catch {
    cacheLstat = null;
  }
  if (cacheLstat) {
    if (cacheLstat.isSymbolicLink() || !cacheLstat.isFile()) {
      throw new GrammarCacheSymlinkError(cachePath);
    }
    let bytes: Uint8Array;
    try {
      bytes = readFileSync(cachePath);
    } catch {
      // Treat read failure as cache miss; fall through to download.
      bytes = new Uint8Array(0);
    }
    if (bytes.byteLength > 0) {
      const actualSha = sha256(bytes);
      if (actualSha !== manifest.sha256) {
        // Refuse to load. Don't silently re-download — that would mask
        // tampering of the on-disk cache.
        throw new GrammarSHAMismatchError(language, manifest.sha256, actualSha);
      }
      const lang = await Language.load(bytes);
      loadedGrammars.set(language, lang);
      // F-011 LRU: mark this entry as most-recently-used so it survives
      // future evictions.
      touchCacheFile(cachePath);
      return lang;
    }
  }

  // 4/5: download. Defense in depth: refuse non-HTTPS URLs.
  if (!/^https:\/\//i.test(manifest.url)) {
    throw new GrammarUrlNotHttpsError(manifest.url);
  }

  const fetchImpl = options.fetchImpl ?? (globalThis.fetch as LoaderOptions['fetchImpl']);
  if (!fetchImpl) {
    throw new GrammarUnavailableError(
      language,
      new Error('No fetch implementation available (Node < 18?)'),
    );
  }

  let body: Uint8Array;
  try {
    const res = await fetchImpl(manifest.url);
    if (!res.ok) {
      throw new Error(`HTTP ${res.status ?? 'unknown'} from ${manifest.url}`);
    }
    body = new Uint8Array(await res.arrayBuffer());
  } catch (e) {
    throw new GrammarUnavailableError(language, e);
  }

  const downloadedSha = sha256(body);
  if (downloadedSha !== manifest.sha256) {
    throw new GrammarSHAMismatchError(language, manifest.sha256, downloadedSha);
  }

  try {
    writeGrammarCacheFile(cachePath, body);
    // F-011 LRU: prune cache to retain count after every successful write.
    evictBeyondRetainCount();
  } catch (e) {
    // Cache write failure is non-fatal — we still have `body` in memory and
    // can load directly. Log to stderr per VR-USER-ERROR-MESSAGES style.
    console.error(
      `[tree-sitter-loader] cache write failed for ${language}: ${e instanceof Error ? e.message : String(e)} — loading directly from memory.`,
    );
  }

  const lang = await Language.load(body);
  loadedGrammars.set(language, lang);
  return lang;
}
460
+
461
/**
 * Test-only: empty the in-memory map of loaded grammars. Files in the disk
 * cache are left untouched. Production code has no need for this, since the
 * map is meant to live as long as the process.
 */
export function __resetLoadedGrammars(): void {
  loadedGrammars.clear();
}
@@ -0,0 +1,173 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: AST Adapter contract types.
6
+ *
7
+ * Lives at `packages/core/src/detect/adapters/types.ts`. All types are local —
8
+ * NONE re-exported from `web-tree-sitter`.
9
+ *
10
+ * Adapter authors import from this module only; the runner (`runner.ts`)
11
+ * orchestrates execution and the loader (`tree-sitter-loader.ts`) handles
12
+ * grammar acquisition.
13
+ *
14
+ * Per-field confidence is enforced (NOT per-adapter): a single weak field
15
+ * MUST NOT poison the rest. The runner consumes `confidence` per-adapter for
16
+ * the moment, but the merge rule reads each `conventions[field]` against the
17
+ * provenance trail to decide what survives.
18
+ */
19
+
20
+ // ============================================================
21
+ // Languages enumerated for the AST adapter set (Phase 1 + 3c)
22
+ // ============================================================
23
+
24
/**
 * Closed set of Tree-sitter grammar names massu recognizes.
 *
 * This is a local string-literal union, not something re-exported from
 * `web-tree-sitter` (which exposes `Language` as a class, not a name list).
 */
export type TreeSitterLanguage =
  // Phase 1: first-party adapters ship for these four.
  | 'python'
  | 'typescript'
  | 'javascript'
  | 'swift'
  // Reserved for Plan 3c.
  | 'rust'
  | 'go'
  | 'ruby'
  | 'php'
  | 'java'
  | 'kotlin'
  | 'elixir'
  | 'erlang'
  | 'csharp'
  | 'cpp'
  | 'haskell'
  | 'ocaml';
49
+
50
+ // ============================================================
51
+ // Inputs to adapter dispatch
52
+ // ============================================================
53
+
54
/**
 * Read-only bundle of project signals that the runner assembles BEFORE it
 * dispatches to adapters.
 *
 * Building it up-front is what lets an adapter's `matches()` stay cheap:
 * it inspects these fields instead of doing file IO itself.
 */
export interface DetectionSignals {
  /** Parsed `package.json` (root or first workspace); undefined when absent. */
  packageJson?: Record<string, unknown>;
  /** Parsed `pyproject.toml`; undefined when absent. */
  pyprojectToml?: Record<string, unknown>;
  /** Raw text of `Gemfile`; undefined when absent. */
  gemfile?: string;
  /** Parsed `Cargo.toml`; undefined when absent. */
  cargoToml?: Record<string, unknown>;
  /** Raw text of `go.mod`; undefined when absent. */
  goMod?: string;
  /** Names of directories sitting directly under the project root (one level). */
  presentDirs: Set<string>;
  /** Basenames of files sitting directly under the project root (one level). */
  presentFiles: Set<string>;
}
76
+
77
/**
 * One sampled source file, as handed to an adapter by the runner.
 *
 * The runner has already read the file: adapters work from `content` and
 * MUST NOT go back to disk inside `introspect()`.
 */
export interface SourceFile {
  /** Path of the sampled file. */
  path: string;
  /** Pre-read file contents. */
  content: string;
  /** Grammar the file should be parsed with. */
  language: TreeSitterLanguage;
  /** Size in bytes (pre-read length). */
  size: number;
}
89
+
90
+ // ============================================================
91
+ // Adapter contract
92
+ // ============================================================
93
+
94
/**
 * Audit-trail entry emitted for each captured field. Users can inspect
 * `detected.<adapter>._provenance` to see exactly which file, line, and
 * query produced a value.
 */
export interface Provenance {
  /** Name of the convention field this entry explains. */
  field: string;
  /** File the value was extracted from. */
  sourceFile: string;
  /** Line within `sourceFile` where the match was found. */
  line: number;
  /** Query that produced the match. */
  query: string;
}
105
+
106
/** What an adapter's `introspect()` resolves to. */
export interface AdapterResult {
  /**
   * Extracted conventions; written out as `detected.<adapter.id>` in
   * `massu.config.yaml`. Field names are chosen by the adapter, and values
   * are `unknown` so strings, arrays, or nested records are all allowed.
   */
  conventions: Record<string, unknown>;
  /**
   * Provenance entries for the extracted fields. The runner writes them to
   * `detected.<adapter.id>._provenance` so a downstream auditor can verify
   * any extracted value.
   */
  provenance: Provenance[];
  /**
   * How much to trust the result:
   * - 'high'   — single canonical match; the query produced exactly one result
   * - 'medium' — multiple matches, all in agreement
   * - 'low'    — multiple matches that disagree (still emitted, with a warning)
   * - 'none'   — no matches, timed out, or threw; fields are dropped
   */
  confidence: 'high' | 'medium' | 'low' | 'none';
}
127
+
128
/** Contract every codebase adapter implements. */
export interface CodebaseAdapter {
  /** Stable identifier such as "python-fastapi"; names the `detected.<id>` block. */
  id: string;
  /** Languages this adapter consumes; lets the runner skip unnecessary work. */
  languages: TreeSitterLanguage[];
  /**
   * Cheap applicability check. Must NOT perform file IO; decides purely from
   * the pre-built signals. Returns true if any signal suggests this adapter
   * should run.
   */
  matches(signals: DetectionSignals): boolean;
  /**
   * Run AST queries over the sampled files (already read by the runner) and
   * return the extracted conventions. May throw — the runner isolates
   * failures.
   */
  introspect(files: SourceFile[], rootDir: string): Promise<AdapterResult>;
}
144
+
145
+ // ============================================================
146
+ // Runner output
147
+ // ============================================================
148
+
149
/**
 * Output of the runner: each adapter id mapped to its conventions block,
 * with the `_provenance` map merged in. The introspector then folds this
 * into the `detected.<adapter.id>` namespace, next to the existing
 * `detected.python` / `detected.swift` / `detected.typescript` regex blocks.
 */
export interface MergedAdapterOutput {
  /** Resolved conventions, keyed by adapter id. */
  byAdapter: Record<string, AdapterResolved>;
  /** Adapters that did not match and were skipped; kept for diagnostic logging. */
  skipped: string[];
  /** Adapters that threw during introspect; the runner isolates these. */
  errored: { adapterId: string; error: string }[];
}
163
+
164
/**
 * An `AdapterResult` after resolution and merging. The provenance list is
 * folded into `_provenance`, one key per field.
 */
export interface AdapterResolved {
  /** Conventions extracted by the adapter. */
  conventions: Record<string, unknown>;
  /** Field name → "relativePath:line :: queryName". Empty when there are no fields. */
  _provenance: Record<string, string>;
  /** Confidence level for the adapter's result. */
  confidence: 'high' | 'medium' | 'low' | 'none';
}