@massu/core 1.2.1 → 1.4.0-soak.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +40 -0
  2. package/commands/README.md +137 -0
  3. package/commands/massu-deploy.python-docker.md +170 -0
  4. package/commands/massu-deploy.python-fly.md +189 -0
  5. package/commands/massu-deploy.python-launchd.md +144 -0
  6. package/commands/massu-deploy.python-systemd.md +163 -0
  7. package/commands/massu-deploy.python.md +200 -0
  8. package/commands/massu-scaffold-page.md +172 -59
  9. package/commands/massu-scaffold-page.swift.md +121 -0
  10. package/commands/massu-scaffold-router.python-django.md +153 -0
  11. package/commands/massu-scaffold-router.python-fastapi.md +145 -0
  12. package/commands/massu-scaffold-router.python.md +143 -0
  13. package/dist/cli.js +10170 -4138
  14. package/dist/hooks/auto-learning-pipeline.js +44 -6
  15. package/dist/hooks/classify-failure.js +44 -6
  16. package/dist/hooks/cost-tracker.js +44 -6
  17. package/dist/hooks/fix-detector.js +44 -6
  18. package/dist/hooks/incident-pipeline.js +44 -6
  19. package/dist/hooks/post-edit-context.js +44 -6
  20. package/dist/hooks/post-tool-use.js +44 -6
  21. package/dist/hooks/pre-compact.js +44 -6
  22. package/dist/hooks/pre-delete-check.js +44 -6
  23. package/dist/hooks/quality-event.js +44 -6
  24. package/dist/hooks/rule-enforcement-pipeline.js +44 -6
  25. package/dist/hooks/session-end.js +44 -6
  26. package/dist/hooks/session-start.js +4789 -410
  27. package/dist/hooks/user-prompt.js +44 -6
  28. package/package.json +10 -4
  29. package/src/cli.ts +28 -2
  30. package/src/commands/config-refresh.ts +88 -20
  31. package/src/commands/init.ts +130 -23
  32. package/src/commands/install-commands.ts +482 -42
  33. package/src/commands/refresh-log.ts +37 -0
  34. package/src/commands/show-template.ts +65 -0
  35. package/src/commands/template-engine.ts +262 -0
  36. package/src/commands/watch.ts +430 -0
  37. package/src/config.ts +69 -3
  38. package/src/detect/adapters/nextjs-trpc.ts +166 -0
  39. package/src/detect/adapters/parse-guard.ts +133 -0
  40. package/src/detect/adapters/python-django.ts +208 -0
  41. package/src/detect/adapters/python-fastapi.ts +223 -0
  42. package/src/detect/adapters/query-helpers.ts +170 -0
  43. package/src/detect/adapters/runner.ts +252 -0
  44. package/src/detect/adapters/swift-swiftui.ts +171 -0
  45. package/src/detect/adapters/tree-sitter-loader.ts +348 -0
  46. package/src/detect/adapters/types.ts +174 -0
  47. package/src/detect/codebase-introspector.ts +190 -0
  48. package/src/detect/index.ts +28 -2
  49. package/src/detect/regex-fallback.ts +449 -0
  50. package/src/hooks/session-start.ts +94 -3
  51. package/src/lib/gitToplevel.ts +22 -0
  52. package/src/lib/installLock.ts +179 -0
  53. package/src/lib/pidLiveness.ts +67 -0
  54. package/src/lsp/auto-detect.ts +89 -0
  55. package/src/lsp/client.ts +590 -0
  56. package/src/lsp/enrich.ts +127 -0
  57. package/src/lsp/types.ts +221 -0
  58. package/src/watch/daemon.ts +385 -0
  59. package/src/watch/lockfile-detector.ts +65 -0
  60. package/src/watch/paths.ts +279 -0
  61. package/src/watch/state.ts +178 -0
@@ -0,0 +1,171 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: SwiftUI AST adapter.
6
+ *
7
+ * Extracts:
8
+ * - api_client_class: identifier ending in `API` (e.g. `HedgeAPI`)
9
+ * - biometric_policy: `LAPolicy.deviceOwnerAuthenticationWithBiometrics` etc.
10
+ * - navigation_pattern: 'NavigationStack' | 'NavigationView' | null
11
+ *
12
+ * Tree-sitter Swift grammar quirks: the `tree-sitter-swift` grammar names some
13
+ * nodes differently from python/typescript. We use simpler, more permissive
14
+ * S-expressions that fall back to capture-text matching where needed.
15
+ */
16
+
17
+ import { Parser } from 'web-tree-sitter';
18
+ import type { CodebaseAdapter, AdapterResult, DetectionSignals, Provenance, SourceFile } from './types.ts';
19
+ import { runQuery, InvalidQueryError } from './query-helpers.ts';
20
+ import { loadGrammar } from './tree-sitter-loader.ts';
21
+ import { isParsableSource, MAX_AST_FILE_BYTES } from './parse-guard.ts';
22
+
23
+ // ============================================================
24
+ // Queries
25
+ // ============================================================
26
+
27
/**
 * Tree-sitter query used to find API-client class candidates.
 *
 * The query itself is deliberately broad: it captures EVERY
 * `simple_identifier` node as `@ident`. Narrowing to uppercase-led names that
 * end in `API` (e.g. `HedgeAPI`) is done in JS by the adapter's
 * `introspect()` — Swift's grammar doesn't surface a clean
 * class-instantiation pattern uniformly.
 */
const API_CLASS_QUERY = `
(simple_identifier) @ident
`;
35
+
36
/**
 * Tree-sitter query for member accesses such as `.deviceOwnerAuthentication` /
 * `.deviceOwnerAuthenticationWithBiometrics`.
 *
 * Captures the identifier inside the navigation suffix as `@policy_name`.
 * The pattern matches any member access; the adapter keeps only names that
 * are present in `POLICY_NAMES`.
 */
const POLICY_QUERY = `
(navigation_expression
suffix: (navigation_suffix
(simple_identifier) @policy_name))
`;
45
+
46
/**
 * Tree-sitter query for NavigationStack / NavigationView usage.
 *
 * Captures every `simple_identifier` as `@nav_ident`; the adapter keeps only
 * captures whose text is exactly `NavigationStack` or `NavigationView`.
 */
const NAV_QUERY = `
(simple_identifier) @nav_ident
`;
53
+
54
+ // ============================================================
55
+ // Adapter
56
+ // ============================================================
57
+
58
/** Member names accepted as a biometric policy by the `swift-biometric-policy` query. */
const POLICY_NAMES = new Set([
  'deviceOwnerAuthentication',
  'deviceOwnerAuthenticationWithBiometrics',
]);

/** Location at which a detected symbol was first seen. */
type SwiftFirstHit = { line: number; file: string };

/**
 * Translate the number of DISTINCT values found per field into a confidence.
 *
 * - nothing found                                  → 'none'
 * - exactly one API class, and at most one distinct
 *   policy AND at most one navigation symbol       → 'high'
 * - more than one API class                        → 'low'
 * - everything else                                → 'medium'
 *
 * The navigation count is part of the 'high' rule so that a project using
 * both `NavigationStack` and `NavigationView` is not reported with full
 * confidence while only the first-seen symbol is emitted.
 */
function deriveSwiftConfidence(
  apiCount: number,
  policyCount: number,
  navCount: number,
): 'high' | 'medium' | 'low' | 'none' {
  if (apiCount === 0 && policyCount === 0 && navCount === 0) return 'none';
  if (apiCount === 1 && policyCount <= 1 && navCount <= 1) return 'high';
  if (apiCount > 1) return 'low';
  return 'medium';
}

/**
 * Copy the first-seen entry of `hits` (Map iteration is insertion-ordered)
 * into `conventions[field]` and append the matching provenance record.
 * Does nothing when `hits` is empty.
 */
function emitFirstSwiftHit(
  field: string,
  queryName: string,
  hits: Map<string, SwiftFirstHit>,
  conventions: Record<string, unknown>,
  provenance: Provenance[],
): void {
  const first = hits.entries().next();
  if (first.done) return;
  const [name, { line, file }] = first.value;
  conventions[field] = name;
  provenance.push({ field, sourceFile: file, line, query: queryName });
}

/**
 * SwiftUI adapter: extracts `api_client_class`, `biometric_policy` and
 * `navigation_pattern` from sampled Swift sources via Tree-sitter queries.
 */
export const swiftSwiftUiAdapter: CodebaseAdapter = {
  id: 'swift-swiftui',
  languages: ['swift'],

  /**
   * Cheap signal check (no file IO): true when the project root contains
   * `Package.swift`, an `.xcodeproj` / `.xcworkspace` / `Sources` directory,
   * or any `.swift` file.
   */
  matches(signals: DetectionSignals): boolean {
    if (signals.presentFiles.has('Package.swift')) return true;
    for (const dir of signals.presentDirs) {
      if (dir.endsWith('.xcodeproj') || dir.endsWith('.xcworkspace')) return true;
      if (dir === 'Sources') return true;
    }
    for (const file of signals.presentFiles) {
      if (file.endsWith('.swift')) return true;
    }
    return false;
  },

  /**
   * Run the three Swift queries over every sampled file and report the
   * first-seen value per field.
   *
   * @param files    Pre-read source files handed over by the runner.
   * @param _rootDir Unused by this adapter.
   * @returns Conventions + provenance; `confidence: 'none'` with empty
   *          conventions when there are no files, the grammar cannot be
   *          loaded, or nothing matched.
   * @throws InvalidQueryError when one of the query strings is rejected —
   *         every other per-file failure is skipped.
   */
  async introspect(files: SourceFile[], _rootDir: string): Promise<AdapterResult> {
    if (files.length === 0) {
      return { conventions: {}, provenance: [], confidence: 'none' };
    }

    let language;
    try {
      language = await loadGrammar('swift');
    } catch {
      // Grammar unavailable: report "nothing detected" rather than failing.
      return { conventions: {}, provenance: [], confidence: 'none' };
    }

    const parser = new Parser();
    parser.setLanguage(language);

    // Each map is keyed by the captured text and remembers only the FIRST
    // occurrence, so `size` is the number of distinct values seen.
    const apiClasses = new Map<string, SwiftFirstHit>();
    const policies = new Map<string, SwiftFirstHit>();
    const navs = new Map<string, SwiftFirstHit>();

    try {
      for (const file of files) {
        // A non-null result is the reason this file must not be parsed.
        const skip = isParsableSource(file.content, file.size);
        if (skip) {
          process.stderr.write(
            `[massu/ast] WARN: swift-swiftui skipping ${file.path}: ${skip.reason} (${skip.detail}). Cap=${MAX_AST_FILE_BYTES}. (Phase 3.5 mitigation)\n`,
          );
          continue;
        }
        try {
          // API class names: filter via JS regex on the captured identifier
          for (const hit of runQuery(parser, file.content, API_CLASS_QUERY, 'swift-api-class', file.path)) {
            const ident = hit.captures.ident;
            if (ident && /^[A-Z][A-Za-z0-9_]*API$/.test(ident) && !apiClasses.has(ident)) {
              apiClasses.set(ident, { line: hit.line, file: file.path });
            }
          }
          // Biometric policy
          for (const hit of runQuery(parser, file.content, POLICY_QUERY, 'swift-biometric-policy', file.path)) {
            const name = hit.captures.policy_name;
            if (name && POLICY_NAMES.has(name) && !policies.has(name)) {
              policies.set(name, { line: hit.line, file: file.path });
            }
          }
          // Navigation
          for (const hit of runQuery(parser, file.content, NAV_QUERY, 'swift-navigation', file.path)) {
            const ident = hit.captures.nav_ident;
            if ((ident === 'NavigationStack' || ident === 'NavigationView') && !navs.has(ident)) {
              navs.set(ident, { line: hit.line, file: file.path });
            }
          }
        } catch (e) {
          // A malformed query is a programming error and must surface; any
          // other failure only costs us this one file.
          if (e instanceof InvalidQueryError) throw e;
          continue;
        }
      }
    } finally {
      // Release the parser even when a query error propagates.
      try { parser.delete(); } catch { /* ignore */ }
    }

    const conventions: Record<string, unknown> = {};
    const provenance: Provenance[] = [];

    emitFirstSwiftHit('api_client_class', 'swift-api-class', apiClasses, conventions, provenance);
    emitFirstSwiftHit('biometric_policy', 'swift-biometric-policy', policies, conventions, provenance);
    emitFirstSwiftHit('navigation_pattern', 'swift-navigation', navs, conventions, provenance);

    const confidence: AdapterResult['confidence'] = deriveSwiftConfidence(
      apiClasses.size,
      policies.size,
      navs.size,
    );

    return { conventions, provenance, confidence };
  },
};
@@ -0,0 +1,348 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: Tree-sitter WASM grammar loader (Strategy A).
6
+ *
7
+ * Strategy A — locked at Phase 0 (`docs/internal/2026-04-26-ast-lsp-spec.md`
8
+ * §1, §8): grammars are NOT bundled in the npm tarball. The loader downloads
9
+ * each requested grammar at first use from a pinned URL, verifies SHA-256
10
+ * against a hardcoded manifest, caches under `~/.massu/wasm-cache/`.
11
+ *
12
+ * Security model (Phase 3.5 #3):
13
+ * - SHA-256 manifest hardcoded HERE — never network-fetched.
14
+ * - Mismatch → throw `GrammarSHAMismatchError`. NO silent fallback.
15
+ * - Atomic cache write: `<lang>-<sha>.wasm.tmp.<pid>` → rename → final.
16
+ * - Offline + no-cache → throw `GrammarUnavailableError` so the runner can
17
+ * translate to a regex-fallback path with a stderr note.
18
+ *
19
+ * Phase 1 shipped the CODE PATH with placeholder SHA-256 values; the manifest
20
+ * below now pins the real hashes computed during Phase 9 release-prep
21
+ * (`curl <url> | shasum -a 256`). The comparison branch of the verification
22
+ * logic is exercised in tests by injecting mismatching manifest entries.
23
+ */
24
+
25
+ import { createHash } from 'crypto';
26
+ import {
27
+ mkdirSync,
28
+ readFileSync,
29
+ writeFileSync,
30
+ renameSync,
31
+ unlinkSync,
32
+ lstatSync,
33
+ chmodSync,
34
+ } from 'fs';
35
+ import { homedir } from 'os';
36
+ import { dirname, join } from 'path';
37
+ import { Language, Parser } from 'web-tree-sitter';
38
+ import type { TreeSitterLanguage } from './types.ts';
39
+
40
+ // ============================================================
41
+ // Typed errors
42
+ // ============================================================
43
+
44
/**
 * Raised when the SHA-256 of a grammar's WASM bytes differs from the digest
 * pinned in the manifest. Carries both digests so callers can report them.
 */
export class GrammarSHAMismatchError extends Error {
  public readonly language: TreeSitterLanguage;
  public readonly expected: string;
  public readonly actual: string;

  constructor(language: TreeSitterLanguage, expected: string, actual: string) {
    const message = [
      `[tree-sitter-loader] SHA-256 mismatch for grammar "${language}". `,
      `Expected ${expected}, got ${actual}. `,
      `REFUSING to load — see Phase 3.5 audit attack vector #3.`,
    ].join('');
    super(message);
    this.name = 'GrammarSHAMismatchError';
    this.language = language;
    this.expected = expected;
    this.actual = actual;
  }
}
61
+
62
/**
 * Raised when a grammar cannot be obtained (download failed and nothing is
 * cached). The optional `cause` is kept on the instance and summarised in the
 * message.
 */
export class GrammarUnavailableError extends Error {
  public readonly language: TreeSitterLanguage;
  public readonly cause?: unknown;

  constructor(language: TreeSitterLanguage, cause?: unknown) {
    // Prefer an Error's own message, then any truthy value's string form,
    // and finally a generic explanation.
    let reason: string;
    if (cause instanceof Error) {
      reason = cause.message;
    } else if (cause) {
      reason = String(cause);
    } else {
      reason = 'no cached grammar and download failed';
    }
    super(
      `[tree-sitter-loader] Grammar for "${language}" is unavailable: ${reason}. ` +
        `Falling back to regex introspection for files in ${language}.`,
    );
    this.name = 'GrammarUnavailableError';
    this.language = language;
    this.cause = cause;
  }
}
78
+
79
/**
 * Raised when the grammar cache path turns out to be a symlink or any other
 * non-regular file. A pre-planted symlink at the expected cache location
 * could redirect reads/writes elsewhere on the filesystem, so the loader
 * refuses to touch it. (Phase 3.5 finding #3 — symlink attack on cache dir.)
 */
export class GrammarCacheSymlinkError extends Error {
  public readonly cachePath: string;

  constructor(cachePath: string) {
    const message = [
      `[tree-sitter-loader] Refusing to load grammar — cache path "${cachePath}" is a symlink `,
      `or non-regular file. (Phase 3.5 finding #3 — symlink attack vector.)`,
    ].join('');
    super(message);
    this.name = 'GrammarCacheSymlinkError';
    this.cachePath = cachePath;
  }
}
96
+
97
/**
 * Raised when a manifest URL does not use HTTPS. The manifest lives in
 * source, but as defense in depth any future edit introducing an `http://`
 * URL is rejected at load time rather than relying on code review.
 * (Phase 3.5 finding #3 — MITM on download.)
 */
export class GrammarUrlNotHttpsError extends Error {
  public readonly url: string;

  constructor(url: string) {
    const message = [
      `[tree-sitter-loader] Refusing to download grammar from non-HTTPS URL: ${url}. `,
      `Only https:// URLs are accepted. (Phase 3.5 finding #3.)`,
    ].join('');
    super(message);
    this.name = 'GrammarUrlNotHttpsError';
    this.url = url;
  }
}
114
+
115
+ // ============================================================
116
+ // Pinned manifest
117
+ // ============================================================
118
+
119
/** One pinned grammar download: where to fetch it and what it must hash to. */
interface ManifestEntry {
  /** Download location; the loader rejects anything that is not `https://`. */
  url: string;
  /** Hex SHA-256 the loader compares both downloaded and cached bytes against. */
  sha256: string;
  /** Version label of the pinned artifact (informational in this file). */
  version: string;
}
124
+
125
/** Version of the `tree-sitter-wasms` package every pinned grammar comes from. */
const PINNED_WASMS_VERSION = '0.1.13';

/** Build the manifest entry for `tree-sitter-<stem>.wasm` at the pinned version. */
function pinnedWasm(stem: string, digest: string): ManifestEntry {
  return {
    url: `https://unpkg.com/tree-sitter-wasms@${PINNED_WASMS_VERSION}/out/tree-sitter-${stem}.wasm`,
    sha256: digest,
    version: PINNED_WASMS_VERSION,
  };
}

/**
 * Hardcoded grammar manifest. It lives in source, so changing it requires a
 * release.
 *
 * Source: the `tree-sitter-wasms` npm package (https://npm.im/tree-sitter-wasms),
 * which provides pre-built WASM binaries for Tree-sitter language parsers. It
 * is NOT a dependency (per plan §Phase 0 ban on bundling); binaries are
 * fetched from unpkg at first use. The individual `tree-sitter-<lang>`
 * packages on npm do NOT ship `.wasm` files, only C source + native .node
 * prebuilds — confirmed by inspecting unpkg `?meta` listings during Phase 9
 * release-prep.
 *
 * SHA-256 hashes computed 2026-04-28 via:
 *   curl -fsSL <url> | shasum -a 256
 *
 * The verification code path is exercised in `tree-sitter-loader.test.ts` by
 * injecting test manifest entries that intentionally mismatch.
 */
export const GRAMMAR_MANIFEST: Partial<Record<TreeSitterLanguage, ManifestEntry>> = {
  python: pinnedWasm('python', '9056d0fb0c337810d019fae350e8167786119da98f0f282aceae7ab89ee8253b'),
  typescript: pinnedWasm('typescript', '8515404dceed38e1ed86aa34b09fcf3379fff1b4ff9dd3967bcd6d1eb5ac3d8f'),
  javascript: pinnedWasm('javascript', '63812b9e275d26851264734868d27a1656bd44a2ef6eb3e85e6b03728c595ab5'),
  swift: pinnedWasm('swift', '41c4fdb2249a3aa6d87eed0d383081ff09725c2248b4977043a43825980ffcc7'),
};
164
+
165
+ // ============================================================
166
+ // Cache + Parser init
167
+ // ============================================================
168
+
169
/**
 * Resolve the directory that holds cached grammar WASM files.
 *
 * `MASSU_WASM_CACHE_DIR` wins when it is set to a non-blank value; otherwise
 * the default `~/.massu/wasm-cache` is used. A blank override (e.g.
 * `MASSU_WASM_CACHE_DIR=`) is treated as "not set" — passing it through would
 * make the cache path relative to the current working directory.
 *
 * @returns Path of the cache directory (not created by this function).
 */
function getCacheDir(): string {
  const override = process.env.MASSU_WASM_CACHE_DIR;
  if (override !== undefined && override.trim() !== '') return override;
  return join(homedir(), '.massu', 'wasm-cache');
}
172
+
173
/**
 * Build the on-disk cache location for a grammar.
 *
 * The file name embeds the pinned digest (`<language>-<sha>.wasm`), so a
 * manifest bump resolves to a different cache file.
 */
function getCachedPath(language: TreeSitterLanguage, sha: string): string {
  const fileName = `${language}-${sha}.wasm`;
  return join(getCacheDir(), fileName);
}
176
+
177
/** Return the lowercase hex SHA-256 digest of `bytes`. */
function sha256(bytes: Uint8Array): string {
  const hasher = createHash('sha256');
  hasher.update(bytes);
  return hasher.digest('hex');
}
180
+
181
/** Memoised `Parser.init()` promise; `null` until first use or after a failure. */
let parserInitPromise: Promise<void> | null = null;

/**
 * `Parser.init()` is async and must be called once before any `new Parser()`.
 * This function is idempotent — concurrent and repeated calls share the same
 * promise.
 *
 * A failed initialisation is NOT memoised: the cached promise is cleared on
 * rejection so a later call can retry instead of replaying the same failure
 * for the rest of the process.
 *
 * Test harnesses can mock this by stubbing `Parser.init`.
 *
 * @returns Promise that resolves once the Tree-sitter runtime is ready.
 * @throws Whatever `Parser.init()` rejects with.
 */
export async function ensureParserInitialized(): Promise<void> {
  if (!parserInitPromise) {
    parserInitPromise = Parser.init().catch((err: unknown) => {
      // Forget the failed attempt so the next caller triggers a fresh init.
      parserInitPromise = null;
      throw err;
    });
  }
  return parserInitPromise;
}
194
+
195
+ // ============================================================
196
+ // Loader (the main entry point)
197
+ // ============================================================
198
+
199
/** Optional knobs for `loadGrammar`; both exist for test injection. */
interface LoaderOptions {
  /**
   * Test-injection: override the manifest entry for a language. Production
   * callers leave this undefined; tests use it to exercise SHA-mismatch and
   * download-failure paths. An entry here takes precedence over
   * `GRAMMAR_MANIFEST` for the same language.
   */
  manifestOverride?: Partial<Record<TreeSitterLanguage, ManifestEntry>>;
  /**
   * Test-injection: override the fetch implementation. Defaults to global
   * `fetch`. Tests pass a mock that returns a fixed body or throws. Only
   * `ok`, `arrayBuffer()` and (optionally) `status` of the response are read.
   */
  fetchImpl?: (url: string) => Promise<{ ok: boolean; arrayBuffer: () => Promise<ArrayBuffer>; status?: number }>;
}
212
+
213
/** Grammars already loaded in this process, keyed by language. */
const loadedGrammars = new Map<TreeSitterLanguage, Language>();

/**
 * Write `body` to `cachePath` via "temp file + rename" so readers never see a
 * partially written grammar.
 *
 * Hardening (Phase 3.5 finding #3):
 *  - cache dir is created/forced to 0o700, files to 0o600;
 *  - anything already sitting at the temp path (stale leftover, or a
 *    pre-planted symlink) is unlinked first — unlink removes the link itself,
 *    never its target;
 *  - the temp file is opened with `wx` (exclusive create), so the write can
 *    never follow a symlink or overwrite an existing file;
 *  - the temp file is removed on ANY failure, not only a failed rename.
 *
 * @throws The underlying fs error; the caller decides whether it is fatal.
 */
function writeGrammarCacheAtomically(cachePath: string, body: Uint8Array): void {
  const cacheDir = dirname(cachePath);
  mkdirSync(cacheDir, { recursive: true, mode: 0o700 });
  try { chmodSync(cacheDir, 0o700); } catch { /* best effort */ }

  const tmpPath = `${cachePath}.tmp.${process.pid}`;
  try { unlinkSync(tmpPath); } catch { /* nothing to remove */ }

  try {
    writeFileSync(tmpPath, body, { mode: 0o600, flag: 'wx' });
    try { chmodSync(tmpPath, 0o600); } catch { /* best effort */ }
    renameSync(tmpPath, cachePath);
  } catch (e) {
    // Never leave a (possibly partial) temp file behind.
    try { unlinkSync(tmpPath); } catch { /* ignore */ }
    throw e;
  }
  try { chmodSync(cachePath, 0o600); } catch { /* best effort */ }
}

/**
 * Lazy-load a Tree-sitter grammar. Only fetches/caches the grammar for
 * `language`; other languages are unaffected.
 *
 * Order:
 *   1. In-memory cache hit → return.
 *   2. Disk cache hit + SHA verify pass → load from disk.
 *   3. Disk cache hit + SHA mismatch → throw GrammarSHAMismatchError.
 *   4. Cache miss → fetch from pinned URL → SHA verify → atomic write → load.
 *   5. Fetch fails AND no cache → throw GrammarUnavailableError.
 *
 * @param language Grammar to load.
 * @param options  Test-injection hooks (manifest override, fetch override).
 * @returns The loaded grammar (also memoised for the rest of the process).
 * @throws GrammarUnavailableError  no manifest entry, no fetch implementation,
 *                                  or the download failed.
 * @throws GrammarSHAMismatchError  cached or downloaded bytes don't match the
 *                                  pinned digest.
 * @throws GrammarCacheSymlinkError the cache path is a symlink / non-regular file.
 * @throws GrammarUrlNotHttpsError  the manifest URL is not `https://`.
 */
export async function loadGrammar(
  language: TreeSitterLanguage,
  options: LoaderOptions = {},
): Promise<Language> {
  await ensureParserInitialized();

  // 1: in-memory hit.
  const cached = loadedGrammars.get(language);
  if (cached) return cached;

  const manifest = options.manifestOverride?.[language] ?? GRAMMAR_MANIFEST[language];
  if (!manifest) {
    throw new GrammarUnavailableError(
      language,
      new Error(`No manifest entry for language "${language}". v1 supports: ${Object.keys(GRAMMAR_MANIFEST).join(', ')}.`),
    );
  }

  const cachePath = getCachedPath(language, manifest.sha256);

  // 2/3: disk cache check. Use lstatSync (NOT statSync) so a symlink at
  // the cache path is detected and rejected — never followed.
  // (Phase 3.5 finding #3 — symlink attack on cache dir.)
  let cacheLstat;
  try {
    cacheLstat = lstatSync(cachePath);
  } catch {
    cacheLstat = null;
  }
  if (cacheLstat) {
    if (cacheLstat.isSymbolicLink() || !cacheLstat.isFile()) {
      throw new GrammarCacheSymlinkError(cachePath);
    }
    let bytes: Uint8Array;
    try {
      bytes = readFileSync(cachePath);
    } catch {
      // Treat read failure as cache miss; fall through to download.
      bytes = new Uint8Array(0);
    }
    // An empty file is likewise treated as a miss.
    if (bytes.byteLength > 0) {
      const actualSha = sha256(bytes);
      if (actualSha !== manifest.sha256) {
        // Refuse to load. Don't silently re-download — that would mask
        // tampering of the on-disk cache.
        throw new GrammarSHAMismatchError(language, manifest.sha256, actualSha);
      }
      const lang = await Language.load(bytes);
      loadedGrammars.set(language, lang);
      return lang;
    }
  }

  // 4/5: download. Defense in depth: refuse non-HTTPS URLs.
  if (!/^https:\/\//i.test(manifest.url)) {
    throw new GrammarUrlNotHttpsError(manifest.url);
  }

  const fetchImpl = options.fetchImpl ?? (globalThis.fetch as LoaderOptions['fetchImpl']);
  if (!fetchImpl) {
    throw new GrammarUnavailableError(
      language,
      new Error('No fetch implementation available (Node < 18?)'),
    );
  }

  let body: Uint8Array;
  try {
    const res = await fetchImpl(manifest.url);
    if (!res.ok) {
      throw new Error(`HTTP ${res.status ?? 'unknown'} from ${manifest.url}`);
    }
    body = new Uint8Array(await res.arrayBuffer());
  } catch (e) {
    // Network error and non-2xx response both surface as "unavailable".
    throw new GrammarUnavailableError(language, e);
  }

  // Verify BEFORE anything touches the disk or the WASM loader.
  const downloadedSha = sha256(body);
  if (downloadedSha !== manifest.sha256) {
    throw new GrammarSHAMismatchError(language, manifest.sha256, downloadedSha);
  }

  try {
    writeGrammarCacheAtomically(cachePath, body);
  } catch (e) {
    // Cache write failure is non-fatal — we still have `body` in memory and
    // can load directly. Log to stderr per VR-USER-ERROR-MESSAGES style.
    console.error(
      `[tree-sitter-loader] cache write failed for ${language}: ${e instanceof Error ? e.message : String(e)} — loading directly from memory.`,
    );
  }

  const lang = await Language.load(body);
  loadedGrammars.set(language, lang);
  return lang;
}
341
+
342
/**
 * Test-only helper: forget every grammar held in the in-memory map so the
 * next `loadGrammar` call goes through the disk/download path again. Files in
 * the disk cache are left untouched. Production code never needs this; the
 * in-memory map lives for the process.
 */
export function __resetLoadedGrammars(): void {
  // Only the in-process map is cleared.
  loadedGrammars.clear();
}
@@ -0,0 +1,174 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: AST Adapter contract types.
6
+ *
7
+ * Lives at `packages/core/src/detect/adapters/types.ts` per the spec doc
8
+ * (`docs/internal/2026-04-26-ast-lsp-spec.md` §2). All types are local —
9
+ * NONE re-exported from `web-tree-sitter`.
10
+ *
11
+ * Adapter authors import from this module only; the runner (`runner.ts`)
12
+ * orchestrates execution and the loader (`tree-sitter-loader.ts`) handles
13
+ * grammar acquisition.
14
+ *
15
+ * Per-field confidence is enforced (NOT per-adapter): a single weak field
16
+ * MUST NOT poison the rest. The runner consumes `confidence` per-adapter for
17
+ * the moment, but the merge rule reads each `conventions[field]` against the
18
+ * provenance trail to decide what survives.
19
+ */
20
+
21
+ // ============================================================
22
+ // Languages enumerated for the AST adapter set (Phase 1 + 3c)
23
+ // ============================================================
24
+
25
/**
 * Closed set of Tree-sitter grammar names massu knows about.
 *
 * Note: this is a string-literal union, NOT re-exported from `web-tree-sitter`
 * (which exposes `Language` as a class, not a name list). Phase 1 ships
 * adapters for python/typescript/javascript/swift only — the remaining
 * languages are reserved for Plan 3c.
 */
export type TreeSitterLanguage =
  // Phase 1 adapters
  | 'python'
  | 'typescript'
  | 'javascript'
  | 'swift'
  // Reserved for Plan 3c
  | 'rust'
  | 'go'
  | 'ruby'
  | 'php'
  | 'java'
  | 'kotlin'
  | 'elixir'
  | 'erlang'
  | 'csharp'
  | 'cpp'
  | 'haskell'
  | 'ocaml';
50
+
51
+ // ============================================================
52
+ // Inputs to adapter dispatch
53
+ // ============================================================
54
+
55
/**
 * Read-only signal bundle the runner builds BEFORE adapter dispatch.
 *
 * Adapters consume signals to answer `matches()` cheaply (no file IO inside
 * `matches()` — that's why the bundle is built up-front). Manifest fields are
 * optional and left `undefined` when the corresponding file is absent; the
 * two name sets are always present.
 */
export interface DetectionSignals {
  /** Parsed `package.json` (root or first workspace) — undefined if absent. */
  packageJson?: Record<string, unknown>;
  /** Parsed `pyproject.toml` — undefined if absent. */
  pyprojectToml?: Record<string, unknown>;
  /** Raw `Gemfile` text — undefined if absent. */
  gemfile?: string;
  /** Parsed `Cargo.toml` — undefined if absent. */
  cargoToml?: Record<string, unknown>;
  /** Raw `go.mod` text — undefined if absent. */
  goMod?: string;
  /** Set of present directory names directly under the project root (one level). */
  presentDirs: Set<string>;
  /** Set of present file basenames directly under the project root (one level). */
  presentFiles: Set<string>;
}
77
+
78
/**
 * A sampled source file the runner hands to the adapter.
 *
 * `content` is pre-read; adapters MUST NOT re-read from disk inside
 * `introspect()`. `size` is in bytes (pre-read length).
 */
export interface SourceFile {
  /** Path of the file; adapters copy it into provenance records as-is. */
  path: string;
  /** Full text of the file, already read by the runner. */
  content: string;
  /** Grammar this file should be parsed with. */
  language: TreeSitterLanguage;
  /** File size in bytes (pre-read length). */
  size: number;
}
90
+
91
+ // ============================================================
92
+ // Adapter contract
93
+ // ============================================================
94
+
95
/**
 * Trail entry produced for every captured field — the user can audit
 * `detected.<adapter>._provenance` to see exactly which file/line/query
 * produced a value.
 */
export interface Provenance {
  /** Name of the `conventions` field this entry explains. */
  field: string;
  /** File the value was captured from. */
  sourceFile: string;
  /** Line of the capture within `sourceFile`. */
  line: number;
  /** Name of the query that produced the capture. */
  query: string;
}
106
+
107
/** What a single adapter's `introspect()` returns to the runner. */
export interface AdapterResult {
  /**
   * Becomes `detected.<adapter.id>` in `massu.config.yaml`. Field names are
   * adapter-defined; values are `unknown` so adapters can return strings,
   * arrays, or nested records as needed.
   */
  conventions: Record<string, unknown>;
  /**
   * Per-field provenance trail. The runner writes this to
   * `detected.<adapter.id>._provenance` so a downstream auditor can verify
   * any extracted value.
   */
  provenance: Provenance[];
  /**
   * Overall confidence for this adapter's result:
   *
   * 'high'  : single canonical match, query produced exactly one result
   * 'medium': multiple matches, all agree
   * 'low'   : multiple matches with disagreement (still emitted, with warning)
   * 'none'  : no matches, timed out, or threw — fields are dropped
   */
  confidence: 'high' | 'medium' | 'low' | 'none';
}
128
+
129
/**
 * Contract every first-party AST adapter implements. The runner calls
 * `matches()` first and only then `introspect()`.
 */
export interface CodebaseAdapter {
  /** Stable adapter id, e.g. "python-fastapi". Becomes `detected.<id>` block. */
  id: string;
  /** Languages this adapter consumes. Used by the runner to skip work. */
  languages: TreeSitterLanguage[];
  /**
   * Cheap signal check — must NOT do file IO. Returns true if any signal
   * suggests this adapter should run.
   */
  matches(signals: DetectionSignals): boolean;
  /**
   * Sample N files (already read by the runner), run AST queries, return
   * extracted conventions. May throw — the runner isolates failures.
   */
  introspect(files: SourceFile[], rootDir: string): Promise<AdapterResult>;
}
145
+
146
+ // ============================================================
147
+ // Runner output
148
+ // ============================================================
149
+
150
/**
 * The runner's output: per-adapter id → its conventions block (with the
 * `_provenance` map merged in). The introspector then folds this into the
 * `detected.<adapter.id>` namespace alongside the existing
 * `detected.python` / `detected.swift` / `detected.typescript` regex blocks.
 */
export interface MergedAdapterOutput {
  /** Per-adapter id → resolved conventions. */
  byAdapter: Record<string, AdapterResolved>;
  /** Adapters that were skipped (didn't match) for diagnostic logging. */
  skipped: string[];
  /**
   * Adapters that threw during introspect — runner isolates these. Each
   * entry pairs the adapter id with the error rendered as a string.
   */
  errored: Array<{ adapterId: string; error: string }>;
}
164
+
165
/**
 * Resolved-and-merged form of an `AdapterResult`. Provenance is folded into
 * `_provenance` (key per field, value = `path:line :: query`).
 */
export interface AdapterResolved {
  /** Adapter-defined fields, same shape as `AdapterResult.conventions`. */
  conventions: Record<string, unknown>;
  /** field-name -> "relativePath:line :: queryName". Empty when no fields. */
  _provenance: Record<string, string>;
  /** Same four-level scale as `AdapterResult.confidence`. */
  confidence: 'high' | 'medium' | 'low' | 'none';
}