@massu/core 1.3.0 → 1.4.0-soak.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/commands/README.md +23 -8
- package/commands/massu-deploy.python-docker.md +170 -0
- package/commands/massu-deploy.python-fly.md +189 -0
- package/commands/massu-deploy.python-launchd.md +144 -0
- package/commands/massu-deploy.python-systemd.md +163 -0
- package/commands/massu-scaffold-page.swift.md +10 -10
- package/commands/massu-scaffold-router.python-django.md +153 -0
- package/commands/massu-scaffold-router.python-fastapi.md +145 -0
- package/dist/cli.js +9906 -4133
- package/dist/hooks/auto-learning-pipeline.js +37 -2
- package/dist/hooks/classify-failure.js +37 -2
- package/dist/hooks/cost-tracker.js +37 -2
- package/dist/hooks/fix-detector.js +37 -2
- package/dist/hooks/incident-pipeline.js +37 -2
- package/dist/hooks/post-edit-context.js +37 -2
- package/dist/hooks/post-tool-use.js +37 -2
- package/dist/hooks/pre-compact.js +37 -2
- package/dist/hooks/pre-delete-check.js +37 -2
- package/dist/hooks/quality-event.js +37 -2
- package/dist/hooks/rule-enforcement-pipeline.js +37 -2
- package/dist/hooks/session-end.js +37 -2
- package/dist/hooks/session-start.js +4782 -406
- package/dist/hooks/user-prompt.js +37 -2
- package/package.json +10 -4
- package/src/cli.ts +22 -2
- package/src/commands/config-refresh.ts +88 -20
- package/src/commands/init.ts +130 -23
- package/src/commands/install-commands.ts +142 -26
- package/src/commands/refresh-log.ts +37 -0
- package/src/commands/template-engine.ts +262 -0
- package/src/commands/watch.ts +430 -0
- package/src/config.ts +63 -0
- package/src/detect/adapters/nextjs-trpc.ts +166 -0
- package/src/detect/adapters/parse-guard.ts +133 -0
- package/src/detect/adapters/python-django.ts +208 -0
- package/src/detect/adapters/python-fastapi.ts +223 -0
- package/src/detect/adapters/query-helpers.ts +170 -0
- package/src/detect/adapters/runner.ts +252 -0
- package/src/detect/adapters/swift-swiftui.ts +171 -0
- package/src/detect/adapters/tree-sitter-loader.ts +348 -0
- package/src/detect/adapters/types.ts +174 -0
- package/src/detect/codebase-introspector.ts +190 -0
- package/src/detect/index.ts +28 -2
- package/src/detect/regex-fallback.ts +449 -0
- package/src/hooks/session-start.ts +94 -3
- package/src/lib/gitToplevel.ts +22 -0
- package/src/lib/installLock.ts +179 -0
- package/src/lib/pidLiveness.ts +67 -0
- package/src/lsp/auto-detect.ts +89 -0
- package/src/lsp/client.ts +590 -0
- package/src/lsp/enrich.ts +127 -0
- package/src/lsp/types.ts +221 -0
- package/src/watch/daemon.ts +385 -0
- package/src/watch/lockfile-detector.ts +65 -0
- package/src/watch/paths.ts +279 -0
- package/src/watch/state.ts +178 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Plan 3b — Phase 1: Tree-sitter WASM grammar loader (Strategy A).
|
|
6
|
+
*
|
|
7
|
+
* Strategy A — locked at Phase 0 (`docs/internal/2026-04-26-ast-lsp-spec.md`
|
|
8
|
+
* §1, §8): grammars are NOT bundled in the npm tarball. The loader downloads
|
|
9
|
+
* each requested grammar at first use from a pinned URL, verifies SHA-256
|
|
10
|
+
* against a hardcoded manifest, caches under `~/.massu/wasm-cache/`.
|
|
11
|
+
*
|
|
12
|
+
* Security model (Phase 3.5 #3):
|
|
13
|
+
* - SHA-256 manifest hardcoded HERE — never network-fetched.
|
|
14
|
+
* - Mismatch → throw `GrammarSHAMismatchError`. NO silent fallback.
|
|
15
|
+
* - Atomic cache write: `<lang>-<sha>.wasm.tmp.<pid>` → rename → final.
|
|
16
|
+
* - Offline + no-cache → throw `GrammarUnavailableError` so the runner can
|
|
17
|
+
* translate to a regex-fallback path with a stderr note.
|
|
18
|
+
*
|
|
19
|
+
 * Phase 1 shipped the CODE PATH with placeholder SHA-256 values. The
 * manifest below now records hashes computed on 2026-04-28 via
 * `curl -fsSL <url> | shasum -a 256`; tests exercise the comparison
 * branch by injecting manifest entries that intentionally mismatch.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { createHash } from 'crypto';
|
|
26
|
+
import {
|
|
27
|
+
mkdirSync,
|
|
28
|
+
readFileSync,
|
|
29
|
+
writeFileSync,
|
|
30
|
+
renameSync,
|
|
31
|
+
unlinkSync,
|
|
32
|
+
lstatSync,
|
|
33
|
+
chmodSync,
|
|
34
|
+
} from 'fs';
|
|
35
|
+
import { homedir } from 'os';
|
|
36
|
+
import { dirname, join } from 'path';
|
|
37
|
+
import { Language, Parser } from 'web-tree-sitter';
|
|
38
|
+
import type { TreeSitterLanguage } from './types.ts';
|
|
39
|
+
|
|
40
|
+
// ============================================================
|
|
41
|
+
// Typed errors
|
|
42
|
+
// ============================================================
|
|
43
|
+
|
|
44
|
+
/**
 * Raised when grammar bytes do not hash to the SHA-256 pinned in the manifest.
 *
 * The loader throws this for freshly downloaded bytes and for bytes read back
 * from the on-disk cache; in both cases the grammar is not loaded.
 */
export class GrammarSHAMismatchError extends Error {
  /** Language whose grammar failed verification. */
  public readonly language: TreeSitterLanguage;
  /** Hex digest pinned in the manifest. */
  public readonly expected: string;
  /** Hex digest computed from the bytes actually seen. */
  public readonly actual: string;

  constructor(language: TreeSitterLanguage, expected: string, actual: string) {
    const sentences = [
      `[tree-sitter-loader] SHA-256 mismatch for grammar "${language}".`,
      `Expected ${expected}, got ${actual}.`,
      `REFUSING to load — see Phase 3.5 audit attack vector #3.`,
    ];
    super(sentences.join(' '));
    this.name = 'GrammarSHAMismatchError';
    this.language = language;
    this.expected = expected;
    this.actual = actual;
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Raised when a grammar cannot be obtained at all.
 *
 * The loader uses it when the language has no manifest entry, when no fetch
 * implementation exists, and when the download throws or returns a non-OK
 * response. The message tells the reader that regex introspection is used
 * instead.
 */
export class GrammarUnavailableError extends Error {
  /** Language whose grammar could not be obtained. */
  public readonly language: TreeSitterLanguage;
  /** Underlying failure, when one was supplied. */
  public readonly cause?: unknown;

  constructor(language: TreeSitterLanguage, cause?: unknown) {
    // Pick the most useful description: an Error's message, any other truthy
    // value stringified, or a generic explanation when nothing was given.
    let detail: string;
    if (cause instanceof Error) {
      detail = cause.message;
    } else if (cause) {
      detail = String(cause);
    } else {
      detail = 'no cached grammar and download failed';
    }

    const sentences = [
      `[tree-sitter-loader] Grammar for "${language}" is unavailable: ${detail}.`,
      `Falling back to regex introspection for files in ${language}.`,
    ];
    super(sentences.join(' '));
    this.name = 'GrammarUnavailableError';
    this.language = language;
    this.cause = cause;
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Raised when the entry found at the grammar cache path is a symlink or is
 * otherwise not a regular file.
 *
 * A symlink planted at the expected cache path could redirect reads or writes
 * elsewhere on the filesystem, so the loader refuses to touch it.
 * (Phase 3.5 finding #3 — symlink attack on cache dir.)
 */
export class GrammarCacheSymlinkError extends Error {
  /** The cache path that was rejected. */
  public readonly cachePath: string;

  constructor(cachePath: string) {
    const fragments = [
      `[tree-sitter-loader] Refusing to load grammar — cache path "${cachePath}" is a symlink`,
      `or non-regular file. (Phase 3.5 finding #3 — symlink attack vector.)`,
    ];
    super(fragments.join(' '));
    this.name = 'GrammarCacheSymlinkError';
    this.cachePath = cachePath;
  }
}
|
|
96
|
+
|
|
97
|
+
/**
 * Raised when a manifest URL does not use the `https://` scheme.
 *
 * The manifest is hardcoded, so this is defense in depth: an edit that
 * introduces a plain-http URL is rejected when the loader runs rather than
 * relying on code review. (Phase 3.5 finding #3 — MITM on download.)
 */
export class GrammarUrlNotHttpsError extends Error {
  /** The rejected URL. */
  public readonly url: string;

  constructor(url: string) {
    const sentences = [
      `[tree-sitter-loader] Refusing to download grammar from non-HTTPS URL: ${url}.`,
      `Only https:// URLs are accepted. (Phase 3.5 finding #3.)`,
    ];
    super(sentences.join(' '));
    this.name = 'GrammarUrlNotHttpsError';
    this.url = url;
  }
}
|
|
114
|
+
|
|
115
|
+
// ============================================================
|
|
116
|
+
// Pinned manifest
|
|
117
|
+
// ============================================================
|
|
118
|
+
|
|
119
|
+
/** One pinned grammar download: where to fetch it and what it must hash to. */
interface ManifestEntry {
  /** Download location; the loader rejects anything that is not `https://`. */
  url: string;
  /** Expected SHA-256 of the `.wasm` bytes, compared against a hex digest. */
  sha256: string;
  /** Version label recorded alongside the URL. */
  version: string;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Pinned grammar manifest, resident in source so that changing it requires a
 * release.
 *
 * Binaries come from the `tree-sitter-wasms` npm package
 * (https://npm.im/tree-sitter-wasms), which publishes pre-built WASM parsers.
 * It is NOT a dependency (plan §Phase 0 bans bundling); files are fetched from
 * unpkg on first use. The per-language `tree-sitter-<lang>` npm packages ship
 * only C source and native `.node` prebuilds, not `.wasm` — confirmed from
 * unpkg `?meta` listings during Phase 9 release-prep.
 *
 * SHA-256 hashes were computed 2026-04-28 with:
 *   curl -fsSL <url> | shasum -a 256
 *
 * `tree-sitter-loader.test.ts` exercises the verification path by injecting
 * manifest entries that intentionally mismatch.
 *
 * Key order is significant: it is echoed in the "v1 supports" error message.
 */
export const GRAMMAR_MANIFEST: Partial<Record<TreeSitterLanguage, ManifestEntry>> = {
  python: {
    url: 'https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-python.wasm',
    sha256: '9056d0fb0c337810d019fae350e8167786119da98f0f282aceae7ab89ee8253b',
    version: '0.1.13',
  },
  typescript: {
    url: 'https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-typescript.wasm',
    sha256: '8515404dceed38e1ed86aa34b09fcf3379fff1b4ff9dd3967bcd6d1eb5ac3d8f',
    version: '0.1.13',
  },
  javascript: {
    url: 'https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-javascript.wasm',
    sha256: '63812b9e275d26851264734868d27a1656bd44a2ef6eb3e85e6b03728c595ab5',
    version: '0.1.13',
  },
  swift: {
    url: 'https://unpkg.com/tree-sitter-wasms@0.1.13/out/tree-sitter-swift.wasm',
    sha256: '41c4fdb2249a3aa6d87eed0d383081ff09725c2248b4977043a43825980ffcc7',
    version: '0.1.13',
  },
};
|
|
164
|
+
|
|
165
|
+
// ============================================================
|
|
166
|
+
// Cache + Parser init
|
|
167
|
+
// ============================================================
|
|
168
|
+
|
|
169
|
+
/**
 * Resolve the directory that holds cached grammar `.wasm` files.
 *
 * `MASSU_WASM_CACHE_DIR` wins when it is set to a non-blank value; otherwise
 * the default is `~/.massu/wasm-cache`.
 *
 * A blank override (`MASSU_WASM_CACHE_DIR=""`) is treated as unset. `??` only
 * skips `undefined`/`null`, and `join('', file)` collapses to a path relative
 * to the current working directory, which would scatter grammar files into
 * whatever directory the CLI happened to be run from.
 *
 * @returns The cache directory path (not guaranteed to exist yet).
 */
function getCacheDir(): string {
  const override = process.env.MASSU_WASM_CACHE_DIR;
  if (override !== undefined && override.trim() !== '') {
    return override;
  }
  return join(homedir(), '.massu', 'wasm-cache');
}
|
|
172
|
+
|
|
173
|
+
/**
 * Build the on-disk cache location for one grammar.
 *
 * The file name embeds both the language and the pinned digest
 * (`<language>-<sha>.wasm`), so a manifest bump maps to a different file.
 */
function getCachedPath(language: TreeSitterLanguage, sha: string): string {
  const fileName = `${language}-${sha}.wasm`;
  const cacheDir = getCacheDir();
  return join(cacheDir, fileName);
}
|
|
176
|
+
|
|
177
|
+
/** Compute the SHA-256 of `bytes` and return it as a lowercase hex string. */
function sha256(bytes: Uint8Array): string {
  const hasher = createHash('sha256');
  hasher.update(bytes);
  return hasher.digest('hex');
}
|
|
180
|
+
|
|
181
|
+
/** In-flight or completed `Parser.init()` call; `null` until first use or after a failure. */
let parserInitPromise: Promise<void> | null = null;

/**
 * `Parser.init()` is async and must be called once before any `new Parser()`.
 * This function makes that safe to call repeatedly: concurrent and later
 * callers share a single underlying `Parser.init()` invocation.
 *
 * If initialization rejects, the memoized promise is discarded so the next
 * call retries. Without that, one transient failure would be cached and
 * replayed to every caller for the rest of the process.
 *
 * Test harnesses can mock this by stubbing `Parser.init`.
 *
 * @returns A promise that settles when the Tree-sitter runtime is ready.
 * @throws Whatever `Parser.init()` rejects with.
 */
export async function ensureParserInitialized(): Promise<void> {
  if (parserInitPromise === null) {
    parserInitPromise = Parser.init().catch((err: unknown) => {
      // Forget the failed attempt, then surface the original error unchanged.
      parserInitPromise = null;
      throw err;
    });
  }
  return parserInitPromise;
}
|
|
194
|
+
|
|
195
|
+
// ============================================================
|
|
196
|
+
// Loader (the main entry point)
|
|
197
|
+
// ============================================================
|
|
198
|
+
|
|
199
|
+
/** The slice of a fetch response that the loader actually reads. */
interface GrammarFetchResponse {
  ok: boolean;
  arrayBuffer: () => Promise<ArrayBuffer>;
  status?: number;
}

/** Signature the loader expects from a fetch implementation. */
type GrammarFetch = (url: string) => Promise<GrammarFetchResponse>;

/** Optional hooks for `loadGrammar`; both exist for test injection. */
interface LoaderOptions {
  /**
   * Test-injection: replaces the manifest entry for a language. Production
   * callers leave it undefined; tests use it to drive the SHA-mismatch and
   * download-failure paths.
   */
  manifestOverride?: Partial<Record<TreeSitterLanguage, ManifestEntry>>;
  /**
   * Test-injection: replaces the fetch implementation. When omitted the
   * loader uses global `fetch`. Tests pass a mock that returns a fixed body
   * or throws.
   */
  fetchImpl?: GrammarFetch;
}
|
|
212
|
+
|
|
213
|
+
/** Process-lifetime cache of parsed grammars, keyed by language only. */
const loadedGrammars = new Map<TreeSitterLanguage, Language>();

/**
 * Read the grammar bytes cached at `cachePath`, if any.
 *
 * Uses `lstatSync` (NOT `statSync`) so a symlink at the cache path is detected
 * and rejected rather than followed.
 * (Phase 3.5 finding #3 — symlink attack on cache dir.)
 *
 * @returns The bytes, or `null` when the entry is missing, unreadable, or
 *   empty; all three are treated as a cache miss.
 * @throws GrammarCacheSymlinkError when the entry is a symlink or not a regular file.
 */
function readCachedGrammar(cachePath: string): Uint8Array | null {
  let stat;
  try {
    stat = lstatSync(cachePath);
  } catch {
    return null;
  }
  if (stat.isSymbolicLink() || !stat.isFile()) {
    throw new GrammarCacheSymlinkError(cachePath);
  }
  try {
    const bytes = readFileSync(cachePath);
    return bytes.byteLength > 0 ? bytes : null;
  } catch {
    // Treat read failure as cache miss; the caller falls through to download.
    return null;
  }
}

/**
 * Atomically persist verified grammar bytes: write `<cachePath>.tmp.<pid>`,
 * then rename it onto `cachePath`.
 *
 * Directory mode 0o700 and file mode 0o600 keep the cache owner-only.
 * (Phase 3.5 finding #3 — file-mode hardening.)
 *
 * The temp file is created with the exclusive `wx` flag after removing any
 * leftover, so a pre-planted entry at the temp path (including a symlink) is
 * never written through. The temp file is removed on ANY failure, not just a
 * failed rename, so a partial write does not leave debris in the cache dir.
 *
 * @throws The underlying fs error; the caller decides whether that is fatal.
 */
function writeGrammarCache(cachePath: string, body: Uint8Array): void {
  const cacheDir = dirname(cachePath);
  mkdirSync(cacheDir, { recursive: true, mode: 0o700 });
  // Presumably covers a directory that already existed with a looser mode.
  try { chmodSync(cacheDir, 0o700); } catch { /* best effort */ }

  const tmpPath = `${cachePath}.tmp.${process.pid}`;
  try {
    try { unlinkSync(tmpPath); } catch { /* nothing stale to remove */ }
    writeFileSync(tmpPath, body, { mode: 0o600, flag: 'wx' });
    try { chmodSync(tmpPath, 0o600); } catch { /* best effort */ }
    renameSync(tmpPath, cachePath);
  } catch (e) {
    try { unlinkSync(tmpPath); } catch { /* ignore */ }
    throw e;
  }
  try { chmodSync(cachePath, 0o600); } catch { /* best effort */ }
}

/**
 * Lazy-load a Tree-sitter grammar. Only fetches/caches the grammar for
 * `language`; other languages are unaffected.
 *
 * Order:
 *   1. In-memory cache hit → return. The in-memory cache is keyed by language
 *      alone, so `options` are not consulted once a language is loaded.
 *   2. Disk cache hit + SHA verify pass → load from disk.
 *   3. Disk cache hit + SHA mismatch → throw GrammarSHAMismatchError.
 *   4. Cache miss → fetch from pinned URL → SHA verify → atomic write → load.
 *   5. Fetch fails AND no cache → throw GrammarUnavailableError.
 *
 * A failed cache write is non-fatal: it is reported on stderr and the grammar
 * is loaded from the downloaded bytes already in memory.
 *
 * NOTE(review): the download has no timeout here; a stalled connection would
 * presumably wait on whatever the fetch implementation does by default.
 *
 * @param language Grammar to load.
 * @param options Test-injection hooks (manifest and fetch overrides).
 * @returns The loaded grammar.
 * @throws GrammarUnavailableError no manifest entry, no fetch implementation,
 *   or the download threw / returned a non-OK response.
 * @throws GrammarSHAMismatchError cached or downloaded bytes fail verification.
 * @throws GrammarCacheSymlinkError the cache path is a symlink or non-regular file.
 * @throws GrammarUrlNotHttpsError the manifest URL is not `https://`.
 */
export async function loadGrammar(
  language: TreeSitterLanguage,
  options: LoaderOptions = {},
): Promise<Language> {
  await ensureParserInitialized();

  const inMemory = loadedGrammars.get(language);
  if (inMemory) return inMemory;

  const manifest = options.manifestOverride?.[language] ?? GRAMMAR_MANIFEST[language];
  if (!manifest) {
    throw new GrammarUnavailableError(
      language,
      new Error(`No manifest entry for language "${language}". v1 supports: ${Object.keys(GRAMMAR_MANIFEST).join(', ')}.`),
    );
  }

  const cachePath = getCachedPath(language, manifest.sha256);

  // 2/3: disk cache check.
  const cachedBytes = readCachedGrammar(cachePath);
  if (cachedBytes) {
    const actualSha = sha256(cachedBytes);
    if (actualSha !== manifest.sha256) {
      // Refuse to load. Don't silently re-download — that would mask
      // tampering of the on-disk cache.
      throw new GrammarSHAMismatchError(language, manifest.sha256, actualSha);
    }
    const fromDisk = await Language.load(cachedBytes);
    loadedGrammars.set(language, fromDisk);
    return fromDisk;
  }

  // 4/5: download. Defense in depth: refuse non-HTTPS URLs.
  if (!/^https:\/\//i.test(manifest.url)) {
    throw new GrammarUrlNotHttpsError(manifest.url);
  }

  const fetchImpl = options.fetchImpl ?? (globalThis.fetch as LoaderOptions['fetchImpl']);
  if (!fetchImpl) {
    throw new GrammarUnavailableError(
      language,
      new Error('No fetch implementation available (Node < 18?)'),
    );
  }

  let body: Uint8Array;
  try {
    const res = await fetchImpl(manifest.url);
    if (!res.ok) {
      throw new Error(`HTTP ${res.status ?? 'unknown'} from ${manifest.url}`);
    }
    body = new Uint8Array(await res.arrayBuffer());
  } catch (e) {
    throw new GrammarUnavailableError(language, e);
  }

  // Verify BEFORE anything is written to disk or handed to the parser.
  const downloadedSha = sha256(body);
  if (downloadedSha !== manifest.sha256) {
    throw new GrammarSHAMismatchError(language, manifest.sha256, downloadedSha);
  }

  try {
    writeGrammarCache(cachePath, body);
  } catch (e) {
    // Cache write failure is non-fatal — we still have `body` in memory and
    // can load directly. Log to stderr per VR-USER-ERROR-MESSAGES style.
    console.error(
      `[tree-sitter-loader] cache write failed for ${language}: ${e instanceof Error ? e.message : String(e)} — loading directly from memory.`,
    );
  }

  const lang = await Language.load(body);
  loadedGrammars.set(language, lang);
  return lang;
}
|
|
341
|
+
|
|
342
|
+
/**
 * Test-only hook: forget every grammar held in the in-memory map so the next
 * `loadGrammar` call goes back to disk or network. Files in the disk cache are
 * left untouched. Production code has no use for this — the map is meant to
 * live as long as the process.
 */
export function __resetLoadedGrammars(): void {
  // Only the in-memory entries are dropped; nothing on disk is removed.
  loadedGrammars.clear();
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Plan 3b — Phase 1: AST Adapter contract types.
|
|
6
|
+
*
|
|
7
|
+
* Lives at `packages/core/src/detect/adapters/types.ts` per the spec doc
|
|
8
|
+
* (`docs/internal/2026-04-26-ast-lsp-spec.md` §2). All types are local —
|
|
9
|
+
* NONE re-exported from `web-tree-sitter`.
|
|
10
|
+
*
|
|
11
|
+
* Adapter authors import from this module only; the runner (`runner.ts`)
|
|
12
|
+
* orchestrates execution and the loader (`tree-sitter-loader.ts`) handles
|
|
13
|
+
* grammar acquisition.
|
|
14
|
+
*
|
|
15
|
+
* Per-field confidence is enforced (NOT per-adapter): a single weak field
|
|
16
|
+
* MUST NOT poison the rest. The runner consumes `confidence` per-adapter for
|
|
17
|
+
* the moment, but the merge rule reads each `conventions[field]` against the
|
|
18
|
+
* provenance trail to decide what survives.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
// ============================================================
|
|
22
|
+
// Languages enumerated for the AST adapter set (Phase 1 + 3c)
|
|
23
|
+
// ============================================================
|
|
24
|
+
|
|
25
|
+
/**
 * Closed set of Tree-sitter grammar names that massu has, or reserves,
 * first-party adapters for.
 *
 * This is a local string-literal union; it is NOT re-exported from
 * `web-tree-sitter`, which exposes `Language` as a class rather than a list
 * of names. Phase 1 ships adapters for python / typescript / javascript /
 * swift only; every other member is reserved for Plan 3c.
 */
export type TreeSitterLanguage =
  // Phase 1 adapters
  | 'python'
  | 'typescript'
  | 'javascript'
  | 'swift'
  // Reserved for Plan 3c
  | 'rust'
  | 'go'
  | 'ruby'
  | 'php'
  | 'java'
  | 'kotlin'
  | 'elixir'
  | 'erlang'
  | 'csharp'
  | 'cpp'
  | 'haskell'
  | 'ocaml';
|
|
50
|
+
|
|
51
|
+
// ============================================================
|
|
52
|
+
// Inputs to adapter dispatch
|
|
53
|
+
// ============================================================
|
|
54
|
+
|
|
55
|
+
/**
 * Read-only bundle of project signals that the runner assembles BEFORE it
 * dispatches to adapters.
 *
 * Building the bundle up-front is what lets `matches()` stay cheap: adapters
 * answer from these fields and do no file IO of their own inside `matches()`.
 */
export interface DetectionSignals {
  /** `package.json`, parsed (root or first workspace); undefined when absent. */
  packageJson?: Record<string, unknown>;
  /** `pyproject.toml`, parsed; undefined when absent. */
  pyprojectToml?: Record<string, unknown>;
  /** `Gemfile` as raw text; undefined when absent. */
  gemfile?: string;
  /** `Cargo.toml`, parsed; undefined when absent. */
  cargoToml?: Record<string, unknown>;
  /** `go.mod` as raw text; undefined when absent. */
  goMod?: string;
  /** Names of directories directly under the project root (one level only). */
  presentDirs: Set<string>;
  /** Basenames of files directly under the project root (one level only). */
  presentFiles: Set<string>;
}
|
|
77
|
+
|
|
78
|
+
/**
 * One sampled source file, handed from the runner to an adapter.
 *
 * The runner has already read the file: adapters work from `content` and
 * MUST NOT go back to disk inside `introspect()`.
 */
export interface SourceFile {
  /** Location of the file. */
  path: string;
  /** Pre-read file text. */
  content: string;
  /** Grammar this file should be parsed with. */
  language: TreeSitterLanguage;
  /** Size in bytes (the pre-read length). */
  size: number;
}
|
|
90
|
+
|
|
91
|
+
// ============================================================
|
|
92
|
+
// Adapter contract
|
|
93
|
+
// ============================================================
|
|
94
|
+
|
|
95
|
+
/**
 * Audit-trail entry emitted for every captured field. Users can inspect
 * `detected.<adapter>._provenance` to see exactly which file, line, and query
 * produced a given value.
 */
export interface Provenance {
  /** Name of the convention field this entry explains. */
  field: string;
  /** File the value was extracted from. */
  sourceFile: string;
  /** Line in `sourceFile` where the match was found. */
  line: number;
  /** Query that produced the match. */
  query: string;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Confidence grade shared by `AdapterResult` and `AdapterResolved`, declared
 * once so the two cannot drift apart:
 *
 *   'high'  : single canonical match, query produced exactly one result
 *   'medium': multiple matches, all agree
 *   'low'   : multiple matches with disagreement (still emitted, with warning)
 *   'none'  : no matches, timed out, or threw — fields are dropped
 */
export type AdapterConfidence = 'high' | 'medium' | 'low' | 'none';

/** What an adapter's `introspect()` returns. */
export interface AdapterResult {
  /**
   * Becomes `detected.<adapter.id>` in `massu.config.yaml`. Field names are
   * adapter-defined; values are `unknown` so adapters can return strings,
   * arrays, or nested records as needed.
   */
  conventions: Record<string, unknown>;
  /**
   * Per-field provenance trail. The runner writes this to
   * `detected.<adapter.id>._provenance` so a downstream auditor can verify
   * any extracted value.
   */
  provenance: Provenance[];
  /** Overall confidence of this result; see `AdapterConfidence`. */
  confidence: AdapterConfidence;
}

/** Contract every codebase adapter implements. */
export interface CodebaseAdapter {
  /** Stable adapter id, e.g. "python-fastapi". Becomes `detected.<id>` block. */
  id: string;
  /** Languages this adapter consumes. Used by the runner to skip work. */
  languages: TreeSitterLanguage[];
  /**
   * Cheap signal check — must NOT do file IO. Returns true if any signal
   * suggests this adapter should run.
   */
  matches(signals: DetectionSignals): boolean;
  /**
   * Sample N files (already read by the runner), run AST queries, return
   * extracted conventions. May throw — the runner isolates failures.
   */
  introspect(files: SourceFile[], rootDir: string): Promise<AdapterResult>;
}

// ============================================================
// Runner output
// ============================================================

/**
 * The runner's output: per-adapter id → its conventions block (with the
 * `_provenance` map merged in). The introspector then folds this into the
 * `detected.<adapter.id>` namespace alongside the existing
 * `detected.python` / `detected.swift` / `detected.typescript` regex blocks.
 */
export interface MergedAdapterOutput {
  /** Per-adapter id → resolved conventions. */
  byAdapter: Record<string, AdapterResolved>;
  /** Adapters that were skipped (didn't match) for diagnostic logging. */
  skipped: string[];
  /** Adapters that threw during introspect — runner isolates these. */
  errored: Array<{ adapterId: string; error: string }>;
}

/**
 * Resolved-and-merged form of an `AdapterResult`. Provenance is folded into
 * `_provenance` (key per field, value = `path:line :: query`).
 */
export interface AdapterResolved {
  /** Same role as `AdapterResult.conventions`. */
  conventions: Record<string, unknown>;
  /** field-name -> "relativePath:line :: queryName". Empty when no fields. */
  _provenance: Record<string, string>;
  /** Confidence carried over from the adapter; see `AdapterConfidence`. */
  confidence: AdapterConfidence;
}
|