@massu/core 1.2.1 → 1.4.0-soak.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +40 -0
  2. package/commands/README.md +137 -0
  3. package/commands/massu-deploy.python-docker.md +170 -0
  4. package/commands/massu-deploy.python-fly.md +189 -0
  5. package/commands/massu-deploy.python-launchd.md +144 -0
  6. package/commands/massu-deploy.python-systemd.md +163 -0
  7. package/commands/massu-deploy.python.md +200 -0
  8. package/commands/massu-scaffold-page.md +172 -59
  9. package/commands/massu-scaffold-page.swift.md +121 -0
  10. package/commands/massu-scaffold-router.python-django.md +153 -0
  11. package/commands/massu-scaffold-router.python-fastapi.md +145 -0
  12. package/commands/massu-scaffold-router.python.md +143 -0
  13. package/dist/cli.js +10170 -4138
  14. package/dist/hooks/auto-learning-pipeline.js +44 -6
  15. package/dist/hooks/classify-failure.js +44 -6
  16. package/dist/hooks/cost-tracker.js +44 -6
  17. package/dist/hooks/fix-detector.js +44 -6
  18. package/dist/hooks/incident-pipeline.js +44 -6
  19. package/dist/hooks/post-edit-context.js +44 -6
  20. package/dist/hooks/post-tool-use.js +44 -6
  21. package/dist/hooks/pre-compact.js +44 -6
  22. package/dist/hooks/pre-delete-check.js +44 -6
  23. package/dist/hooks/quality-event.js +44 -6
  24. package/dist/hooks/rule-enforcement-pipeline.js +44 -6
  25. package/dist/hooks/session-end.js +44 -6
  26. package/dist/hooks/session-start.js +4789 -410
  27. package/dist/hooks/user-prompt.js +44 -6
  28. package/package.json +10 -4
  29. package/src/cli.ts +28 -2
  30. package/src/commands/config-refresh.ts +88 -20
  31. package/src/commands/init.ts +130 -23
  32. package/src/commands/install-commands.ts +482 -42
  33. package/src/commands/refresh-log.ts +37 -0
  34. package/src/commands/show-template.ts +65 -0
  35. package/src/commands/template-engine.ts +262 -0
  36. package/src/commands/watch.ts +430 -0
  37. package/src/config.ts +69 -3
  38. package/src/detect/adapters/nextjs-trpc.ts +166 -0
  39. package/src/detect/adapters/parse-guard.ts +133 -0
  40. package/src/detect/adapters/python-django.ts +208 -0
  41. package/src/detect/adapters/python-fastapi.ts +223 -0
  42. package/src/detect/adapters/query-helpers.ts +170 -0
  43. package/src/detect/adapters/runner.ts +252 -0
  44. package/src/detect/adapters/swift-swiftui.ts +171 -0
  45. package/src/detect/adapters/tree-sitter-loader.ts +348 -0
  46. package/src/detect/adapters/types.ts +174 -0
  47. package/src/detect/codebase-introspector.ts +190 -0
  48. package/src/detect/index.ts +28 -2
  49. package/src/detect/regex-fallback.ts +449 -0
  50. package/src/hooks/session-start.ts +94 -3
  51. package/src/lib/gitToplevel.ts +22 -0
  52. package/src/lib/installLock.ts +179 -0
  53. package/src/lib/pidLiveness.ts +67 -0
  54. package/src/lsp/auto-detect.ts +89 -0
  55. package/src/lsp/client.ts +590 -0
  56. package/src/lsp/enrich.ts +127 -0
  57. package/src/lsp/types.ts +221 -0
  58. package/src/watch/daemon.ts +385 -0
  59. package/src/watch/lockfile-detector.ts +65 -0
  60. package/src/watch/paths.ts +279 -0
  61. package/src/watch/state.ts +178 -0
@@ -0,0 +1,223 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: FastAPI AST adapter.
6
+ *
7
+ * Extracts:
8
+ * - auth_dep: name passed to `Depends(...)` in router files
9
+ * - api_prefix_base: first path segment of `APIRouter(prefix="/...")`
10
+ * - test_async_pattern: `@pytest.mark.asyncio` (with or without parens)
11
+ *
12
+ * Confidence rules:
13
+ * - 'high' if exactly ONE distinct auth dep is found, or if no auth dep is
14
+ * found but an `APIRouter(prefix=...)` is.
15
+ * - 'medium' if only the `@pytest.mark.asyncio` marker is found.
16
+ * - 'low' if multiple candidate auth deps are found (ambiguous — but still
17
+ * emitted so the user can see what was found).
18
+ * - 'none' if nothing was extracted (no `Depends(...)`, no
19
+ * `APIRouter(prefix=)`, no asyncio marker) — regex fallback takes over.
20
+ *
21
+ * Does NOT use regex on file content — only Tree-sitter S-expression queries
22
+ * compiled via `query-helpers.ts`. Regex would be the regex-fallback path.
23
+ */
24
+
25
+ import { Parser } from 'web-tree-sitter';
26
+ import type { CodebaseAdapter, AdapterResult, DetectionSignals, Provenance, SourceFile } from './types.ts';
27
+ import { runQuery, InvalidQueryError } from './query-helpers.ts';
28
+ import { loadGrammar } from './tree-sitter-loader.ts';
29
+ import { isParsableSource, MAX_AST_FILE_BYTES } from './parse-guard.ts';
30
+
31
+ // ============================================================
32
+ // Tree-sitter S-expression queries
33
+ // ============================================================
34
+
/**
 * Tree-sitter query that finds the dependency handed to FastAPI's `Depends`,
 * e.g. `Depends(get_current_user)` or `Depends(require_tier_or_guardian)`.
 *
 * The `#eq?` predicate pins the callee to the bare identifier `Depends`, so
 * arbitrary `<x>(<y>)` calls are ignored. The identifier argument is exposed
 * as the `@auth_dep` capture.
 */
const AUTH_DEP_QUERY = `
(call
  function: (identifier) @_callee (#eq? @_callee "Depends")
  arguments: (argument_list
    (identifier) @auth_dep))
`;
48
+
/**
 * Tree-sitter query for `APIRouter(prefix="/api/orders", ...)`.
 *
 * Only calls whose callee is the bare identifier `APIRouter` and that pass a
 * `prefix=` keyword argument with a string value match. The string node
 * (quotes included) is exposed as `@prefix_value`; the caller trims it down
 * to the base segment.
 */
const API_PREFIX_QUERY = `
(call
  function: (identifier) @_callee (#eq? @_callee "APIRouter")
  arguments: (argument_list
    (keyword_argument
      name: (identifier) @_kw (#eq? @_kw "prefix")
      value: (string) @prefix_value)))
`;
61
+
/**
 * `@pytest.mark.asyncio` decorator, with or without a call.
 *
 * Two patterns are needed because the two spellings parse differently:
 *   - `@pytest.mark.asyncio`       → `(decorator (attribute ...))`
 *   - `@pytest.mark.asyncio(...)`  → `(decorator (call function: (attribute ...)))`
 *
 * Both patterns use the same capture names, so consumers see an identical
 * capture set whichever form matched. The whole decorator is captured as
 * `@decorator` for provenance; the emitted convention value is fixed as the
 * canonical form by the adapter.
 */
const PYTEST_ASYNCIO_QUERY = `
(decorator
  (attribute
    object: (attribute
      object: (identifier) @_pkg (#eq? @_pkg "pytest")
      attribute: (identifier) @_mark (#eq? @_mark "mark"))
    attribute: (identifier) @_marker (#eq? @_marker "asyncio"))) @decorator

(decorator
  (call
    function: (attribute
      object: (attribute
        object: (identifier) @_pkg (#eq? @_pkg "pytest")
        attribute: (identifier) @_mark (#eq? @_mark "mark"))
      attribute: (identifier) @_marker (#eq? @_marker "asyncio")))) @decorator
`;
74
+
75
+ // ============================================================
76
+ // Adapter
77
+ // ============================================================
78
+
/**
 * Remove the Python string-literal delimiters from the text of a Tree-sitter
 * `string` node (the node text includes prefix letters and quotes).
 *
 * Handles an optional one- or two-letter string prefix (`r`, `b`, `u`, `f`
 * in any case) and both single and triple quotes. If the text is not a
 * balanced literal, falls back to trimming one quote character from each end.
 */
function stripPythonStringDelimiters(raw: string): string {
  const body = raw.replace(/^[rRbBuUfF]{1,2}(?=['"])/, '');
  // Triple quotes must be tried first, otherwise `"""x"""` would lose only
  // one quote character per side.
  for (const quote of ['"""', "'''", '"', "'"]) {
    if (body.length >= quote.length * 2 && body.startsWith(quote) && body.endsWith(quote)) {
      return body.slice(quote.length, body.length - quote.length);
    }
  }
  return body.replace(/^['"]/, '').replace(/['"]$/, '');
}

/**
 * FastAPI AST adapter.
 *
 * `matches()` is a signal-only gate (no file IO). `introspect()` runs the
 * three Tree-sitter queries over every supplied file and reports, per field,
 * the first value seen together with where it was seen:
 *   - `auth_dep`            — identifier passed to `Depends(...)`
 *   - `api_prefix_base`     — first path segment of `APIRouter(prefix=...)`
 *   - `test_async_pattern`  — `@pytest.mark.asyncio`
 */
export const pythonFastApiAdapter: CodebaseAdapter = {
  id: 'python-fastapi',
  languages: ['python'],

  /**
   * Cheap applicability check. True when any of these holds:
   *   1. the raw pyproject.toml text mentions `fastapi` as a whole word, or
   *   2. the project root has a `routers` directory, or
   *   3. the project root has an `app` directory AND a `main.py` file.
   */
  matches(signals: DetectionSignals): boolean {
    const pyToml = signals.pyprojectToml as { __raw?: string } | undefined;
    if (pyToml?.__raw && /\bfastapi\b/i.test(pyToml.__raw)) return true;
    if (signals.presentDirs.has('routers')) return true;
    if (signals.presentDirs.has('app') && signals.presentFiles.has('main.py')) return true;
    return false;
  },

  /**
   * Extract conventions from `files`.
   *
   * Returns confidence 'none' with empty conventions when there are no files,
   * when the Python grammar cannot be loaded, or when nothing was extracted.
   *
   * @throws InvalidQueryError when one of this adapter's own queries fails to
   *   compile — that is a developer bug and is deliberately not swallowed.
   */
  async introspect(files: SourceFile[], _rootDir: string): Promise<AdapterResult> {
    if (files.length === 0) {
      return { conventions: {}, provenance: [], confidence: 'none' };
    }

    let language;
    try {
      language = await loadGrammar('python');
    } catch {
      // Grammar unavailable → report 'none' so the regex fallback takes over.
      return { conventions: {}, provenance: [], confidence: 'none' };
    }

    const parser = new Parser();
    parser.setLanguage(language);

    // Per-field collection: value → where it was first seen. Map insertion
    // order keeps "first seen" stable with respect to the input file order.
    type Origin = { line: number; file: string };
    const authDeps = new Map<string, Origin>();
    const prefixBases = new Map<string, Origin>();
    const testAsyncPatterns = new Map<string, Origin>();

    try {
      for (const file of files) {
        // Defense-in-depth gate at adapter tier: the runner also gates, but
        // adapters may be invoked directly from tests/CLI.
        const skip = isParsableSource(file.content, file.size);
        if (skip) {
          process.stderr.write(
            `[massu/ast] WARN: python-fastapi skipping ${file.path}: ${skip.reason} (${skip.detail}). Cap=${MAX_AST_FILE_BYTES}. (Phase 3.5 mitigation)\n`,
          );
          continue;
        }
        try {
          // Auth dep
          for (const hit of runQuery(parser, file.content, AUTH_DEP_QUERY, 'fastapi-auth-dep', file.path)) {
            const name = hit.captures.auth_dep;
            if (name && !authDeps.has(name)) {
              authDeps.set(name, { line: hit.line, file: file.path });
            }
          }
          // API prefix
          for (const hit of runQuery(parser, file.content, API_PREFIX_QUERY, 'fastapi-api-prefix', file.path)) {
            const raw = hit.captures.prefix_value;
            if (!raw) continue;
            const base = extractPrefixBase(stripPythonStringDelimiters(raw));
            if (base && !prefixBases.has(base)) {
              prefixBases.set(base, { line: hit.line, file: file.path });
            }
          }
          // pytest.mark.asyncio — the value is always the canonical spelling.
          for (const hit of runQuery(parser, file.content, PYTEST_ASYNCIO_QUERY, 'fastapi-pytest-asyncio', file.path)) {
            const pat = '@pytest.mark.asyncio';
            if (!testAsyncPatterns.has(pat)) {
              testAsyncPatterns.set(pat, { line: hit.line, file: file.path });
            }
          }
        } catch (e) {
          if (e instanceof InvalidQueryError) {
            // Compile-time failure of OUR query is a developer bug — surface it.
            throw e;
          }
          // Any other per-file failure: skip this file, keep going.
          continue;
        }
      }
    } finally {
      try { parser.delete(); } catch { /* ignore */ }
    }

    // Build result
    const conventions: Record<string, unknown> = {};
    const provenance: Provenance[] = [];

    // Emit the first-seen value of a field (if any) with its provenance.
    const emitFirst = (field: string, query: string, seen: Map<string, Origin>): void => {
      const first = seen.entries().next();
      if (first.done) return;
      const [value, origin] = first.value;
      conventions[field] = value;
      provenance.push({ field, sourceFile: origin.file, line: origin.line, query });
    };

    // With several candidate auth deps the first-seen one is still emitted;
    // the ambiguity is expressed through the 'low' confidence below.
    emitFirst('auth_dep', 'fastapi-auth-dep', authDeps);
    emitFirst('api_prefix_base', 'fastapi-api-prefix', prefixBases);
    emitFirst('test_async_pattern', 'fastapi-pytest-asyncio', testAsyncPatterns);

    let confidence: AdapterResult['confidence'];
    if (Object.keys(conventions).length === 0) {
      confidence = 'none';
    } else if (authDeps.size === 1 || (authDeps.size === 0 && prefixBases.size > 0)) {
      confidence = 'high';
    } else if (authDeps.size >= 2) {
      confidence = 'low';
    } else {
      // Only the pytest marker was found.
      confidence = 'medium';
    }

    return { conventions, provenance, confidence };
  },
};
212
+
213
+ // ============================================================
214
+ // Helpers
215
+ // ============================================================
216
+
/**
 * Reduce a router prefix to its first path segment, keeping the leading slash
 * (`/api/orders` → `/api`).
 *
 * Returns `null` when the prefix does not start with `/` or when no segment
 * follows the leading slash(es).
 */
function extractPrefixBase(prefix: string): string | null {
  if (prefix.charAt(0) !== '/') return null;
  const [head] = prefix.replace(/^\/+/, '').split('/');
  return head ? `/${head}` : null;
}
@@ -0,0 +1,170 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: Tree-sitter query wrapper.
6
+ *
7
+ * Adapters consume the helpers in this file — never the raw `web-tree-sitter`
8
+ * API. This keeps the surface area minimal and testable.
9
+ *
10
+ * Design:
11
+ * - `compileQuery` caches compiled `Query` instances per (language, source)
12
+ * tuple. Compiling an S-expression is non-trivial; cache hit-rate is
13
+ * critical when the same query runs across N sampled files.
14
+ * - `runQuery` returns the captures as `{captures, file, line}` records so
15
+ * adapters never need to touch raw `Node` objects.
16
+ * - `InvalidQueryError` is the typed error thrown when an S-expression is
17
+ * malformed; never let a raw `Error` reach the adapter (per audit-iter-5
18
+ * fix HH test (b)).
19
+ */
20
+
21
+ import { Query, type Language, type Node, type Parser, type QueryMatch } from 'web-tree-sitter';
22
+
/**
 * Typed error raised when an S-expression query cannot be used with the
 * supplied grammar.
 *
 * Exposes the query's name, its full source text and the underlying failure
 * (`cause`) so adapter authors can debug. The message embeds the cause's
 * message (or its string form when it is not an `Error`) followed by the
 * query source.
 */
export class InvalidQueryError extends Error {
  public readonly queryName: string;
  public readonly querySource: string;
  public readonly cause?: unknown;

  constructor(queryName: string, querySource: string, cause: unknown) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    const headline = `[query-helpers] Invalid Tree-sitter query "${queryName}": ${reason}`;
    super([headline, 'Query source:', querySource].join('\n'));

    this.name = 'InvalidQueryError';
    this.queryName = queryName;
    this.querySource = querySource;
    this.cause = cause;
  }
}
44
+
45
+ // ============================================================
46
+ // Query compile cache
47
+ // ============================================================
48
+
// Compiled queries are bucketed first by Language identity (not by name) and
// then by query source text. The WeakMap lets a bucket go away together with
// its Language object.
const queryCache = new WeakMap<Language, Map<string, Query>>();

/**
 * Return the compiled form of `source` for `language`, compiling it on first
 * use and serving later requests from the cache.
 *
 * Failed compilations are not cached.
 *
 * @param language  - Grammar the query is compiled against.
 * @param source    - S-expression query text; also the cache key.
 * @param queryName - Label used only in the error raised on failure.
 * @throws InvalidQueryError (never a raw Error) when the query does not compile.
 */
export function compileQuery(
  language: Language,
  source: string,
  queryName: string,
): Query {
  const bucket = queryCache.get(language) ?? new Map<string, Query>();
  if (!queryCache.has(language)) {
    queryCache.set(language, bucket);
  }

  const hit = bucket.get(source);
  if (hit) return hit;

  try {
    const compiled = new Query(language, source);
    bucket.set(source, compiled);
    return compiled;
  } catch (err) {
    throw new InvalidQueryError(queryName, source, err);
  }
}
84
+
85
+ // ============================================================
86
+ // Capture extraction
87
+ // ============================================================
88
+
/** One query match, flattened so adapters never handle raw `Node` objects. */
export interface RunQueryHit {
  /**
   * Captured text keyed by capture name. When one match contains the same
   * capture name more than once, the LAST occurrence is the one kept.
   */
  captures: Record<string, string>;
  /** Path of the parsed file, exactly as it was handed to `runQuery`. */
  file: string;
  /** 1-based line of the earliest-starting capture in the match. */
  line: number;
  /** Name of the query that produced the hit (used for provenance). */
  queryName: string;
}
103
+
/**
 * Compile (via the cache) `queryText`, parse `source` with `parser`, and
 * return one flat `RunQueryHit` per match that has at least one capture.
 *
 * Every call parses `source` afresh; the resulting tree is always released
 * before returning — including when matching throws — so WASM memory is not
 * leaked on the error path.
 *
 * This helper is intentionally narrow — it is NOT a general node-walker.
 * Adapters that need tree traversal should compose multiple queries instead.
 *
 * @param parser    - Parser with a language already assigned.
 * @param source    - Source text to parse.
 * @param queryText - S-expression query.
 * @param queryName - Label copied onto each hit and used in errors.
 * @param filePath  - Copied verbatim onto each hit's `file`.
 * @returns Hits in match order; `[]` when the parser yields no tree.
 * @throws InvalidQueryError when the parser has no language, the query does
 *   not compile, or matching itself fails.
 */
export function runQuery(
  parser: Parser,
  source: string,
  queryText: string,
  queryName: string,
  filePath: string,
): RunQueryHit[] {
  const language = parser.language;
  if (!language) {
    throw new InvalidQueryError(
      queryName,
      queryText,
      new Error('Parser has no language assigned'),
    );
  }
  const query = compileQuery(language, queryText, queryName);

  const tree = parser.parse(source);
  if (!tree) return [];

  try {
    let matches: QueryMatch[];
    try {
      matches = query.matches(tree.rootNode);
    } catch (e) {
      // Match-time errors are unusual (compile-time catches most), but we
      // still wrap to keep the typed-error contract.
      throw new InvalidQueryError(queryName, queryText, e);
    }

    const out: RunQueryHit[] = [];
    for (const match of matches) {
      if (!match.captures || match.captures.length === 0) continue;
      const captures: Record<string, string> = {};
      let earliestLine = Number.POSITIVE_INFINITY;
      for (const cap of match.captures) {
        const node: Node = cap.node;
        // Later captures with the same name overwrite earlier ones.
        captures[cap.name] = node.text;
        // Tree-sitter rows are 0-based; hits report 1-based lines.
        earliestLine = Math.min(earliestLine, node.startPosition.row + 1);
      }
      out.push({
        captures,
        file: filePath,
        line: Number.isFinite(earliestLine) ? earliestLine : 1,
        queryName,
      });
    }
    return out;
  } finally {
    // Trees must be deleted to free WASM memory. Text and positions were
    // copied into plain values above, so nothing references the tree anymore.
    try {
      tree.delete();
    } catch {
      /* deletion is best-effort — some test mocks don't implement delete */
    }
  }
}
@@ -0,0 +1,252 @@
1
+ // Copyright (c) 2026 Massu. All rights reserved.
2
+ // Licensed under BSL 1.1 - see LICENSE file for details.
3
+
4
+ /**
5
+ * Plan 3b — Phase 1: AST adapter runner.
6
+ *
7
+ * Orchestrates: filter adapters via `matches()`, run them, isolate failures
8
+ * via per-adapter try/catch (audit-iter-5 fix HH test (d)), and merge their
9
+ * results.
10
+ *
11
+ * Confidence merge rule (spec §5):
12
+ * - 'high' / 'medium' / 'low' → field is written, with per-field provenance.
13
+ * - 'none' → field DROPPED (introspect's regex fallback may then emit it).
14
+ *
15
+ * AST-wins rule:
16
+ * - When the same conventions key appears in two adapters that BOTH return
17
+ * non-'none', the FIRST adapter (by source-list order) wins. This is
18
+ * deterministic — adapters are listed in `runner.ts`'s static array,
19
+ * never user-provided.
20
+ */
21
+
22
+ import { basename, relative } from 'path';
23
+ import type {
24
+ AdapterResolved,
25
+ CodebaseAdapter,
26
+ DetectionSignals,
27
+ MergedAdapterOutput,
28
+ Provenance,
29
+ SourceFile,
30
+ } from './types.ts';
31
+ import { isParsableSource, MAX_AST_FILE_BYTES } from './parse-guard.ts';
32
+
/** Optional hooks accepted by `runAdapters`. */
export interface RunAdaptersOptions {
  /**
   * File sampler. Called with an adapter and the project root; returns
   * (directly or via a promise) the `SourceFile[]` that adapter should
   * consume. When left out, the runner hands the adapter an empty file
   * list — handy in unit tests whose adapters need no files.
   */
  sampleFiles?: (adapter: CodebaseAdapter, rootDir: string) => Promise<SourceFile[]> | SourceFile[];
}
42
+
/**
 * Run a static list of adapters against a project root.
 *
 * For each adapter, in list order: `matches()` → `sampleFiles` → safety gate
 * → `introspect()`. Each stage is isolated in its own try/catch: a throwing
 * adapter MUST NOT crash the runner. The failure is recorded in `errored[]`
 * and the runner moves on.
 *
 * First-wins rules:
 *   - An adapter id is processed at most once. Any later adapter with the
 *     same id is ignored, whatever happened to the first one (ran, skipped
 *     or errored).
 *   - Within one adapter result, the first provenance entry per field wins.
 *
 * Ids and field names are compared as own keys only, so names that collide
 * with `Object.prototype` members (`constructor`, `toString`, ...) are
 * handled like any other name.
 *
 * @param adapters - Static list of first-party adapters.
 * @param rootDir  - Project root; forwarded to the sampler and adapters and
 *   used to shorten provenance paths.
 * @param signals  - Pre-built `DetectionSignals`, passed to `matches()`.
 * @param options  - Hooks for testing (see `RunAdaptersOptions`).
 * @returns Per-adapter results plus the ids that were skipped or errored.
 */
export async function runAdapters(
  adapters: CodebaseAdapter[],
  rootDir: string,
  signals: DetectionSignals,
  options: RunAdaptersOptions = {},
): Promise<MergedAdapterOutput> {
  const out: MergedAdapterOutput = {
    byAdapter: {},
    skipped: [],
    errored: [],
  };

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);

  // Tracks every id already handled. A Set (rather than probing
  // `out.byAdapter`) avoids false positives from inherited object members
  // and also covers adapters that ended up in `errored`.
  const seenIds = new Set<string>();

  for (const adapter of adapters) {
    if (seenIds.has(adapter.id)) {
      // Duplicate adapter id → skip to preserve first-wins.
      continue;
    }
    seenIds.add(adapter.id);

    let matches: boolean;
    try {
      matches = adapter.matches(signals);
    } catch (e) {
      out.errored.push({
        adapterId: adapter.id,
        error: `matches() threw: ${describe(e)}`,
      });
      continue;
    }
    if (!matches) {
      out.skipped.push(adapter.id);
      continue;
    }

    let files: SourceFile[];
    try {
      files = options.sampleFiles
        ? await options.sampleFiles(adapter, rootDir)
        : [];
    } catch (e) {
      out.errored.push({
        adapterId: adapter.id,
        error: `sampleFiles threw: ${describe(e)}`,
      });
      continue;
    }

    // Safety gate: drop inputs rejected by `isParsableSource` BEFORE the
    // adapter sees them. Each drop is logged once so operators see the
    // signal; the adapter then runs against the surviving subset.
    const safeFiles: SourceFile[] = [];
    for (const f of files) {
      const skip = isParsableSource(f.content, f.size);
      if (skip) {
        process.stderr.write(
          `[massu/ast] WARN: skipping ${f.path} for adapter ${adapter.id}: ${skip.reason} (${skip.detail}). Cap=${MAX_AST_FILE_BYTES} bytes. (Phase 3.5 mitigation)\n`,
        );
        continue;
      }
      safeFiles.push(f);
    }
    files = safeFiles;

    let result;
    try {
      result = await adapter.introspect(files, rootDir);
    } catch (e) {
      out.errored.push({
        adapterId: adapter.id,
        error: `introspect() threw: ${describe(e)}`,
      });
      continue;
    }

    // 'none' confidence drops the entire adapter result. The adapter is
    // still recorded in `byAdapter` (with empty conventions) so callers can
    // see that it ran.
    if (result.confidence === 'none') {
      out.byAdapter[adapter.id] = {
        conventions: {},
        _provenance: {},
        confidence: 'none',
      };
      continue;
    }

    // Copy conventions, dropping null/undefined values.
    const conventions: Record<string, unknown> = {};
    for (const [field, value] of Object.entries(result.conventions)) {
      if (value === null || value === undefined) continue;
      if (hasOwn(conventions, field)) continue;
      conventions[field] = value;
    }

    // First provenance entry per field wins.
    const provenanceMap: Record<string, string> = {};
    for (const p of result.provenance) {
      if (hasOwn(provenanceMap, p.field)) continue;
      provenanceMap[p.field] = formatProvenance(p, rootDir);
    }

    const resolved: AdapterResolved = {
      conventions,
      _provenance: provenanceMap,
      confidence: result.confidence,
    };
    out.byAdapter[adapter.id] = resolved;
  }

  return out;
}
175
+
/**
 * Render a provenance record as `<path>:<line> :: <query>`.
 *
 * `<path>` is the source file relative to `rootDir` when the file lies inside
 * it, otherwise just the file's basename. Containment is decided from the
 * result of `path.relative` rather than a string-prefix test, so a `rootDir`
 * with a trailing separator and platform-specific separators both work.
 */
function formatProvenance(p: Provenance, rootDir: string): string {
  const rel = relative(rootDir, p.sourceFile);
  // `relative` signals "outside rootDir" with a leading `..` segment, or with
  // an absolute path when no relative route exists (e.g. another Windows
  // drive). An empty result means the path IS rootDir, which has no useful
  // relative form either.
  const outsideRoot =
    rel === '' ||
    rel === '..' ||
    rel.startsWith('../') ||
    rel.startsWith('..\\') ||
    /^(?:[A-Za-z]:)?[\\/]/.test(rel);
  const shown = outsideRoot ? basename(p.sourceFile) : rel;
  return `${shown}:${p.line} :: ${p.query}`;
}
182
+
183
+ // ============================================================
184
+ // Signal builder — used by codebase-introspector to feed the runner
185
+ // ============================================================
186
+
187
+ import { existsSync, readdirSync, readFileSync, statSync } from 'fs';
188
+ import { join } from 'path';
189
+
/**
 * Assemble the `DetectionSignals` bundle for a project root.
 *
 * Does a single-level listing of `rootDir` (entries whose name starts with
 * `.` are ignored) to fill `presentDirs` / `presentFiles`, then reads the
 * known manifest files. Every failure degrades quietly: an unreadable root
 * yields empty sets, an entry that cannot be stat'ed is left out, and a
 * missing manifest leaves its field undefined.
 */
export function buildDetectionSignals(rootDir: string): DetectionSignals {
  const presentDirs = new Set<string>();
  const presentFiles = new Set<string>();

  let names: string[] = [];
  try {
    names = readdirSync(rootDir);
  } catch {
    /* unreadable root → empty signals */
  }

  for (const name of names) {
    if (name.startsWith('.')) continue;
    try {
      const info = statSync(join(rootDir, name));
      if (info.isDirectory()) {
        presentDirs.add(name);
      } else if (info.isFile()) {
        presentFiles.add(name);
      }
    } catch {
      /* ignore */
    }
  }

  const inRoot = (name: string): string => join(rootDir, name);

  return {
    packageJson: tryReadJson(inRoot('package.json')),
    pyprojectToml: tryReadToml(inRoot('pyproject.toml')),
    gemfile: tryReadString(inRoot('Gemfile')),
    cargoToml: tryReadToml(inRoot('Cargo.toml')),
    goMod: tryReadString(inRoot('go.mod')),
    presentDirs,
    presentFiles,
  };
}
223
+
/**
 * Read `path` as UTF-8 text. Yields `undefined` instead of throwing when the
 * path does not exist or cannot be read.
 */
function tryReadString(path: string): string | undefined {
  if (existsSync(path)) {
    try {
      return readFileSync(path, 'utf-8');
    } catch {
      /* unreadable → treated the same as missing */
    }
  }
  return undefined;
}
232
+
/**
 * Read and parse a JSON manifest.
 *
 * Returns the parsed value only when it is a plain JSON object. Everything
 * else — missing/unreadable/empty file, invalid JSON, or a top-level array,
 * string, number, boolean or `null` — yields `undefined`.
 *
 * A leading UTF-8 byte-order mark is removed before parsing, because
 * `JSON.parse` rejects it and the manifest would otherwise be silently lost.
 */
function tryReadJson(path: string): Record<string, unknown> | undefined {
  const txt = tryReadString(path);
  if (!txt) return undefined;
  try {
    const parsed: unknown = JSON.parse(txt.charCodeAt(0) === 0xfeff ? txt.slice(1) : txt);
    // Arrays are objects too, but they are not a key → value manifest.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return undefined;
    }
    return parsed as Record<string, unknown>;
  } catch {
    return undefined;
  }
}
243
+
/**
 * Signal-only TOML "read": no TOML parsing is performed. The file's text is
 * returned untouched under the `__raw` key so adapters can text-search it
 * themselves; this avoids pulling in a full TOML parser. A missing,
 * unreadable or empty file yields `undefined`.
 */
function tryReadToml(path: string): Record<string, unknown> | undefined {
  const raw = tryReadString(path);
  return raw ? { __raw: raw } : undefined;
}