@massu/core 1.2.1 → 1.4.0-soak.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -0
- package/commands/README.md +137 -0
- package/commands/massu-deploy.python-docker.md +170 -0
- package/commands/massu-deploy.python-fly.md +189 -0
- package/commands/massu-deploy.python-launchd.md +144 -0
- package/commands/massu-deploy.python-systemd.md +163 -0
- package/commands/massu-deploy.python.md +200 -0
- package/commands/massu-scaffold-page.md +172 -59
- package/commands/massu-scaffold-page.swift.md +121 -0
- package/commands/massu-scaffold-router.python-django.md +153 -0
- package/commands/massu-scaffold-router.python-fastapi.md +145 -0
- package/commands/massu-scaffold-router.python.md +143 -0
- package/dist/cli.js +10170 -4138
- package/dist/hooks/auto-learning-pipeline.js +44 -6
- package/dist/hooks/classify-failure.js +44 -6
- package/dist/hooks/cost-tracker.js +44 -6
- package/dist/hooks/fix-detector.js +44 -6
- package/dist/hooks/incident-pipeline.js +44 -6
- package/dist/hooks/post-edit-context.js +44 -6
- package/dist/hooks/post-tool-use.js +44 -6
- package/dist/hooks/pre-compact.js +44 -6
- package/dist/hooks/pre-delete-check.js +44 -6
- package/dist/hooks/quality-event.js +44 -6
- package/dist/hooks/rule-enforcement-pipeline.js +44 -6
- package/dist/hooks/session-end.js +44 -6
- package/dist/hooks/session-start.js +4789 -410
- package/dist/hooks/user-prompt.js +44 -6
- package/package.json +10 -4
- package/src/cli.ts +28 -2
- package/src/commands/config-refresh.ts +88 -20
- package/src/commands/init.ts +130 -23
- package/src/commands/install-commands.ts +482 -42
- package/src/commands/refresh-log.ts +37 -0
- package/src/commands/show-template.ts +65 -0
- package/src/commands/template-engine.ts +262 -0
- package/src/commands/watch.ts +430 -0
- package/src/config.ts +69 -3
- package/src/detect/adapters/nextjs-trpc.ts +166 -0
- package/src/detect/adapters/parse-guard.ts +133 -0
- package/src/detect/adapters/python-django.ts +208 -0
- package/src/detect/adapters/python-fastapi.ts +223 -0
- package/src/detect/adapters/query-helpers.ts +170 -0
- package/src/detect/adapters/runner.ts +252 -0
- package/src/detect/adapters/swift-swiftui.ts +171 -0
- package/src/detect/adapters/tree-sitter-loader.ts +348 -0
- package/src/detect/adapters/types.ts +174 -0
- package/src/detect/codebase-introspector.ts +190 -0
- package/src/detect/index.ts +28 -2
- package/src/detect/regex-fallback.ts +449 -0
- package/src/hooks/session-start.ts +94 -3
- package/src/lib/gitToplevel.ts +22 -0
- package/src/lib/installLock.ts +179 -0
- package/src/lib/pidLiveness.ts +67 -0
- package/src/lsp/auto-detect.ts +89 -0
- package/src/lsp/client.ts +590 -0
- package/src/lsp/enrich.ts +127 -0
- package/src/lsp/types.ts +221 -0
- package/src/watch/daemon.ts +385 -0
- package/src/watch/lockfile-detector.ts +65 -0
- package/src/watch/paths.ts +279 -0
- package/src/watch/state.ts +178 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Plan 3b — Phase 1: FastAPI AST adapter.
|
|
6
|
+
*
|
|
7
|
+
* Extracts:
|
|
8
|
+
* - auth_dep: name passed to `Depends(...)` in router files
|
|
9
|
+
* - api_prefix_base: first path segment of `APIRouter(prefix="/...")`
|
|
10
|
+
* - test_async_pattern: `@pytest.mark.asyncio` (with or without parens)
|
|
11
|
+
*
|
|
12
|
+
* Confidence rules:
|
|
13
|
+
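* - 'high' if exactly ONE distinct auth dep name is found across the sampled
|
|
14
|
+
* files, or if no auth dep is found but an `APIRouter(prefix=...)` is.
|
|
15
|
+
* - 'medium' if only `@pytest.mark.asyncio` is found. (Planned, not yet implemented: demote auth deps found only in non-routers/ paths, e.g., a deps.py module.)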
|
|
16
|
+
* - 'low' if multiple candidate auth deps are found (ambiguous — but still
|
|
17
|
+
* emitted so the user can see what was found).
|
|
18
|
+
* - 'none' if no `Depends(...)` calls found AND no `APIRouter(prefix=)` —
|
|
19
|
+
* adapter doesn't apply, regex fallback takes over.
|
|
20
|
+
*
|
|
21
|
+
* Does NOT use regex on file content — only Tree-sitter S-expression queries
|
|
22
|
+
* compiled via `query-helpers.ts`. Regex would be the regex-fallback path.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { Parser } from 'web-tree-sitter';
|
|
26
|
+
import type { CodebaseAdapter, AdapterResult, DetectionSignals, Provenance, SourceFile } from './types.ts';
|
|
27
|
+
import { runQuery, InvalidQueryError } from './query-helpers.ts';
|
|
28
|
+
import { loadGrammar } from './tree-sitter-loader.ts';
|
|
29
|
+
import { isParsableSource, MAX_AST_FILE_BYTES } from './parse-guard.ts';
|
|
30
|
+
|
|
31
|
+
// ============================================================
|
|
32
|
+
// Tree-sitter S-expression queries
|
|
33
|
+
// ============================================================
|
|
34
|
+
|
|
35
|
+
/**
 * Tree-sitter query that locates FastAPI auth dependencies.
 *
 * Matches a call whose callee is the bare identifier `Depends` and captures
 * every identifier sitting directly in its argument list as `@auth_dep`
 * (for example `get_current_user` in `Depends(get_current_user)`).
 *
 * The `#eq?` predicate on `@_callee` is what keeps arbitrary `<x>(<y>)`
 * calls from matching.
 */
const AUTH_DEP_QUERY = `
  (call
    function: (identifier) @_callee (#eq? @_callee "Depends")
    arguments: (argument_list
      (identifier) @auth_dep))
`;
|
|
48
|
+
|
|
49
|
+
/**
 * Tree-sitter query for `APIRouter(prefix="/api/orders", ...)`.
 *
 * Matches a call to the bare identifier `APIRouter` that carries a `prefix`
 * keyword argument whose value is a string node, and captures that string
 * node as `@prefix_value`. The captured text still includes the surrounding
 * quotes; the consumer is responsible for stripping them.
 */
const API_PREFIX_QUERY = `
  (call
    function: (identifier) @_callee (#eq? @_callee "APIRouter")
    arguments: (argument_list
      (keyword_argument
        name: (identifier) @_kw (#eq? @_kw "prefix")
        value: (string) @prefix_value)))
`;
|
|
61
|
+
|
|
62
|
+
/**
 * Tree-sitter query for the `pytest.mark.asyncio` decorator.
 *
 * Two decorator shapes are accepted through an alternation:
 *   - the bare attribute form:  `@pytest.mark.asyncio`
 *   - the call form:            `@pytest.mark.asyncio(...)`
 *
 * Previously only the bare form matched, although this adapter documents
 * the pattern as "with or without parens".
 *
 * The whole decorator node is captured as `@decorator` (used for the
 * provenance line). The `_`-prefixed captures exist only so the `#eq?`
 * predicates can pin the dotted name to exactly `pytest.mark.asyncio`.
 */
const PYTEST_ASYNCIO_QUERY = `
  (decorator
    [
      (attribute
        object: (attribute
          object: (identifier) @_pkg
          attribute: (identifier) @_mark)
        attribute: (identifier) @_marker)
      (call
        function: (attribute
          object: (attribute
            object: (identifier) @_pkg
            attribute: (identifier) @_mark)
          attribute: (identifier) @_marker))
    ]
    (#eq? @_pkg "pytest")
    (#eq? @_mark "mark")
    (#eq? @_marker "asyncio")) @decorator
`;
|
|
74
|
+
|
|
75
|
+
// ============================================================
|
|
76
|
+
// Adapter
|
|
77
|
+
// ============================================================
|
|
78
|
+
|
|
79
|
+
export const pythonFastApiAdapter: CodebaseAdapter = {
  id: 'python-fastapi',
  languages: ['python'],

  /**
   * Cheap, signal-only applicability check (no file IO). Returns true when:
   *   1. the raw pyproject.toml text contains the word `fastapi`
   *      (case-insensitive), OR
   *   2. the project root has a `routers` directory, OR
   *   3. the project root has an `app` directory AND a `main.py` file.
   */
  matches(signals: DetectionSignals): boolean {
    const pyToml = signals.pyprojectToml as { __raw?: string } | undefined;
    if (pyToml?.__raw && /\bfastapi\b/i.test(pyToml.__raw)) return true;
    if (signals.presentDirs.has('routers')) return true;
    return signals.presentDirs.has('app') && signals.presentFiles.has('main.py');
  },

  /**
   * Run the three FastAPI queries over `files` and summarise what was found.
   *
   * For each field the FIRST value seen (in input-file order) is emitted:
   *   - `auth_dep`           — identifier passed to `Depends(...)`
   *   - `api_prefix_base`    — first path segment of `APIRouter(prefix=...)`
   *   - `test_async_pattern` — fixed string `@pytest.mark.asyncio`
   *
   * Confidence:
   *   - 'none'   — nothing found, no files given, or the grammar failed to load
   *   - 'low'    — two or more distinct auth dep names (ambiguous)
   *   - 'high'   — exactly one auth dep name, or no auth dep but a prefix
   *   - 'medium' — only the pytest pattern was found
   *
   * @param files    Source files to inspect; unparsable ones are skipped with
   *                 a warning on stderr.
   * @param _rootDir Unused by this adapter.
   * @returns Conventions, per-field provenance and the overall confidence.
   * @throws InvalidQueryError when one of this adapter's own queries is
   *         rejected — that is a developer bug and is deliberately surfaced.
   */
  async introspect(files: SourceFile[], _rootDir: string): Promise<AdapterResult> {
    const notApplicable = (): AdapterResult => ({ conventions: {}, provenance: [], confidence: 'none' });

    if (files.length === 0) return notApplicable();

    let language;
    try {
      language = await loadGrammar('python');
    } catch {
      // Grammar unavailable → report 'none' so the regex fallback takes over.
      return notApplicable();
    }

    type Site = { line: number; file: string };
    // value → first place it was seen. Maps keep insertion order, so the
    // first entry is the first hit in input-file order.
    const authDeps = new Map<string, Site>();
    const prefixBases = new Map<string, Site>();
    const testAsyncPatterns = new Map<string, Site>();
    const asyncioPattern = '@pytest.mark.asyncio';

    const parser = new Parser();
    try {
      // Inside the try so the parser is released even if setLanguage throws.
      parser.setLanguage(language);

      for (const file of files) {
        // Defense-in-depth size/depth gate: the runner also gates, but the
        // adapter may be invoked directly (tests, CLI).
        const skip = isParsableSource(file.content, file.size);
        if (skip) {
          process.stderr.write(
            `[massu/ast] WARN: python-fastapi skipping ${file.path}: ${skip.reason} (${skip.detail}). Cap=${MAX_AST_FILE_BYTES}. (Phase 3.5 mitigation)\n`,
          );
          continue;
        }

        try {
          // Auth dependency names.
          for (const hit of runQuery(parser, file.content, AUTH_DEP_QUERY, 'fastapi-auth-dep', file.path)) {
            const name = hit.captures.auth_dep;
            if (name && !authDeps.has(name)) {
              authDeps.set(name, { line: hit.line, file: file.path });
            }
          }

          // APIRouter prefix → base segment.
          for (const hit of runQuery(parser, file.content, API_PREFIX_QUERY, 'fastapi-api-prefix', file.path)) {
            const raw = hit.captures.prefix_value;
            if (!raw) continue;
            // The string node's text includes its quotes; drop one from each end.
            const literal = raw.replace(/^['"]/, '').replace(/['"]$/, '');
            const base = extractPrefixBase(literal);
            if (base && !prefixBases.has(base)) {
              prefixBases.set(base, { line: hit.line, file: file.path });
            }
          }

          // pytest.mark.asyncio — the value is fixed, only the location varies.
          for (const hit of runQuery(parser, file.content, PYTEST_ASYNCIO_QUERY, 'fastapi-pytest-asyncio', file.path)) {
            if (!testAsyncPatterns.has(asyncioPattern)) {
              testAsyncPatterns.set(asyncioPattern, { line: hit.line, file: file.path });
            }
          }
        } catch (e) {
          // A rejected query is our bug — surface it.
          if (e instanceof InvalidQueryError) throw e;
          // Anything else is treated as a per-file failure: skip the file.
          continue;
        }
      }
    } finally {
      try { parser.delete(); } catch { /* ignore */ }
    }

    const conventions: Record<string, unknown> = {};
    const provenance: Provenance[] = [];

    // Emit the first-seen entry of `found` (if any) under `field`.
    const emitFirst = (field: string, found: Map<string, Site>, query: string): void => {
      const first = found.entries().next();
      if (first.done) return;
      const [value, site] = first.value;
      conventions[field] = value;
      provenance.push({ field, sourceFile: site.file, line: site.line, query });
    };

    // With several candidate auth deps the first-seen one is still emitted;
    // the ambiguity is expressed through the 'low' confidence below.
    emitFirst('auth_dep', authDeps, 'fastapi-auth-dep');
    emitFirst('api_prefix_base', prefixBases, 'fastapi-api-prefix');
    emitFirst('test_async_pattern', testAsyncPatterns, 'fastapi-pytest-asyncio');

    let confidence: AdapterResult['confidence'];
    if (Object.keys(conventions).length === 0) {
      confidence = 'none';
    } else if (authDeps.size >= 2) {
      confidence = 'low';
    } else if (authDeps.size === 1 || prefixBases.size > 0) {
      confidence = 'high';
    } else {
      confidence = 'medium';
    }

    return { conventions, provenance, confidence };
  },
};
|
|
212
|
+
|
|
213
|
+
// ============================================================
|
|
214
|
+
// Helpers
|
|
215
|
+
// ============================================================
|
|
216
|
+
|
|
217
|
+
/**
 * Reduce a router prefix to its base segment.
 *
 * `/api/orders` → `/api`. Leading slashes are collapsed, so `//v1/x` → `/v1`.
 *
 * @param prefix Prefix string with quotes already removed.
 * @returns `/` + the first non-empty path segment, or `null` when the prefix
 *          does not start with `/` or contains no segment at all.
 */
function extractPrefixBase(prefix: string): string | null {
  if (prefix.charAt(0) !== '/') return null;
  // Everything before the first non-empty part is a leading slash, so the
  // first non-empty part is the first real segment.
  const leadingSegment = prefix.split('/').find((part) => part !== '');
  return leadingSegment === undefined ? null : `/${leadingSegment}`;
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Plan 3b — Phase 1: Tree-sitter query wrapper.
|
|
6
|
+
*
|
|
7
|
+
* Adapters consume the helpers in this file — never the raw `web-tree-sitter`
|
|
8
|
+
* API. This keeps the surface area minimal and testable.
|
|
9
|
+
*
|
|
10
|
+
* Design:
|
|
11
|
+
* - `compileQuery` caches compiled `Query` instances per (language, source)
|
|
12
|
+
* tuple. Compiling an S-expression is non-trivial; cache hit-rate is
|
|
13
|
+
* critical when the same query runs across N sampled files.
|
|
14
|
+
* - `runQuery` returns the captures as `{captures, file, line}` records so
|
|
15
|
+
* adapters never need to touch raw `Node` objects.
|
|
16
|
+
* - `InvalidQueryError` is the typed error thrown when an S-expression is
|
|
17
|
+
* malformed; never let a raw `Error` reach the adapter (per audit-iter-5
|
|
18
|
+
* fix HH test (b)).
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { Query, type Language, type Node, type Parser, type QueryMatch } from 'web-tree-sitter';
|
|
22
|
+
|
|
23
|
+
/**
 * Typed error for a Tree-sitter S-expression query that could not be used.
 *
 * The message embeds the query name, the underlying failure text and the
 * full query source; the same three pieces are also exposed as fields so
 * callers can inspect them programmatically.
 */
export class InvalidQueryError extends Error {
  /** Name the query was registered under. */
  public readonly queryName: string;
  /** The S-expression text that was rejected. */
  public readonly querySource: string;
  /** Whatever was originally thrown (not necessarily an `Error`). */
  public readonly cause?: unknown;

  /**
   * @param queryName   Name of the offending query.
   * @param querySource Source text of the offending query.
   * @param cause       The original failure; non-`Error` values are stringified
   *                    for the message.
   */
  constructor(queryName: string, querySource: string, cause: unknown) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    super(`[query-helpers] Invalid Tree-sitter query "${queryName}": ${reason}\nQuery source:\n${querySource}`);

    this.name = 'InvalidQueryError';
    this.queryName = queryName;
    this.querySource = querySource;
    this.cause = cause;
  }
}
|
|
44
|
+
|
|
45
|
+
// ============================================================
|
|
46
|
+
// Query compile cache
|
|
47
|
+
// ============================================================
|
|
48
|
+
|
|
49
|
+
// Two-level cache: Language object (by identity, held weakly) → query source
// text → compiled Query.
const queryCache = new WeakMap<Language, Map<string, Query>>();

/**
 * Compile an S-expression query for `language`, reusing a previously
 * compiled instance when the same (language, source) pair was seen before.
 *
 * @param language  Grammar to compile against; also the outer cache key.
 * @param source    Query text; also the inner cache key.
 * @param queryName Label used only in the error message.
 * @returns The compiled (possibly cached) query.
 * @throws InvalidQueryError when constructing the `Query` throws. Failed
 *         compilations are not cached.
 */
export function compileQuery(
  language: Language,
  source: string,
  queryName: string,
): Query {
  const existing = queryCache.get(language);
  const bySource = existing ?? new Map<string, Query>();
  if (!existing) {
    queryCache.set(language, bySource);
  }

  const hit = bySource.get(source);
  if (hit) return hit;

  let compiled: Query;
  try {
    compiled = new Query(language, source);
  } catch (e) {
    // Never let the raw error escape — callers rely on the typed error.
    throw new InvalidQueryError(queryName, source, e);
  }

  bySource.set(source, compiled);
  return compiled;
}
|
|
84
|
+
|
|
85
|
+
// ============================================================
|
|
86
|
+
// Capture extraction
|
|
87
|
+
// ============================================================
|
|
88
|
+
|
|
89
|
+
/** One query match, flattened so callers never handle raw syntax nodes. */
export interface RunQueryHit {
  /**
   * Captured text keyed by capture name. When a capture name occurs more
   * than once within the same match, the later occurrence overwrites the
   * earlier one.
   */
  captures: Record<string, string>;
  /** Path of the parsed file, exactly as the caller supplied it. */
  file: string;
  /** 1-based line of the earliest-starting capture in the match. */
  line: number;
  /** Name of the query that produced this hit (used for provenance). */
  queryName: string;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Parse `source`, run one query over the resulting tree and return a flat
 * list of hits — one `RunQueryHit` per match that has at least one capture.
 *
 * Each call parses `source` afresh; a caller that runs several queries over
 * the same file pays for one parse per query.
 *
 * The parsed tree is always released before returning, including when the
 * query or capture extraction throws, so a failing file does not leak WASM
 * memory.
 *
 * This helper is intentionally narrow — it is NOT a general node-walker.
 * Callers that need tree traversal should compose several queries instead.
 *
 * @param parser    Parser with a language already assigned.
 * @param source    Text to parse.
 * @param queryText S-expression query; compiled through `compileQuery`.
 * @param queryName Label copied onto every hit and used in error messages.
 * @param filePath  Copied verbatim onto every hit as `file`.
 * @returns Hits in match order; empty when the parser yields no tree.
 * @throws InvalidQueryError when the parser has no language, the query does
 *         not compile, or running the query throws.
 */
export function runQuery(
  parser: Parser,
  source: string,
  queryText: string,
  queryName: string,
  filePath: string,
): RunQueryHit[] {
  const language = parser.language;
  if (!language) {
    throw new InvalidQueryError(
      queryName,
      queryText,
      new Error('Parser has no language assigned'),
    );
  }
  // Compile before parsing so a bad query fails without allocating a tree.
  const query = compileQuery(language, queryText, queryName);

  const tree = parser.parse(source);
  if (!tree) return [];

  try {
    let matches: QueryMatch[];
    try {
      matches = query.matches(tree.rootNode);
    } catch (e) {
      // Match-time errors are unusual, but they are still wrapped so callers
      // only ever see the typed error.
      throw new InvalidQueryError(queryName, queryText, e);
    }

    const out: RunQueryHit[] = [];
    for (const match of matches) {
      if (!match.captures || match.captures.length === 0) continue;

      const captures: Record<string, string> = {};
      let earliestLine = Number.POSITIVE_INFINITY;
      for (const cap of match.captures) {
        const node: Node = cap.node;
        // Later captures with the same name overwrite earlier ones.
        captures[cap.name] = node.text;
        // Rows are 0-based; reported lines are 1-based.
        earliestLine = Math.min(earliestLine, node.startPosition.row + 1);
      }

      out.push({
        captures,
        file: filePath,
        line: Number.isFinite(earliestLine) ? earliestLine : 1,
        queryName,
      });
    }
    return out;
  } finally {
    // Trees hold WASM memory and must be released explicitly.
    try {
      tree.delete();
    } catch {
      /* deletion is best-effort — some test mocks don't implement delete */
    }
  }
}
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Plan 3b — Phase 1: AST adapter runner.
|
|
6
|
+
*
|
|
7
|
+
* Orchestrates: filter adapters via `matches()`, run them, isolate failures
|
|
8
|
+
* via per-adapter try/catch (audit-iter-5 fix HH test (d)), and merge their
|
|
9
|
+
* results.
|
|
10
|
+
*
|
|
11
|
+
* Confidence merge rule (spec §5):
|
|
12
|
+
* - 'high' / 'medium' / 'low' → field is written, with per-field provenance.
|
|
13
|
+
* - 'none' → field DROPPED (introspect's regex fallback may then emit it).
|
|
14
|
+
*
|
|
15
|
+
* AST-wins rule:
|
|
16
|
+
* - When the same conventions key appears in two adapters that BOTH return
|
|
17
|
+
* non-'none', the FIRST adapter (by source-list order) wins. This is
|
|
18
|
+
* deterministic — adapters are listed in `runner.ts`'s static array,
|
|
19
|
+
* never user-provided.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { basename, isAbsolute, relative, sep } from 'path';
|
|
23
|
+
import type {
|
|
24
|
+
AdapterResolved,
|
|
25
|
+
CodebaseAdapter,
|
|
26
|
+
DetectionSignals,
|
|
27
|
+
MergedAdapterOutput,
|
|
28
|
+
Provenance,
|
|
29
|
+
SourceFile,
|
|
30
|
+
} from './types.ts';
|
|
31
|
+
import { isParsableSource, MAX_AST_FILE_BYTES } from './parse-guard.ts';
|
|
32
|
+
|
|
33
|
+
/** Optional hooks accepted by `runAdapters`. */
export interface RunAdaptersOptions {
  /**
   * Supplies the source files an adapter should inspect, given the adapter
   * and the project root. May return the list directly or as a promise.
   *
   * When omitted, every adapter receives an empty file list — handy in unit
   * tests whose adapters do not need files.
   */
  sampleFiles?: (adapter: CodebaseAdapter, rootDir: string) => Promise<SourceFile[]> | SourceFile[];
}
|
|
42
|
+
|
|
43
|
+
/**
 * Run a static list of adapters against a project root and collect their
 * results under each adapter's id.
 *
 * Isolation: every adapter call (`matches`, `sampleFiles`, `introspect`) is
 * wrapped individually. A throw is recorded in `errored[]` and the runner
 * moves on to the next adapter — one adapter can never crash the run.
 *
 * Outcome per adapter:
 *   - `matches()` returned false          → id appended to `skipped`
 *   - any step threw                      → entry appended to `errored`
 *   - `introspect()` confidence is 'none' → recorded with empty conventions
 *   - otherwise                           → conventions (null/undefined
 *     values removed) plus one formatted provenance string per field (the
 *     first provenance entry for a field wins)
 *
 * A second adapter whose id was already recorded or skipped is ignored.
 * All membership checks are own-property checks, so ids or field names that
 * collide with `Object.prototype` members (`constructor`, `toString`, …) are
 * handled like any other name.
 *
 * @param adapters - Static list of first-party adapters.
 * @param rootDir  - Project root; passed to adapters and used to shorten
 *                   provenance paths.
 * @param signals  - Pre-built detection signals handed to `matches()`.
 * @param options  - Optional hooks (file sampler).
 * @returns The merged per-adapter output.
 */
export async function runAdapters(
  adapters: CodebaseAdapter[],
  rootDir: string,
  signals: DetectionSignals,
  options: RunAdaptersOptions = {},
): Promise<MergedAdapterOutput> {
  const out: MergedAdapterOutput = {
    byAdapter: {},
    skipped: [],
    errored: [],
  };

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  // `in` / truthiness checks would also see inherited Object.prototype members.
  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);

  for (const adapter of adapters) {
    // Duplicate adapter id → keep the first one.
    if (hasOwn(out.byAdapter, adapter.id) || out.skipped.includes(adapter.id)) {
      continue;
    }

    let applies: boolean;
    try {
      applies = adapter.matches(signals);
    } catch (e) {
      out.errored.push({ adapterId: adapter.id, error: `matches() threw: ${describe(e)}` });
      continue;
    }
    if (!applies) {
      out.skipped.push(adapter.id);
      continue;
    }

    let files: SourceFile[];
    try {
      files = options.sampleFiles ? await options.sampleFiles(adapter, rootDir) : [];
    } catch (e) {
      out.errored.push({ adapterId: adapter.id, error: `sampleFiles threw: ${describe(e)}` });
      continue;
    }

    // Gate inputs BEFORE the adapter sees them. Each dropped file is logged
    // once; the adapter then runs against the surviving subset.
    const safeFiles: SourceFile[] = [];
    for (const f of files) {
      const skip = isParsableSource(f.content, f.size);
      if (skip) {
        process.stderr.write(
          `[massu/ast] WARN: skipping ${f.path} for adapter ${adapter.id}: ${skip.reason} (${skip.detail}). Cap=${MAX_AST_FILE_BYTES} bytes. (Phase 3.5 mitigation)\n`,
        );
        continue;
      }
      safeFiles.push(f);
    }

    let result;
    try {
      result = await adapter.introspect(safeFiles, rootDir);
    } catch (e) {
      out.errored.push({ adapterId: adapter.id, error: `introspect() threw: ${describe(e)}` });
      continue;
    }

    // 'none' drops the adapter's findings but still records that it ran.
    if (result.confidence === 'none') {
      out.byAdapter[adapter.id] = {
        conventions: {},
        _provenance: {},
        confidence: 'none',
      };
      continue;
    }

    // Object.entries never repeats a key, so only null/undefined need removing.
    const conventions: Record<string, unknown> = {};
    for (const [field, value] of Object.entries(result.conventions)) {
      if (value === null || value === undefined) continue;
      conventions[field] = value;
    }

    // Provenance is a list, so a field can repeat: first entry wins.
    const provenanceMap: Record<string, string> = {};
    for (const p of result.provenance) {
      if (hasOwn(provenanceMap, p.field)) continue;
      provenanceMap[p.field] = formatProvenance(p, rootDir);
    }

    const resolved: AdapterResolved = {
      conventions,
      _provenance: provenanceMap,
      confidence: result.confidence,
    };
    out.byAdapter[adapter.id] = resolved;
  }

  return out;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Render a provenance record as `<path>:<line> :: <query>`.
 *
 * `<path>` is the source file relative to `rootDir` when the file lies
 * inside the root, and just the file's basename otherwise. Containment is
 * decided with `path.relative`, so a root given with a trailing separator
 * and platform-specific separators are handled the same way.
 *
 * @param p       Provenance record to format.
 * @param rootDir Project root used to shorten the path.
 * @returns The formatted provenance string.
 */
function formatProvenance(p: Provenance, rootDir: string): string {
  const rel = relative(rootDir, p.sourceFile);
  // Outside the root: climbs out via `..`, or (e.g. another Windows drive)
  // `relative` could only hand back an absolute path.
  const outsideRoot = rel === '..' || rel.startsWith('..' + sep) || isAbsolute(rel);
  // An empty result means sourceFile IS the root; fall back to the basename.
  const shown = rel !== '' && !outsideRoot ? rel : basename(p.sourceFile);
  return `${shown}:${p.line} :: ${p.query}`;
}
|
|
182
|
+
|
|
183
|
+
// ============================================================
|
|
184
|
+
// Signal builder — used by codebase-introspector to feed the runner
|
|
185
|
+
// ============================================================
|
|
186
|
+
|
|
187
|
+
import { existsSync, readdirSync, readFileSync, statSync } from 'fs';
|
|
188
|
+
import { join } from 'path';
|
|
189
|
+
|
|
190
|
+
/**
 * Assemble the `DetectionSignals` bundle for a project root.
 *
 * Performs a single-level scan of `rootDir` (entries starting with `.` are
 * ignored) to record which directories and regular files are present, then
 * reads a fixed set of manifests: package.json, pyproject.toml, Gemfile,
 * Cargo.toml and go.mod.
 *
 * Nothing here throws: an unreadable root yields empty sets, an entry that
 * cannot be stat'ed is skipped, and a manifest that is missing or
 * unreadable is left `undefined`.
 *
 * @param rootDir Project root to inspect.
 * @returns The populated signals bundle.
 */
export function buildDetectionSignals(rootDir: string): DetectionSignals {
  const presentDirs = new Set<string>();
  const presentFiles = new Set<string>();

  let entries: string[] = [];
  try {
    entries = readdirSync(rootDir);
  } catch {
    /* unreadable root → empty signals */
  }

  for (const name of entries) {
    if (name.startsWith('.')) continue;
    try {
      const info = statSync(join(rootDir, name));
      if (info.isDirectory()) {
        presentDirs.add(name);
      } else if (info.isFile()) {
        presentFiles.add(name);
      }
    } catch {
      /* ignore */
    }
  }

  const manifest = (file: string): string => join(rootDir, file);

  return {
    packageJson: tryReadJson(manifest('package.json')),
    pyprojectToml: tryReadToml(manifest('pyproject.toml')),
    gemfile: tryReadString(manifest('Gemfile')),
    cargoToml: tryReadToml(manifest('Cargo.toml')),
    goMod: tryReadString(manifest('go.mod')),
    presentDirs,
    presentFiles,
  };
}
|
|
223
|
+
|
|
224
|
+
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param path File to read.
 * @returns The file's text, or `undefined` when the path does not exist or
 *          the read fails for any reason.
 */
function tryReadString(path: string): string | undefined {
  try {
    return existsSync(path) ? readFileSync(path, 'utf-8') : undefined;
  } catch {
    return undefined;
  }
}
|
|
232
|
+
|
|
233
|
+
/**
 * Read and parse a JSON manifest without ever throwing.
 *
 * @param path JSON file to read.
 * @returns The parsed value when it is a non-null `object`; `undefined`
 *          when the file is missing, unreadable, empty, not valid JSON, or
 *          parses to a primitive or `null`.
 *          NOTE(review): a top-level JSON array also passes the `object`
 *          check and is returned as-is.
 */
function tryReadJson(path: string): Record<string, unknown> | undefined {
  const raw = tryReadString(path);
  if (!raw) return undefined;

  let value: unknown;
  try {
    value = JSON.parse(raw);
  } catch {
    return undefined;
  }

  const isObject = value !== null && typeof value === 'object';
  return isObject ? (value as Record<string, unknown>) : undefined;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Read a TOML manifest as an opaque blob — no TOML parsing is performed.
 *
 * The raw text is wrapped as `{ __raw: <text> }` so consumers can run their
 * own text searches over it, which avoids pulling in a full TOML parser for
 * what is only a cheap detection signal.
 *
 * @param path TOML file to read.
 * @returns `{ __raw }` holding the file text, or `undefined` when the file
 *          is missing, unreadable or empty.
 */
function tryReadToml(path: string): Record<string, unknown> | undefined {
  const raw = tryReadString(path);
  return raw ? { __raw: raw } : undefined;
}
|