@pugi/cli 0.1.0-beta.100 → 0.1.0-beta.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/core/codegraph/parser.js +574 -47
- package/dist/core/codegraph/queries/go.scm +57 -0
- package/dist/core/codegraph/queries/javascript.scm +56 -0
- package/dist/core/codegraph/queries/python.scm +55 -0
- package/dist/core/codegraph/queries/rust.scm +63 -0
- package/dist/core/codegraph/queries/typescript.scm +91 -0
- package/dist/core/codegraph/reindex.js +218 -0
- package/dist/core/codegraph/resolve-edges.js +107 -0
- package/dist/core/codegraph/watcher.js +440 -0
- package/dist/core/diagnostics/probes/sandbox.js +7 -12
- package/dist/core/engine/prompts.js +32 -0
- package/dist/core/eval/v1/ledger.js +83 -0
- package/dist/core/eval/v1/runner.js +280 -0
- package/dist/core/eval/v1/scoring.js +68 -0
- package/dist/core/eval/v1/task-loader.js +191 -0
- package/dist/core/eval/v1/types.js +14 -0
- package/dist/core/eval/v1/verifier.js +176 -0
- package/dist/core/eval/v1/yaml-parser.js +250 -0
- package/dist/core/sandboxing/adapter.js +31 -17
- package/dist/core/sandboxing/bubblewrap.js +209 -0
- package/dist/core/sandboxing/index.js +32 -3
- package/dist/core/sandboxing/policy.js +97 -0
- package/dist/core/sandboxing/seatbelt.js +69 -21
- package/dist/core/settings.js +31 -7
- package/dist/runtime/cli.js +58 -0
- package/dist/runtime/commands/eval-v1.js +266 -0
- package/dist/runtime/commands/index-cmd.js +125 -19
- package/dist/runtime/commands/servers-cli.js +182 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tools/bash.js +187 -3
- package/package.json +10 -3
package/README.md
CHANGED
|
@@ -134,6 +134,8 @@ pugi review --triple # local triple-review evidence bundle
|
|
|
134
134
|
pugi review --triple --remote
|
|
135
135
|
# call Anvil for 3-model consensus
|
|
136
136
|
pugi handoff --web # hand the session off to the cabinet
|
|
137
|
+
pugi servers # list dev servers tracked by server_start
|
|
138
|
+
pugi servers stop all # kill every alive tracked server (orphan rescue)
|
|
137
139
|
pugi sessions # list sessions from .pugi/index.json
|
|
138
140
|
pugi sessions --rebuild # rebuild the index from events.jsonl
|
|
139
141
|
pugi deploy --target vercel my-vercel-project --project proj_42
|
|
@@ -1,61 +1,82 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Pugi local symbol index - tree-sitter parser
|
|
2
|
+
* Pugi local symbol index - tree-sitter parser (PR L1).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Reads one source file, parses it with the appropriate tree-sitter
|
|
5
|
+
* grammar, runs the language's symbol + edge query
|
|
6
|
+
* (`./queries/<lang>.scm`), and emits a `ParseResult`:
|
|
6
7
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
8
|
+
* - `symbols`: complete `Symbol[]` ready for `db.ts:insertSymbols`.
|
|
9
|
+
* - `pendingEdges`: name-keyed edges. `db.Edge` requires assigned
|
|
10
|
+
* SQLite row ids on both endpoints, which the parser cannot
|
|
11
|
+
* supply - the orchestrator (in `runtime/commands/index-cmd.ts`)
|
|
12
|
+
* inserts symbols first, builds a name map per file, and then
|
|
13
|
+
* resolves pending edges before calling `insertEdges`. Cross-file
|
|
14
|
+
* edges land when the target symbol exists in the index at flush
|
|
15
|
+
* time.
|
|
15
16
|
*
|
|
16
|
-
*
|
|
17
|
-
* lets the rest of the scaffold ship in this PR without smuggling in
|
|
18
|
-
* tree-sitter native modules. tree-sitter has a prebuilt-binary
|
|
19
|
-
* distribution but it is one more wheel-per-platform surface that
|
|
20
|
-
* compounds the install-time risk the session-store memo warned about
|
|
21
|
-
* (see `core/repl/store/session-store.ts` lines 20-36).
|
|
17
|
+
* Design decisions:
|
|
22
18
|
*
|
|
23
|
-
* The
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
19
|
+
* - The contract types in `./types.ts` are FROZEN. We extend the
|
|
20
|
+
* `ParseResult` shape locally (add `pendingEdges`) rather than
|
|
21
|
+
* changing `Edge` to carry names. `db.ts` stays single-purpose.
|
|
22
|
+
* - Tree-sitter has built-in error recovery via `SyntaxNode.hasError`.
|
|
23
|
+
* Broken sources still produce a partial tree; we walk it as-is
|
|
24
|
+
* and surface the partial symbol set rather than throwing. This
|
|
25
|
+
* mirrors Aider / Cody / Sourcegraph - all production indexers
|
|
26
|
+
* swallow parse errors and trust the user to notice via downstream
|
|
27
|
+
* symptoms.
|
|
28
|
+
* - The performance budget (200 ms per 5K LOC file) is enforced by
|
|
29
|
+
* `setTimeoutMicros` on the parser and a wall-clock check after
|
|
30
|
+
* the query pass. Over-budget files emit to stderr (unless
|
|
31
|
+
* silenced) rather than throwing.
|
|
32
|
+
* - Query files (`.scm`) are loaded once per process and the
|
|
33
|
+
* compiled `Parser.Query` instances are cached on the grammar.
|
|
34
|
+
* The query constructor is the expensive step (~30-50 ms for a
|
|
35
|
+
* 20-pattern query) - the cache keeps reindex throughput high.
|
|
36
|
+
*
|
|
37
|
+
* Native dependency caveat: `tree-sitter` + the per-language grammar
|
|
38
|
+
* packages are native modules. The `node-gyp-build` runtime they ship
|
|
39
|
+
* resolves prebuilt binaries for darwin-{arm64,x64},
|
|
40
|
+
* linux-{arm64,x64}, win32-arm64 - the set the @pugi/cli customer
|
|
41
|
+
* matrix actually covers. On an unsupported platform `require()`
|
|
42
|
+
* throws at the top-level import; we lazy-load the grammar (and the
|
|
43
|
+
* runtime itself, via dynamic import) so a missing prebuild degrades
|
|
44
|
+
* to "unsupported file type" rather than crashing the CLI on startup.
|
|
45
|
+
* See `loadBindings`.
|
|
28
46
|
*/
|
|
47
|
+
import { promises as fs, readFileSync } from 'node:fs';
|
|
48
|
+
import { extname, dirname, resolve } from 'node:path';
|
|
49
|
+
import { fileURLToPath } from 'node:url';
|
|
50
|
+
import { performance } from 'node:perf_hooks';
|
|
51
|
+
/** Wall-clock perf budget in milliseconds for a single file. */
|
|
52
|
+
const PERF_BUDGET_MS = 200;
|
|
29
53
|
/**
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
*
|
|
36
|
-
* @param path Absolute or workspace-relative path. The real impl will
|
|
37
|
-
* read the file, sha256 it, route by extension to the right
|
|
38
|
-
* tree-sitter grammar, and walk the resulting CST.
|
|
39
|
-
*/
|
|
40
|
-
export async function parseFile(path) {
|
|
41
|
-
// Touch the parameter so TS strict noUnusedParameters doesn't fire.
|
|
42
|
-
// The real impl in PR L1 reads the file at this path.
|
|
43
|
-
void path;
|
|
44
|
-
return {
|
|
45
|
-
symbols: [],
|
|
46
|
-
edges: [],
|
|
47
|
-
stub: true,
|
|
48
|
-
};
|
|
49
|
-
}
|
|
54
|
+
* tree-sitter accepts a microseconds timeout on the parser itself.
|
|
55
|
+
* We cap parsing at 2x the wall-clock budget to give the query
|
|
56
|
+
* phase headroom while still bounding pathological cases.
|
|
57
|
+
*/
|
|
58
|
+
const PARSER_TIMEOUT_MICROS = PERF_BUDGET_MS * 1000 * 2;
|
|
50
59
|
/**
|
|
51
|
-
*
|
|
60
|
+
* Extensions the parser handles. Mirror of the docs in `db.ts` /
|
|
61
|
+
* `types.ts:IndexedLanguage`. Anything not in this map returns an
|
|
62
|
+
* empty ParseResult without touching tree-sitter at all.
|
|
63
|
+
*/
|
|
64
|
+
const EXT_TO_LANG = Object.freeze({
|
|
65
|
+
'.ts': 'typescript',
|
|
66
|
+
'.tsx': 'tsx',
|
|
67
|
+
'.js': 'javascript',
|
|
68
|
+
'.jsx': 'javascript',
|
|
69
|
+
'.mjs': 'javascript',
|
|
70
|
+
'.cjs': 'javascript',
|
|
71
|
+
'.py': 'python',
|
|
72
|
+
'.go': 'go',
|
|
73
|
+
'.rs': 'rust',
|
|
74
|
+
});
|
|
75
|
+
/**
|
|
76
|
+
* Stable list of file extensions the v1 parser supports. Exported
|
|
52
77
|
* so the watcher (PR L2) and the CLI command (`pugi index`) can
|
|
53
78
|
* pre-filter the file walk without instantiating a parser. The set
|
|
54
79
|
* mirrors `INDEXED_LANGUAGES` in `./types.ts`.
|
|
55
|
-
*
|
|
56
|
-
* `.cjs` and `.mjs` map to javascript; `.tsx` and `.jsx` map to
|
|
57
|
-
* typescript / javascript respectively (tree-sitter-typescript ships
|
|
58
|
-
* a TSX grammar variant; same for JSX in tree-sitter-javascript).
|
|
59
80
|
*/
|
|
60
81
|
export const PARSEABLE_EXTENSIONS = Object.freeze([
|
|
61
82
|
'.ts',
|
|
@@ -68,4 +89,510 @@ export const PARSEABLE_EXTENSIONS = Object.freeze([
|
|
|
68
89
|
'.go',
|
|
69
90
|
'.rs',
|
|
70
91
|
]);
|
|
92
|
+
/** Tag for the typescript-with-jsx variant of the tree-sitter grammar. */
|
|
93
|
+
const TSX_TAG = 'tsx';
|
|
94
|
+
let bindingsCache = null;
|
|
95
|
+
let bindingsError = null;
|
|
96
|
+
let bindingsErrorReported = false;
|
|
97
|
+
/**
 * Load the tree-sitter runtime plus every grammar on first use and
 * memoize the outcome for the rest of the process.
 *
 * Everything is pulled in through dynamic `import()` so that a host
 * without a usable native build produces a `null` return from this
 * function instead of an exception while the module is being loaded.
 *
 * The load is all-or-nothing: if the runtime, any grammar, any `.scm`
 * read, or any query compilation fails, the error is stored in
 * `bindingsError` and every later call returns `null` without
 * retrying. The failure is written to stderr at most once (until the
 * test-only reset helper clears the flag), and only by a call made
 * with a falsy `quiet`.
 *
 * @param quiet When truthy, never write the failure message to stderr.
 * @returns `{ ParserCtor, langs }` where `langs` maps a grammar tag to
 *   `{ language, query }`, or `null` when loading failed.
 */
async function loadBindings(quiet) {
    // Shared by the "already failed" fast path and the catch handler.
    const reportFailureOnce = () => {
        if (quiet || bindingsErrorReported) {
            return;
        }
        bindingsErrorReported = true;
        process.stderr.write(`pugi index: tree-sitter unavailable on this platform - ${bindingsError.message}\n`);
    };
    if (bindingsCache) {
        return bindingsCache;
    }
    if (bindingsError) {
        reportFailureOnce();
        return null;
    }
    try {
        // `tree-sitter` may surface either as the namespace itself or
        // under `.default`, depending on how the CJS entry is wrapped.
        const runtime = await import('tree-sitter');
        const ParserCtor = runtime.default ?? runtime;
        const QueryCtor = ParserCtor.Query;
        if (typeof QueryCtor !== 'function') {
            throw new Error('tree-sitter module loaded but Parser.Query is missing - rebuild the native binding');
        }
        // One row per grammar tag: where the Language object comes from
        // and which `.scm` file drives it. The tsx grammar reuses the
        // typescript query source.
        const grammarSpecs = [
            {
                tag: 'typescript',
                queryName: 'typescript',
                load: () => import('tree-sitter-typescript'),
                pick: (m) => m.typescript ?? m.default?.typescript,
            },
            {
                tag: TSX_TAG,
                queryName: 'typescript',
                load: () => import('tree-sitter-typescript'),
                pick: (m) => m.tsx ?? m.default?.tsx,
            },
            {
                tag: 'javascript',
                queryName: 'javascript',
                load: () => import('tree-sitter-javascript'),
                pick: (m) => m.default ?? m,
            },
            {
                tag: 'python',
                queryName: 'python',
                load: () => import('tree-sitter-python'),
                pick: (m) => m.default ?? m,
            },
            {
                tag: 'go',
                queryName: 'go',
                load: () => import('tree-sitter-go'),
                pick: (m) => m.default ?? m,
            },
            {
                tag: 'rust',
                queryName: 'rust',
                load: () => import('tree-sitter-rust'),
                pick: (m) => m.default ?? m,
            },
        ];
        // Phase 1: resolve every Language object, one grammar at a time.
        const languages = [];
        for (const spec of grammarSpecs) {
            const mod = await spec.load();
            const language = spec.pick(mod);
            if (!language) {
                throw new Error('grammar load produced no Language object');
            }
            languages.push(language);
        }
        // Phase 2: read each distinct query file once.
        const querySources = new Map();
        for (const queryName of ['typescript', 'javascript', 'python', 'go', 'rust']) {
            querySources.set(queryName, readQuerySource(queryName));
        }
        // Phase 3: compile one query per grammar and register the pair.
        const langs = new Map();
        grammarSpecs.forEach((spec, i) => {
            const language = languages[i];
            langs.set(spec.tag, {
                language,
                query: new QueryCtor(language, querySources.get(spec.queryName)),
            });
        });
        bindingsCache = { ParserCtor, langs };
        return bindingsCache;
    }
    catch (err) {
        bindingsError = err instanceof Error ? err : new Error(String(err));
        reportFailureOnce();
        return null;
    }
}
|
|
191
|
+
/**
 * Test-only hook: forget the memoized bindings, the remembered load
 * failure, and the "already reported" flag, so the next
 * `loadBindings` call runs the cold path again. Not part of the
 * public API.
 */
export function __resetBindingsCacheForTests() {
    bindingsErrorReported = false;
    bindingsError = null;
    bindingsCache = null;
}
|
|
200
|
+
/**
 * Synchronously read the tree-sitter query text for one language.
 *
 * The file is looked up as `queries/<lang>.scm` inside the directory
 * that contains this module (derived from `import.meta.url`), so the
 * lookup follows the module wherever it is loaded from.
 *
 * @param lang Base name of the query file, without the `.scm` suffix.
 * @returns The file contents decoded as UTF-8.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function readQuerySource(lang) {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const queryFile = resolve(moduleDir, 'queries', `${lang}.scm`);
    return readFileSync(queryFile, 'utf8');
}
|
|
211
|
+
/**
 * Map a file path to its grammar tag using the lower-cased file
 * extension as the key into `EXT_TO_LANG`.
 *
 * @param path File path; only its extension is inspected.
 * @returns The grammar tag (for example `'typescript'` or `'tsx'`), or
 *   `null` when the extension is not in the table.
 */
export function detectLanguage(path) {
    const extension = extname(path).toLowerCase();
    const tag = EXT_TO_LANG[extension];
    return tag ?? null;
}
|
|
220
|
+
/**
 * Parse one source file and return its symbols and name-keyed edges.
 *
 * Outcomes:
 *
 *   - Extension not recognised by `detectLanguage`: returns
 *     `{symbols: [], pendingEdges: [], language: null, parseMs: 0}`
 *     without reading the file.
 *   - tree-sitter bindings unavailable, or no entry for the detected
 *     grammar: same empty shape (`loadBindings` handles its own
 *     one-time stderr message).
 *   - `parser.parse` throws: empty symbols/edges with the language tag
 *     set, plus a stderr line unless `opts.quiet`.
 *   - `parser.parse` yields no tree (for example because the parser
 *     timeout was hit): empty symbols/edges with the language tag set,
 *     plus a stderr line unless `opts.quiet`, so a dropped file is as
 *     visible as a merely slow one.
 *   - Total time over `PERF_BUDGET_MS`: the result is still returned;
 *     a one-line warning goes to stderr unless `opts.quiet`.
 *
 * `parseMs` covers parser construction, parsing and extraction; it
 * does not include loading the bindings or reading the file.
 *
 * @param path File to parse. Read from disk unless
 *   `opts.sourceOverride` is supplied.
 * @param opts Optional settings:
 *   - `quiet`: suppress stderr diagnostics.
 *   - `sourceOverride`: source text to parse instead of reading `path`.
 *   - `relPath`: path recorded on symbols and used in messages;
 *     defaults to `path`.
 * @returns `{ symbols, pendingEdges, language, parseMs }`.
 * @throws Whatever `fs.readFile` rejects with when the file cannot be
 *   read.
 */
export async function parseFile(path, opts = {}) {
    // Fresh object and arrays on every call so callers may mutate them.
    const unsupportedResult = () => ({ symbols: [], pendingEdges: [], language: null, parseMs: 0 });
    const language = detectLanguage(path);
    if (!language) {
        return unsupportedResult();
    }
    const bindings = await loadBindings(opts.quiet === true);
    if (!bindings) {
        return unsupportedResult();
    }
    const langEntry = bindings.langs.get(language);
    if (!langEntry) {
        return unsupportedResult();
    }
    const source = opts.sourceOverride !== undefined
        ? opts.sourceOverride
        : await fs.readFile(path, 'utf8');
    const relPath = opts.relPath ?? path;
    const start = performance.now();
    // Result for a recognised language that produced nothing usable.
    const emptyResult = () => ({
        symbols: [],
        pendingEdges: [],
        language: tagForResult(language),
        parseMs: performance.now() - start,
    });
    const warn = (message) => {
        if (!opts.quiet) {
            process.stderr.write(message);
        }
    };
    const parser = new bindings.ParserCtor();
    parser.setLanguage(langEntry.language);
    // Bound pathological inputs when the binding supports a timeout.
    if (typeof parser.setTimeoutMicros === 'function') {
        parser.setTimeoutMicros(PARSER_TIMEOUT_MICROS);
    }
    // NOTE(review): `parse` is called with default options. Some
    // node-tree-sitter releases are reported to reject string inputs
    // larger than the default read buffer unless `bufferSize` is
    // passed - confirm against the pinned version.
    let tree = null;
    try {
        tree = parser.parse(source);
    }
    catch (err) {
        warn(`pugi index: parse error in ${relPath}: ${err instanceof Error ? err.message : String(err)}\n`);
        return emptyResult();
    }
    if (!tree || !tree.rootNode) {
        // No tree came back; say so instead of dropping the file silently.
        warn(`pugi index: ${relPath} produced no syntax tree (parser timeout of ${PARSER_TIMEOUT_MICROS / 1000} ms exceeded or parser returned nothing) - skipped\n`);
        return emptyResult();
    }
    const { symbols, pendingEdges } = extract(tree.rootNode, langEntry.query, relPath, language);
    const parseMs = performance.now() - start;
    if (parseMs > PERF_BUDGET_MS) {
        warn(`pugi index: ${relPath} parsed in ${parseMs.toFixed(1)} ms (${countLines(source)} LOC) - over ${PERF_BUDGET_MS} ms budget\n`);
    }
    return {
        symbols,
        pendingEdges,
        language: tagForResult(language),
        parseMs,
    };
}
|
|
304
|
+
/**
 * Translate an internal grammar tag into the language label reported
 * on results. The tsx grammar is reported as `'typescript'`; every
 * other tag is returned unchanged.
 *
 * @param grammarTag Tag as produced by `detectLanguage`.
 * @returns The label to expose on the parse result.
 */
function tagForResult(grammarTag) {
    return grammarTag === TSX_TAG ? 'typescript' : grammarTag;
}
|
|
315
|
+
/**
 * Run the language query over a syntax tree and turn its captures
 * into symbol rows and name-keyed pending edges.
 *
 * Capture names are read as dot-separated `<family>.<kind>.<role>`:
 *
 *   - `symbol.<kind>.def` / `symbol.<kind>.name` pair up inside one
 *     match to describe a declaration and its name node.
 *   - `edge.<kind>.target` names the referenced symbol; an optional
 *     `edge.<kind>.from` in the same match names the origin. Without
 *     a `from` capture the origin is the nearest ancestor of the
 *     target that was recorded as a symbol definition; if there is
 *     none, the edge is dropped.
 *
 * Captures with fewer than three segments, an unknown role, or a kind
 * rejected by `isSymbolKind` / `isEdgeKind` are ignored. Within one
 * match a later capture for the same kind and role replaces an
 * earlier one.
 *
 * @param rootNode Root syntax node of the parsed file.
 * @param query Compiled query; only `query.matches(rootNode)` is used.
 * @param file Path stored on every emitted symbol.
 * @param grammarTag Grammar tag forwarded to `computeScope`.
 * @returns `{ symbols, pendingEdges }`.
 */
function extract(rootNode, query, file, grammarTag) {
    const matches = query.matches(rootNode);
    // Sort one match's captures for `family` into two kind-keyed maps,
    // one per role of interest.
    const bucketCaptures = (captures, family, firstRole, secondRole) => {
        const firstByKind = new Map();
        const secondByKind = new Map();
        for (const capture of captures) {
            const segments = capture.name.split('.');
            if (segments.length < 3 || segments[0] !== family) {
                continue;
            }
            const [, kind, role] = segments;
            if (role === firstRole) {
                firstByKind.set(kind, capture.node);
            }
            else if (role === secondRole) {
                secondByKind.set(kind, capture.node);
            }
        }
        return [firstByKind, secondByKind];
    };
    // ---- Pass 1: symbols -------------------------------------------
    const symbols = [];
    // def-node id -> index into `symbols`. A definition can show up in
    // several matches; only its first appearance produces a row.
    const symbolByDefId = new Map();
    // def-node id -> { name, scope, kind }, consulted in pass 2 to
    // attribute an edge to the definition that encloses its target.
    const defContextByNodeId = new Map();
    for (const match of matches) {
        const [defNodes, nameNodes] = bucketCaptures(match.captures, 'symbol', 'def', 'name');
        for (const [kind, defNode] of defNodes) {
            const nameNode = nameNodes.get(kind);
            if (!defNode || !nameNode || !isSymbolKind(kind)) {
                continue;
            }
            if (symbolByDefId.has(defNode.id)) {
                continue;
            }
            const symbol = {
                name: nameNode.text,
                kind,
                file,
                // Rows are stored 1-based; the column is passed through as-is.
                line: defNode.startPosition.row + 1,
                column: defNode.startPosition.column,
                scope: computeScope(defNode, grammarTag),
            };
            const signature = trimSignature(defNode);
            if (signature) {
                symbol.signature = signature;
            }
            symbolByDefId.set(defNode.id, symbols.length);
            symbols.push(symbol);
            defContextByNodeId.set(defNode.id, {
                name: symbol.name,
                scope: symbol.scope,
                kind: symbol.kind,
            });
        }
    }
    // ---- Pass 2: edges ---------------------------------------------
    // Work out who an edge originates from, or null when nobody can be
    // named.
    const resolveOrigin = (fromNode, targetNode) => {
        if (fromNode) {
            // NOTE(review): scope is derived from the captured `from`
            // node itself, whereas symbol scope above is derived from the
            // def node. If `from` captures a name identifier inside its
            // own declaration, the declaration's name would be included
            // here but not in the symbol's scope - confirm against the
            // `.scm` files and the edge resolver.
            return { name: fromNode.text, scope: computeScope(fromNode, grammarTag) };
        }
        const enclosing = findEnclosingDef(targetNode, defContextByNodeId);
        return enclosing ? { name: enclosing.name, scope: enclosing.scope } : null;
    };
    const pendingEdges = [];
    for (const match of matches) {
        const [fromNodes, targetNodes] = bucketCaptures(match.captures, 'edge', 'from', 'target');
        for (const [kind, targetNode] of targetNodes) {
            if (!targetNode || !isEdgeKind(kind)) {
                continue;
            }
            const origin = resolveOrigin(fromNodes.get(kind), targetNode);
            if (!origin) {
                // No explicit origin and no enclosing tracked definition:
                // the edge has nothing to hang off, so it is skipped.
                continue;
            }
            pendingEdges.push({
                fromName: origin.name,
                fromScope: origin.scope,
                toName: targetNode.text,
                kind,
                line: targetNode.startPosition.row + 1,
            });
        }
    }
    return { symbols, pendingEdges };
}
|
|
455
|
+
/**
 * Find the closest ancestor of `startNode` whose `id` has an entry in
 * `byId`. The search begins at the parent, so `startNode` itself is
 * never a candidate.
 *
 * @param startNode Node whose ancestors are searched via `.parent`.
 * @param byId Map from node id to a context value.
 * @returns The first truthy context found walking upward, or `null`
 *   when the chain of parents runs out.
 */
function findEnclosingDef(startNode, byId) {
    for (let ancestor = startNode.parent; ancestor; ancestor = ancestor.parent) {
        const context = byId.get(ancestor.id);
        if (context) {
            return context;
        }
    }
    return null;
}
|
|
472
|
+
/**
 * Build the scope path for a node from the names of its enclosing
 * scope-forming ancestors, outermost first.
 *
 * The walk starts at the node's parent, so the node's own name is not
 * part of the result. Which ancestors contribute a name is decided by
 * `scopeNameFor`. Segments are joined with `::` for the `rust`
 * grammar and with `.` for everything else.
 *
 * @param defNode Node whose ancestors are inspected.
 * @param grammarTag Grammar tag forwarded to `scopeNameFor`.
 * @returns The joined scope path; an empty string when no ancestor
 *   contributes a name.
 */
function computeScope(defNode, grammarTag) {
    // Gathered innermost-first while climbing, flipped before joining.
    const innermostFirst = [];
    for (let ancestor = defNode.parent; ancestor; ancestor = ancestor.parent) {
        const label = scopeNameFor(ancestor, grammarTag);
        if (label) {
            innermostFirst.push(label);
        }
    }
    const separator = grammarTag === 'rust' ? '::' : '.';
    return innermostFirst.reverse().join(separator);
}
|
|
492
|
+
/** Node types that open a named scope in the TS / TSX / JS grammars. */
const JS_FAMILY_SCOPE_TYPES = new Set([
    'class_declaration',
    'abstract_class_declaration',
    'interface_declaration',
    'function_declaration',
    'method_definition',
]);
/** Grammar tag -> node types that contribute a segment to the scope. */
const SCOPE_TYPES_BY_GRAMMAR = new Map([
    ['typescript', JS_FAMILY_SCOPE_TYPES],
    ['tsx', JS_FAMILY_SCOPE_TYPES],
    ['javascript', JS_FAMILY_SCOPE_TYPES],
    ['python', new Set(['class_definition', 'function_definition'])],
    ['go', new Set(['function_declaration', 'method_declaration', 'type_declaration'])],
    ['rust', new Set(['mod_item', 'function_item', 'struct_item', 'enum_item', 'trait_item'])],
]);
/**
 * Decide whether a syntax node forms a named scope for the given
 * grammar and, if so, return the text of its `name` field.
 *
 * Supporting another grammar means adding one row to
 * `SCOPE_TYPES_BY_GRAMMAR`.
 *
 * @param node Syntax node to classify by its `type`.
 * @param grammarTag Grammar tag selecting the set of scope node types.
 * @returns The node's `name` field text, or `null` when the grammar is
 *   unknown, the node type is not a scope for it, or the name cannot
 *   be read.
 */
function scopeNameFor(node, grammarTag) {
    const type = node.type;
    const scopeTypes = SCOPE_TYPES_BY_GRAMMAR.get(grammarTag);
    if (scopeTypes === undefined || !scopeTypes.has(type)) {
        return null;
    }
    return nodeFieldText(node, 'name');
}
|
|
539
|
+
/**
 * Read the text of a named field child without ever throwing on a
 * missing piece.
 *
 * @param node Object that may expose `childForFieldName(field)`.
 * @param field Field name to look up.
 * @returns The child's `text` when it is a non-empty string; `null`
 *   when the node has no `childForFieldName` method, the field has no
 *   child, or the text is empty or not a string.
 */
function nodeFieldText(node, field) {
    const child = typeof node.childForFieldName === 'function'
        ? node.childForFieldName(field)
        : null;
    if (!child) {
        return null;
    }
    const { text } = child;
    if (typeof text !== 'string' || text.length === 0) {
        return null;
    }
    return text;
}
|
|
556
|
+
/**
 * Produce a short signature snippet for a definition: the first line
 * of the node's text, whitespace-trimmed and capped at 120
 * characters. Longer lines are cut to 117 characters followed by
 * `...`.
 *
 * @param defNode Node whose `text` is summarised.
 * @returns The snippet, or `null` when the text is missing, not a
 *   string, empty, or its first line is blank.
 */
function trimSignature(defNode) {
    const source = defNode.text;
    if (typeof source !== 'string' || source.length === 0) {
        return null;
    }
    // Everything before the first line feed; a trailing carriage
    // return from a CRLF ending is removed by the trim.
    const newlineAt = source.indexOf('\n');
    const headLine = (newlineAt === -1 ? source : source.slice(0, newlineAt)).trim();
    if (headLine.length === 0) {
        return null;
    }
    return headLine.length > 120 ? `${headLine.slice(0, 117)}...` : headLine;
}
|
|
574
|
+
/**
 * Count the lines in a string for diagnostic output.
 *
 * Each line feed terminates one line, and a non-empty final segment
 * with no trailing line feed counts as one more. So `''` has 0 lines,
 * and `'a'`, `'a\n'` both have 1 - a newline-terminated file is not
 * over-counted and an empty file is not reported as one line.
 *
 * @param s Text to measure.
 * @returns Number of lines in `s`.
 */
function countLines(s) {
    if (s.length === 0) {
        return 0;
    }
    const LINE_FEED = 10;
    let lines = 0;
    for (let i = 0; i < s.length; i += 1) {
        if (s.charCodeAt(i) === LINE_FEED) {
            lines += 1;
        }
    }
    // Text that does not end in a line feed has one unterminated line.
    if (s.charCodeAt(s.length - 1) !== LINE_FEED) {
        lines += 1;
    }
    return lines;
}
|
|
582
|
+
/**
 * Report whether `s` is one of the symbol kinds the parser emits.
 *
 * @param s Candidate kind, normally the middle segment of a
 *   `symbol.<kind>.<role>` capture name.
 * @returns `true` only for an exact match with a known kind.
 */
function isSymbolKind(s) {
    switch (s) {
        case 'function':
        case 'class':
        case 'method':
        case 'interface':
        case 'type':
        case 'variable':
        case 'import':
            return true;
        default:
            return false;
    }
}
|
|
591
|
+
/** Edge kinds accepted from `edge.<kind>.<role>` capture names. */
const EDGE_KINDS = new Set(['calls', 'extends', 'implements', 'imports', 'references']);
/**
 * Report whether `s` is one of the edge kinds the parser emits.
 *
 * @param s Candidate kind, normally the middle segment of an
 *   `edge.<kind>.<role>` capture name.
 * @returns `true` only for an exact match with a known kind.
 */
function isEdgeKind(s) {
    return EDGE_KINDS.has(s);
}
|
|
71
598
|
//# sourceMappingURL=parser.js.map
|