@optave/codegraph 3.9.5 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -16
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +4 -3
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
- package/dist/ast-analysis/rules/csharp.js +8 -1
- package/dist/ast-analysis/rules/csharp.js.map +1 -1
- package/dist/ast-analysis/rules/go.d.ts.map +1 -1
- package/dist/ast-analysis/rules/go.js +4 -1
- package/dist/ast-analysis/rules/go.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts +6 -0
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +151 -4
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/rules/java.d.ts.map +1 -1
- package/dist/ast-analysis/rules/java.js +5 -1
- package/dist/ast-analysis/rules/java.js.map +1 -1
- package/dist/ast-analysis/rules/php.d.ts.map +1 -1
- package/dist/ast-analysis/rules/php.js +6 -1
- package/dist/ast-analysis/rules/php.js.map +1 -1
- package/dist/ast-analysis/rules/python.d.ts.map +1 -1
- package/dist/ast-analysis/rules/python.js +5 -1
- package/dist/ast-analysis/rules/python.js.map +1 -1
- package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
- package/dist/ast-analysis/rules/ruby.js +4 -1
- package/dist/ast-analysis/rules/ruby.js.map +1 -1
- package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
- package/dist/ast-analysis/rules/rust.js +5 -1
- package/dist/ast-analysis/rules/rust.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +171 -37
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +10 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js +10 -0
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +7 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +7 -2
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +210 -34
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +8 -0
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +9 -6
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/parser.d.ts +54 -1
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +181 -10
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/models.js +2 -2
- package/dist/domain/wasm-worker-entry.js +15 -14
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/features/ast.d.ts.map +1 -1
- package/dist/features/ast.js +11 -9
- package/dist/features/ast.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -0
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -0
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +14 -8
- package/dist/mcp/server.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +19 -5
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/package.json +8 -7
- package/src/ast-analysis/engine.ts +14 -2
- package/src/ast-analysis/rules/csharp.ts +8 -1
- package/src/ast-analysis/rules/go.ts +4 -1
- package/src/ast-analysis/rules/index.ts +181 -4
- package/src/ast-analysis/rules/java.ts +5 -1
- package/src/ast-analysis/rules/php.ts +6 -1
- package/src/ast-analysis/rules/python.ts +5 -1
- package/src/ast-analysis/rules/ruby.ts +4 -1
- package/src/ast-analysis/rules/rust.ts +5 -1
- package/src/ast-analysis/visitors/ast-store-visitor.ts +165 -34
- package/src/domain/graph/builder/context.ts +10 -0
- package/src/domain/graph/builder/helpers.ts +8 -3
- package/src/domain/graph/builder/pipeline.ts +234 -36
- package/src/domain/graph/builder/stages/collect-files.ts +9 -0
- package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
- package/src/domain/graph/builder/stages/finalize.ts +9 -6
- package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
- package/src/domain/parser.ts +205 -9
- package/src/domain/search/models.ts +2 -2
- package/src/domain/wasm-worker-entry.ts +23 -13
- package/src/features/ast.ts +22 -9
- package/src/infrastructure/config.ts +1 -0
- package/src/mcp/server.ts +16 -9
- package/src/mcp/tool-registry.ts +23 -5
- package/src/types.ts +1 -0
|
@@ -12,10 +12,12 @@ import path from 'node:path';
|
|
|
12
12
|
import { performance } from 'node:perf_hooks';
|
|
13
13
|
import { bulkNodeIdsByFile } from '../../../../db/index.js';
|
|
14
14
|
import { debug } from '../../../../infrastructure/logger.js';
|
|
15
|
+
import { normalizePath } from '../../../../shared/constants.js';
|
|
15
16
|
import { toErrorMessage } from '../../../../shared/errors.js';
|
|
16
17
|
import type {
|
|
17
18
|
BetterSqlite3Database,
|
|
18
19
|
ExtractorOutput,
|
|
20
|
+
FileToParse,
|
|
19
21
|
MetadataUpdate,
|
|
20
22
|
SqliteStatement,
|
|
21
23
|
} from '../../../../types.js';
|
|
@@ -90,16 +92,30 @@ function marshalSymbolBatches(allSymbols: Map<string, ExtractorOutput>): InsertN
|
|
|
90
92
|
return batches;
|
|
91
93
|
}
|
|
92
94
|
|
|
93
|
-
/**
|
|
94
|
-
|
|
95
|
-
|
|
95
|
+
/**
|
|
96
|
+
* Build file hash entries for every collected file, including those that
|
|
97
|
+
* produced zero symbols (empty files, parsers that silently no-op'd, or
|
|
98
|
+
* optional-language extensions whose grammar wasn't installed). Iterating the
|
|
99
|
+
* symbol map instead would skip such files and leave them missing from
|
|
100
|
+
* `file_hashes`, which permanently breaks the JS-side fast-skip pre-flight on
|
|
101
|
+
* any subsequent no-op rebuild (#1068).
|
|
102
|
+
*
|
|
103
|
+
* Exported for unit testing.
|
|
104
|
+
*/
|
|
105
|
+
export function buildFileHashes(
|
|
106
|
+
filesToParse: FileToParse[],
|
|
96
107
|
precomputedData: Map<string, PrecomputedFileData>,
|
|
97
108
|
metadataUpdates: MetadataUpdate[],
|
|
98
109
|
rootDir: string,
|
|
99
110
|
): Array<{ file: string; hash: string; mtime: number; size: number }> {
|
|
100
111
|
const fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }> = [];
|
|
112
|
+
const seen = new Set<string>();
|
|
113
|
+
|
|
114
|
+
for (const item of filesToParse) {
|
|
115
|
+
const relPath = item.relPath ?? normalizePath(path.relative(rootDir, item.file));
|
|
116
|
+
if (seen.has(relPath)) continue;
|
|
117
|
+
seen.add(relPath);
|
|
101
118
|
|
|
102
|
-
for (const [relPath] of allSymbols) {
|
|
103
119
|
const precomputed = precomputedData.get(relPath);
|
|
104
120
|
if (precomputed?._reverseDepOnly) {
|
|
105
121
|
continue; // file unchanged, hash already correct
|
|
@@ -112,7 +128,7 @@ function buildFileHashes(
|
|
|
112
128
|
size = precomputed.stat.size;
|
|
113
129
|
} else {
|
|
114
130
|
const rawStat = fileStat(path.join(rootDir, relPath));
|
|
115
|
-
mtime = rawStat ?
|
|
131
|
+
mtime = rawStat ? rawStat.mtime : 0;
|
|
116
132
|
size = rawStat ? rawStat.size : 0;
|
|
117
133
|
}
|
|
118
134
|
fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size });
|
|
@@ -127,7 +143,7 @@ function buildFileHashes(
|
|
|
127
143
|
}
|
|
128
144
|
if (code !== null) {
|
|
129
145
|
const stat = fileStat(absPath);
|
|
130
|
-
const mtime = stat ?
|
|
146
|
+
const mtime = stat ? stat.mtime : 0;
|
|
131
147
|
const size = stat ? stat.size : 0;
|
|
132
148
|
fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size });
|
|
133
149
|
}
|
|
@@ -136,7 +152,7 @@ function buildFileHashes(
|
|
|
136
152
|
|
|
137
153
|
// Also include metadata-only updates (self-heal mtime/size without re-parse)
|
|
138
154
|
for (const item of metadataUpdates) {
|
|
139
|
-
const mtime = item.stat ?
|
|
155
|
+
const mtime = item.stat ? item.stat.mtime : 0;
|
|
140
156
|
const size = item.stat ? item.stat.size : 0;
|
|
141
157
|
fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size });
|
|
142
158
|
}
|
|
@@ -157,7 +173,7 @@ function tryNativeInsert(ctx: PipelineContext): boolean {
|
|
|
157
173
|
for (const item of filesToParse) {
|
|
158
174
|
if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData);
|
|
159
175
|
}
|
|
160
|
-
const fileHashes = buildFileHashes(
|
|
176
|
+
const fileHashes = buildFileHashes(filesToParse, precomputedData, metadataUpdates, rootDir);
|
|
161
177
|
|
|
162
178
|
// In native-first mode (single rusqlite connection), no WAL dance is needed.
|
|
163
179
|
// In dual-connection mode, checkpoint JS side before native write, then
|
|
@@ -321,7 +337,7 @@ function insertChildrenAndEdges(
|
|
|
321
337
|
|
|
322
338
|
function updateFileHashes(
|
|
323
339
|
_db: BetterSqlite3Database,
|
|
324
|
-
|
|
340
|
+
filesToParse: FileToParse[],
|
|
325
341
|
precomputedData: Map<string, PrecomputedFileData>,
|
|
326
342
|
metadataUpdates: MetadataUpdate[],
|
|
327
343
|
rootDir: string,
|
|
@@ -329,7 +345,15 @@ function updateFileHashes(
|
|
|
329
345
|
): void {
|
|
330
346
|
if (!upsertHash) return;
|
|
331
347
|
|
|
332
|
-
|
|
348
|
+
// Iterate every collected file (#1068): files that produced zero symbols
|
|
349
|
+
// (empty, parser no-op, or grammar-missing optional language) still need a
|
|
350
|
+
// hash row, otherwise the next no-op rebuild's fast-skip pre-flight rejects.
|
|
351
|
+
const seen = new Set<string>();
|
|
352
|
+
for (const item of filesToParse) {
|
|
353
|
+
const relPath = item.relPath ?? normalizePath(path.relative(rootDir, item.file));
|
|
354
|
+
if (seen.has(relPath)) continue;
|
|
355
|
+
seen.add(relPath);
|
|
356
|
+
|
|
333
357
|
const precomputed = precomputedData.get(relPath);
|
|
334
358
|
if (precomputed?._reverseDepOnly) {
|
|
335
359
|
// no-op: file unchanged, hash already correct
|
|
@@ -341,7 +365,7 @@ function updateFileHashes(
|
|
|
341
365
|
size = precomputed.stat.size;
|
|
342
366
|
} else {
|
|
343
367
|
const rawStat = fileStat(path.join(rootDir, relPath));
|
|
344
|
-
mtime = rawStat ?
|
|
368
|
+
mtime = rawStat ? rawStat.mtime : 0;
|
|
345
369
|
size = rawStat ? rawStat.size : 0;
|
|
346
370
|
}
|
|
347
371
|
upsertHash.run(relPath, precomputed.hash, mtime, size);
|
|
@@ -356,7 +380,7 @@ function updateFileHashes(
|
|
|
356
380
|
}
|
|
357
381
|
if (code !== null) {
|
|
358
382
|
const stat = fileStat(absPath);
|
|
359
|
-
const mtime = stat ?
|
|
383
|
+
const mtime = stat ? stat.mtime : 0;
|
|
360
384
|
const size = stat ? stat.size : 0;
|
|
361
385
|
upsertHash.run(relPath, fileHash(code), mtime, size);
|
|
362
386
|
}
|
|
@@ -365,7 +389,7 @@ function updateFileHashes(
|
|
|
365
389
|
|
|
366
390
|
// Also update metadata-only entries (self-heal mtime/size without re-parse)
|
|
367
391
|
for (const item of metadataUpdates) {
|
|
368
|
-
const mtime = item.stat ?
|
|
392
|
+
const mtime = item.stat ? item.stat.mtime : 0;
|
|
369
393
|
const size = item.stat ? item.stat.size : 0;
|
|
370
394
|
upsertHash.run(item.relPath, item.hash, mtime, size);
|
|
371
395
|
}
|
|
@@ -415,7 +439,7 @@ export async function insertNodes(ctx: PipelineContext): Promise<void> {
|
|
|
415
439
|
const insertAll = ctx.db.transaction(() => {
|
|
416
440
|
insertDefinitionsAndExports(ctx.db, allSymbols);
|
|
417
441
|
insertChildrenAndEdges(ctx.db, allSymbols);
|
|
418
|
-
updateFileHashes(ctx.db,
|
|
442
|
+
updateFileHashes(ctx.db, filesToParse, precomputedData, metadataUpdates, rootDir, upsertHash);
|
|
419
443
|
});
|
|
420
444
|
|
|
421
445
|
insertAll();
|
package/src/domain/parser.ts
CHANGED
|
@@ -316,16 +316,23 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
|
|
|
316
316
|
*
|
|
317
317
|
* Name is preserved for caller compatibility; the function now ensures
|
|
318
318
|
* *analysis data* rather than *trees*.
|
|
319
|
+
*
|
|
320
|
+
* `needsFn` (optional): when provided, only files for which it returns true are
|
|
321
|
+
* re-parsed. Without it the function falls back to "any WASM-parseable file
|
|
322
|
+
* without _tree", which was the source of #1036 — a single file missing one
|
|
323
|
+
* analysis triggered a full-build re-parse of every WASM-parseable file.
|
|
319
324
|
*/
|
|
320
325
|
export async function ensureWasmTrees(
|
|
321
326
|
fileSymbols: Map<string, any>,
|
|
322
327
|
rootDir: string,
|
|
328
|
+
needsFn?: (relPath: string, symbols: any) => boolean,
|
|
323
329
|
): Promise<void> {
|
|
324
330
|
// Collect files that still need analysis data and are parseable by WASM.
|
|
325
331
|
const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
|
|
326
332
|
for (const [relPath, symbols] of fileSymbols) {
|
|
327
333
|
if (symbols._tree) continue; // legacy path — leave existing trees alone
|
|
328
334
|
if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
|
|
335
|
+
if (needsFn && !needsFn(relPath, symbols)) continue;
|
|
329
336
|
pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
|
|
330
337
|
}
|
|
331
338
|
if (pending.length === 0) return;
|
|
@@ -412,6 +419,128 @@ export function getInstalledWasmExtensions(): Set<string> {
|
|
|
412
419
|
return exts;
|
|
413
420
|
}
|
|
414
421
|
|
|
422
|
+
/**
|
|
423
|
+
* Lowercase file extensions covered by the native Rust addon.
|
|
424
|
+
*
|
|
425
|
+
* Mirrors `LanguageKind::from_extension` in
|
|
426
|
+
* `crates/codegraph-core/src/parser_registry.rs`. Used to classify why the
|
|
427
|
+
* native orchestrator dropped a file: extensions outside this set are a
|
|
428
|
+
* legitimate parser limit (no Rust extractor exists), while extensions inside
|
|
429
|
+
* it indicate a real native bug (parse/read/extract failure).
|
|
430
|
+
*
|
|
431
|
+
* Keep this list in sync with the Rust enum — the native addon is a separate
|
|
432
|
+
* npm package, so JS has no runtime way to discover its language coverage.
|
|
433
|
+
*/
|
|
434
|
+
export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
|
|
435
|
+
'.js',
|
|
436
|
+
'.jsx',
|
|
437
|
+
'.mjs',
|
|
438
|
+
'.cjs',
|
|
439
|
+
'.ts',
|
|
440
|
+
'.tsx',
|
|
441
|
+
'.py',
|
|
442
|
+
'.pyi',
|
|
443
|
+
'.tf',
|
|
444
|
+
'.hcl',
|
|
445
|
+
'.go',
|
|
446
|
+
'.rs',
|
|
447
|
+
'.java',
|
|
448
|
+
'.cs',
|
|
449
|
+
'.rb',
|
|
450
|
+
'.rake',
|
|
451
|
+
'.gemspec',
|
|
452
|
+
'.php',
|
|
453
|
+
'.phtml',
|
|
454
|
+
'.c',
|
|
455
|
+
'.h',
|
|
456
|
+
'.cpp',
|
|
457
|
+
'.cc',
|
|
458
|
+
'.cxx',
|
|
459
|
+
'.hpp',
|
|
460
|
+
'.kt',
|
|
461
|
+
'.kts',
|
|
462
|
+
'.swift',
|
|
463
|
+
'.scala',
|
|
464
|
+
'.sh',
|
|
465
|
+
'.bash',
|
|
466
|
+
'.ex',
|
|
467
|
+
'.exs',
|
|
468
|
+
'.lua',
|
|
469
|
+
'.dart',
|
|
470
|
+
'.zig',
|
|
471
|
+
'.hs',
|
|
472
|
+
'.ml',
|
|
473
|
+
'.mli',
|
|
474
|
+
]);
|
|
475
|
+
|
|
476
|
+
/**
|
|
477
|
+
* Classification for a file the native orchestrator dropped.
|
|
478
|
+
* - `unsupported-by-native`: extension has no Rust extractor (legitimate parser limit).
|
|
479
|
+
* - `native-extractor-failure`: extension is supported by native but the file was
|
|
480
|
+
* still dropped — points at a real bug (read error, parse failure, extractor crash).
|
|
481
|
+
*/
|
|
482
|
+
export type NativeDropReason = 'unsupported-by-native' | 'native-extractor-failure';
|
|
483
|
+
|
|
484
|
+
export interface NativeDropClassification {
|
|
485
|
+
/** Per-reason → per-extension → list of relative paths that hit that bucket. */
|
|
486
|
+
byReason: Record<NativeDropReason, Map<string, string[]>>;
|
|
487
|
+
/** Total file count per reason. */
|
|
488
|
+
totals: Record<NativeDropReason, number>;
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
/**
|
|
492
|
+
* Group the missing files (relative paths) by drop reason and extension so the
|
|
493
|
+
* caller can log per-extension counts and a sample path. Pure function — no
|
|
494
|
+
* I/O, safe to unit-test independently of the build pipeline.
|
|
495
|
+
*/
|
|
496
|
+
export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClassification {
|
|
497
|
+
const byReason: Record<NativeDropReason, Map<string, string[]>> = {
|
|
498
|
+
'unsupported-by-native': new Map(),
|
|
499
|
+
'native-extractor-failure': new Map(),
|
|
500
|
+
};
|
|
501
|
+
const totals: Record<NativeDropReason, number> = {
|
|
502
|
+
'unsupported-by-native': 0,
|
|
503
|
+
'native-extractor-failure': 0,
|
|
504
|
+
};
|
|
505
|
+
for (const rel of relPaths) {
|
|
506
|
+
const ext = path.extname(rel).toLowerCase();
|
|
507
|
+
const reason: NativeDropReason = NATIVE_SUPPORTED_EXTENSIONS.has(ext)
|
|
508
|
+
? 'native-extractor-failure'
|
|
509
|
+
: 'unsupported-by-native';
|
|
510
|
+
const bucket = byReason[reason];
|
|
511
|
+
let list = bucket.get(ext);
|
|
512
|
+
if (!list) {
|
|
513
|
+
list = [];
|
|
514
|
+
bucket.set(ext, list);
|
|
515
|
+
}
|
|
516
|
+
list.push(rel);
|
|
517
|
+
totals[reason]++;
|
|
518
|
+
}
|
|
519
|
+
return { byReason, totals };
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
/**
|
|
523
|
+
* Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
|
|
524
|
+
* Caps at 3 sample paths per extension and 6 extensions total to keep warnings
|
|
525
|
+
* readable when many languages are dropped at once. Extensions are sorted by
|
|
526
|
+
* descending file count so the loudest offender shows up first; ties keep
|
|
527
|
+
* insertion order. Pure function — safe to unit-test independently.
|
|
528
|
+
*/
|
|
529
|
+
export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
|
|
530
|
+
const MAX_EXTS = 6;
|
|
531
|
+
const MAX_SAMPLES = 3;
|
|
532
|
+
const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
|
|
533
|
+
const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
|
|
534
|
+
const sample = paths.slice(0, MAX_SAMPLES).join(', ');
|
|
535
|
+
const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
|
|
536
|
+
return `${ext} (${paths.length}: ${sample}${more})`;
|
|
537
|
+
});
|
|
538
|
+
if (entries.length > MAX_EXTS) {
|
|
539
|
+
shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
|
|
540
|
+
}
|
|
541
|
+
return shown.join('; ');
|
|
542
|
+
}
|
|
543
|
+
|
|
415
544
|
// ── Unified API ──────────────────────────────────────────────────────────────
|
|
416
545
|
|
|
417
546
|
function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
|
|
@@ -938,6 +1067,71 @@ async function parseFilesWasm(
|
|
|
938
1067
|
return result;
|
|
939
1068
|
}
|
|
940
1069
|
|
|
1070
|
+
/**
|
|
1071
|
+
* Files at or below this count use the inline parse path (no worker spawn).
|
|
1072
|
+
*
|
|
1073
|
+
* Sized for typical engine-parity drops: a handful of fixture files in one
|
|
1074
|
+
* or two languages (the recurring HCL case is 4 files). Above this, the
|
|
1075
|
+
* worker-pool's IPC + crash-isolation cost (#965) is amortized over enough
|
|
1076
|
+
* parse work to be worth paying; below it, the ~1–2s cold-start dominates.
|
|
1077
|
+
*/
|
|
1078
|
+
const INLINE_BACKFILL_THRESHOLD = 16;
|
|
1079
|
+
|
|
1080
|
+
/**
|
|
1081
|
+
* Inline WASM parse (no worker) for small file batches.
|
|
1082
|
+
*
|
|
1083
|
+
* Used by the engine-parity backfill path when the native engine drops a
|
|
1084
|
+
* handful of files (typically test fixtures). The worker pool's per-call
|
|
1085
|
+
* IPC + grammar-init overhead can cost 1–2s on slow CI runners — for a
|
|
1086
|
+
* 4-file backfill, that dwarfs the ~10ms of actual parse work.
|
|
1087
|
+
*
|
|
1088
|
+
* Returns symbols with `_tree` set so `runAnalyses` can run AST/CFG/dataflow
|
|
1089
|
+
* visitors via the unified walker (mirrors how WASM-engine results behaved
|
|
1090
|
+
* before the worker pool was introduced).
|
|
1091
|
+
*/
|
|
1092
|
+
async function parseFilesWasmInline(
|
|
1093
|
+
filePaths: string[],
|
|
1094
|
+
rootDir: string,
|
|
1095
|
+
): Promise<Map<string, ExtractorOutput>> {
|
|
1096
|
+
const result = new Map<string, ExtractorOutput>();
|
|
1097
|
+
if (filePaths.length === 0) return result;
|
|
1098
|
+
const parsers = await ensureParsersForFiles(filePaths);
|
|
1099
|
+
for (const filePath of filePaths) {
|
|
1100
|
+
if (!_extToLang.has(path.extname(filePath).toLowerCase())) continue;
|
|
1101
|
+
let code: string;
|
|
1102
|
+
try {
|
|
1103
|
+
code = fs.readFileSync(filePath, 'utf-8');
|
|
1104
|
+
} catch (err: unknown) {
|
|
1105
|
+
warn(`Skipping ${path.relative(rootDir, filePath)}: ${(err as Error).message}`);
|
|
1106
|
+
continue;
|
|
1107
|
+
}
|
|
1108
|
+
const extracted = wasmExtractSymbols(parsers, filePath, code);
|
|
1109
|
+
if (!extracted) continue;
|
|
1110
|
+
const relPath = path.relative(rootDir, filePath).split(path.sep).join('/');
|
|
1111
|
+
const symbols = extracted.symbols as ExtractorOutput & { _tree?: unknown; _langId?: string };
|
|
1112
|
+
symbols._tree = extracted.tree;
|
|
1113
|
+
symbols._langId = extracted.langId;
|
|
1114
|
+
result.set(relPath, symbols);
|
|
1115
|
+
}
|
|
1116
|
+
return result;
|
|
1117
|
+
}
|
|
1118
|
+
|
|
1119
|
+
/**
|
|
1120
|
+
* Backfill helper: small batches use the inline (main-thread) path; larger
|
|
1121
|
+
* batches keep the worker-pool isolation against tree-sitter WASM crashes
|
|
1122
|
+
* (#965). Threshold matches typical engine-parity drop sizes (a few fixture
|
|
1123
|
+
* files in one or two languages).
|
|
1124
|
+
*/
|
|
1125
|
+
export async function parseFilesWasmForBackfill(
|
|
1126
|
+
filePaths: string[],
|
|
1127
|
+
rootDir: string,
|
|
1128
|
+
): Promise<Map<string, ExtractorOutput>> {
|
|
1129
|
+
if (filePaths.length <= INLINE_BACKFILL_THRESHOLD) {
|
|
1130
|
+
return parseFilesWasmInline(filePaths, rootDir);
|
|
1131
|
+
}
|
|
1132
|
+
return parseFilesWasm(filePaths, rootDir);
|
|
1133
|
+
}
|
|
1134
|
+
|
|
941
1135
|
/**
|
|
942
1136
|
* Parse multiple files in bulk and return a Map<relPath, symbols>.
|
|
943
1137
|
*/
|
|
@@ -988,7 +1182,7 @@ export async function parseFilesAuto(
|
|
|
988
1182
|
);
|
|
989
1183
|
if (dropped.length > 0) {
|
|
990
1184
|
warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
|
|
991
|
-
const wasmResults = await
|
|
1185
|
+
const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
|
|
992
1186
|
for (const [relPath, symbols] of wasmResults) {
|
|
993
1187
|
result.set(relPath, symbols);
|
|
994
1188
|
}
|
|
@@ -1003,15 +1197,17 @@ export async function parseFilesAuto(
|
|
|
1003
1197
|
export function getActiveEngine(opts: ParseEngineOpts = {}): {
|
|
1004
1198
|
name: 'native' | 'wasm';
|
|
1005
1199
|
version: string | null;
|
|
1200
|
+
binaryVersion: string | null;
|
|
1006
1201
|
} {
|
|
1007
1202
|
const { name, native } = resolveEngine(opts);
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
//
|
|
1014
|
-
//
|
|
1203
|
+
const binaryVersion: string | null =
|
|
1204
|
+
native && typeof native.engineVersion === 'function' ? native.engineVersion() : null;
|
|
1205
|
+
// The display version prefers the platform package.json so the "Using native
|
|
1206
|
+
// engine (vX)" log matches the npm release the user installed. The Rust
|
|
1207
|
+
// orchestrator's check_version_mismatch compares against CARGO_PKG_VERSION
|
|
1208
|
+
// (the binary's own value), so build_meta writes must use `binaryVersion`,
|
|
1209
|
+
// not this display value — see pipeline.ts and finalize.ts (#1066).
|
|
1210
|
+
let version: string | null = binaryVersion;
|
|
1015
1211
|
if (native) {
|
|
1016
1212
|
try {
|
|
1017
1213
|
version = getNativePackageVersion() ?? version;
|
|
@@ -1019,7 +1215,7 @@ export function getActiveEngine(opts: ParseEngineOpts = {}): {
|
|
|
1019
1215
|
debug(`getNativePackageVersion failed: ${(e as Error).message}`);
|
|
1020
1216
|
}
|
|
1021
1217
|
}
|
|
1022
|
-
return { name, version };
|
|
1218
|
+
return { name, version, binaryVersion };
|
|
1023
1219
|
}
|
|
1024
1220
|
|
|
1025
1221
|
/**
|
|
@@ -42,7 +42,7 @@ export const MODELS: Record<string, ModelConfig> = {
|
|
|
42
42
|
quantized: false,
|
|
43
43
|
},
|
|
44
44
|
'jina-code': {
|
|
45
|
-
name: '
|
|
45
|
+
name: 'jinaai/jina-embeddings-v2-base-code',
|
|
46
46
|
dim: 768,
|
|
47
47
|
contextWindow: 8192,
|
|
48
48
|
desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
|
|
@@ -253,7 +253,7 @@ export async function embed(
|
|
|
253
253
|
}
|
|
254
254
|
|
|
255
255
|
if (texts.length > batchSize) {
|
|
256
|
-
process.
|
|
256
|
+
process.stderr.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
|
|
257
257
|
}
|
|
258
258
|
}
|
|
259
259
|
|
|
@@ -28,7 +28,9 @@ import type { Tree } from 'web-tree-sitter';
|
|
|
28
28
|
import { Language, Parser, Query } from 'web-tree-sitter';
|
|
29
29
|
import { computeLOCMetrics, computeMaintainabilityIndex } from '../ast-analysis/metrics.js';
|
|
30
30
|
import {
|
|
31
|
+
AST_STRING_CONFIGS,
|
|
31
32
|
AST_TYPE_MAPS,
|
|
33
|
+
astStopRecurseKinds,
|
|
32
34
|
CFG_RULES,
|
|
33
35
|
COMPLEXITY_RULES,
|
|
34
36
|
DATAFLOW_RULES,
|
|
@@ -584,7 +586,15 @@ function setupVisitorsLocal(
|
|
|
584
586
|
if (opts.ast) {
|
|
585
587
|
const astTypeMap = AST_TYPE_MAPS.get(langId);
|
|
586
588
|
if (astTypeMap) {
|
|
587
|
-
|
|
589
|
+
const stringConfig = AST_STRING_CONFIGS.get(langId);
|
|
590
|
+
astVisitor = createAstStoreVisitor(
|
|
591
|
+
astTypeMap,
|
|
592
|
+
defs,
|
|
593
|
+
relPath,
|
|
594
|
+
new Map<string, number>(),
|
|
595
|
+
stringConfig,
|
|
596
|
+
astStopRecurseKinds(langId),
|
|
597
|
+
);
|
|
588
598
|
visitors.push(astVisitor);
|
|
589
599
|
}
|
|
590
600
|
}
|
|
@@ -698,18 +708,18 @@ async function handleParse(msg: WorkerParseRequest): Promise<SerializedExtractor
|
|
|
698
708
|
file?: string;
|
|
699
709
|
parentNodeId?: number | null;
|
|
700
710
|
}>;
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
}
|
|
711
|
+
// Always set an array (even empty) — leaving astNodes undefined makes
|
|
712
|
+
// engine.ts::fileNeedsWasmTree treat the file as un-walked and trigger
|
|
713
|
+
// a full ensureWasmTrees re-parse of every WASM-parseable file (#1036).
|
|
714
|
+
// Strip `file` and `parentNodeId` — main thread re-resolves both in
|
|
715
|
+
// features/ast.ts::collectFileAstRows.
|
|
716
|
+
serializedAstNodes = astRows.map((n) => ({
|
|
717
|
+
line: n.line,
|
|
718
|
+
kind: n.kind,
|
|
719
|
+
name: n.name ?? '',
|
|
720
|
+
text: n.text ?? undefined,
|
|
721
|
+
receiver: n.receiver ?? undefined,
|
|
722
|
+
}));
|
|
713
723
|
}
|
|
714
724
|
|
|
715
725
|
if (complexityVisitor) storeComplexityResults(results, defs, entry.id);
|
package/src/features/ast.ts
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
AST_STRING_CONFIGS,
|
|
4
|
+
AST_TYPE_MAPS,
|
|
5
|
+
astStopRecurseKinds,
|
|
6
|
+
} from '../ast-analysis/rules/index.js';
|
|
3
7
|
import { buildExtensionSet } from '../ast-analysis/shared.js';
|
|
4
8
|
import { walkWithVisitors } from '../ast-analysis/visitor.js';
|
|
5
9
|
import { createAstStoreVisitor } from '../ast-analysis/visitors/ast-store-visitor.js';
|
|
@@ -22,8 +26,6 @@ const KIND_ICONS: Record<string, string> = {
|
|
|
22
26
|
await: '\u22B3', // ⊳
|
|
23
27
|
};
|
|
24
28
|
|
|
25
|
-
const JS_TS_AST_TYPES = AST_TYPE_MAPS.get('javascript');
|
|
26
|
-
|
|
27
29
|
const WALK_EXTENSIONS = buildExtensionSet(AST_TYPE_MAPS);
|
|
28
30
|
|
|
29
31
|
// ─── Helpers ──────────────────────────────────────────────────────────
|
|
@@ -171,9 +173,10 @@ function collectFileAstRows(
|
|
|
171
173
|
|
|
172
174
|
// WASM fallback — walk tree if available
|
|
173
175
|
const ext = path.extname(relPath).toLowerCase();
|
|
174
|
-
|
|
176
|
+
const langId = symbols._langId || '';
|
|
177
|
+
if ((WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(langId)) && symbols._tree) {
|
|
175
178
|
const rows: AstRow[] = [];
|
|
176
|
-
walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap);
|
|
179
|
+
walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap, langId);
|
|
177
180
|
return rows;
|
|
178
181
|
}
|
|
179
182
|
|
|
@@ -226,13 +229,23 @@ function walkAst(
|
|
|
226
229
|
relPath: string,
|
|
227
230
|
rows: AstRow[],
|
|
228
231
|
nodeIdMap: Map<string, number>,
|
|
232
|
+
langId: string,
|
|
229
233
|
): void {
|
|
230
|
-
|
|
231
|
-
|
|
234
|
+
const astTypeMap = AST_TYPE_MAPS.get(langId);
|
|
235
|
+
if (!astTypeMap) {
|
|
236
|
+
debug(`ast-store: no astTypes for langId=${langId} — skipping walk`);
|
|
232
237
|
return;
|
|
233
238
|
}
|
|
234
|
-
const
|
|
235
|
-
const
|
|
239
|
+
const stringConfig = AST_STRING_CONFIGS.get(langId);
|
|
240
|
+
const visitor = createAstStoreVisitor(
|
|
241
|
+
astTypeMap,
|
|
242
|
+
defs,
|
|
243
|
+
relPath,
|
|
244
|
+
nodeIdMap,
|
|
245
|
+
stringConfig,
|
|
246
|
+
astStopRecurseKinds(langId),
|
|
247
|
+
);
|
|
248
|
+
const results = walkWithVisitors(rootNode, [visitor], langId);
|
|
236
249
|
const collected = (results['ast-store'] || []) as AstRow[];
|
|
237
250
|
rows.push(...collected);
|
|
238
251
|
}
|
package/src/mcp/server.ts
CHANGED
|
@@ -98,17 +98,15 @@ async function resolveDbPath(
|
|
|
98
98
|
return dbPath;
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
function validateMultiRepoAccess(multiRepo: boolean,
|
|
101
|
+
function validateMultiRepoAccess(multiRepo: boolean, args: { repo?: string }): void {
|
|
102
102
|
if (!multiRepo && args.repo) {
|
|
103
103
|
throw new ConfigError(
|
|
104
104
|
'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to access other repositories.',
|
|
105
105
|
);
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
);
|
|
111
|
-
}
|
|
107
|
+
// Note: the `list_repos` tool is excluded from `enabledToolNames` when
|
|
108
|
+
// `multiRepo` is false (see `buildToolList`), so any call to it is rejected
|
|
109
|
+
// earlier in `createCallToolHandler` with an "Unknown tool" error.
|
|
112
110
|
}
|
|
113
111
|
|
|
114
112
|
/**
|
|
@@ -163,11 +161,17 @@ function createCallToolHandler(
|
|
|
163
161
|
customDbPath: string | undefined,
|
|
164
162
|
allowedRepos: string[] | undefined,
|
|
165
163
|
getQueries: () => Promise<unknown>,
|
|
164
|
+
enabledToolNames: Set<string>,
|
|
166
165
|
) {
|
|
167
166
|
return async (request: any) => {
|
|
168
167
|
const { name, arguments: args } = request.params;
|
|
169
168
|
try {
|
|
170
|
-
|
|
169
|
+
if (!enabledToolNames.has(name)) {
|
|
170
|
+
return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
validateMultiRepoAccess(multiRepo, args);
|
|
174
|
+
|
|
171
175
|
const dbPath = await resolveDbPath(customDbPath, args, allowedRepos);
|
|
172
176
|
|
|
173
177
|
const toolEntry = TOOL_HANDLERS.get(name);
|
|
@@ -209,6 +213,9 @@ export async function startMCPServer(
|
|
|
209
213
|
// Apply config-based MCP page-size overrides
|
|
210
214
|
const config = options.config || loadConfig();
|
|
211
215
|
initMcpDefaults(config.mcp?.defaults ? { ...config.mcp.defaults } : undefined);
|
|
216
|
+
const disabledTools = [...(config.mcp?.disabledTools ?? [])];
|
|
217
|
+
const enabledTools = buildToolList(multiRepo, disabledTools);
|
|
218
|
+
const enabledToolNames = new Set(enabledTools.map((tool) => tool.name));
|
|
212
219
|
|
|
213
220
|
const { Server, StdioServerTransport, ListToolsRequestSchema, CallToolRequestSchema } =
|
|
214
221
|
await loadMCPSdk();
|
|
@@ -225,12 +232,12 @@ export async function startMCPServer(
|
|
|
225
232
|
);
|
|
226
233
|
|
|
227
234
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
228
|
-
tools:
|
|
235
|
+
tools: enabledTools,
|
|
229
236
|
}));
|
|
230
237
|
|
|
231
238
|
server.setRequestHandler(
|
|
232
239
|
CallToolRequestSchema,
|
|
233
|
-
createCallToolHandler(multiRepo, customDbPath, allowedRepos, getQueries),
|
|
240
|
+
createCallToolHandler(multiRepo, customDbPath, allowedRepos, getQueries, enabledToolNames),
|
|
234
241
|
);
|
|
235
242
|
|
|
236
243
|
const transport = new (StdioServerTransport as any)();
|
package/src/mcp/tool-registry.ts
CHANGED
|
@@ -29,6 +29,17 @@ const PAGINATION_PROPS: Record<string, unknown> = {
|
|
|
29
29
|
offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' },
|
|
30
30
|
};
|
|
31
31
|
|
|
32
|
+
function normalizeToolName(name: string): string {
|
|
33
|
+
return name
|
|
34
|
+
.trim()
|
|
35
|
+
.toLowerCase()
|
|
36
|
+
.replace(/^codegraph\d+_/, '');
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function buildDisabledToolSet(disabledTools?: string[]): Set<string> {
|
|
40
|
+
return new Set((disabledTools || []).map((name) => normalizeToolName(name)).filter(Boolean));
|
|
41
|
+
}
|
|
42
|
+
|
|
32
43
|
const BASE_TOOLS: ToolSchema[] = [
|
|
33
44
|
{
|
|
34
45
|
name: 'query',
|
|
@@ -849,18 +860,25 @@ const LIST_REPOS_TOOL: ToolSchema = {
|
|
|
849
860
|
/**
|
|
850
861
|
* Build the tool list based on multi-repo mode.
|
|
851
862
|
*/
|
|
852
|
-
export function buildToolList(multiRepo: boolean): ToolSchema[] {
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
863
|
+
export function buildToolList(multiRepo: boolean, disabledTools?: string[]): ToolSchema[] {
|
|
864
|
+
const disabled = buildDisabledToolSet(disabledTools);
|
|
865
|
+
const includeTool = (tool: ToolSchema): boolean => !disabled.has(normalizeToolName(tool.name));
|
|
866
|
+
const baseTools = BASE_TOOLS.filter(includeTool);
|
|
867
|
+
|
|
868
|
+
if (!multiRepo) return baseTools;
|
|
869
|
+
|
|
870
|
+
const tools: ToolSchema[] = [
|
|
871
|
+
...baseTools.map((tool) => ({
|
|
856
872
|
...tool,
|
|
857
873
|
inputSchema: {
|
|
858
874
|
...tool.inputSchema,
|
|
859
875
|
properties: { ...tool.inputSchema.properties, ...REPO_PROP },
|
|
860
876
|
},
|
|
861
877
|
})),
|
|
862
|
-
LIST_REPOS_TOOL,
|
|
863
878
|
];
|
|
879
|
+
|
|
880
|
+
if (includeTool(LIST_REPOS_TOOL)) tools.push(LIST_REPOS_TOOL);
|
|
881
|
+
return tools;
|
|
864
882
|
}
|
|
865
883
|
|
|
866
884
|
// Backward-compatible export: full multi-repo tool list
|