@optave/codegraph 3.9.5 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. package/README.md +30 -16
  2. package/dist/ast-analysis/engine.d.ts.map +1 -1
  3. package/dist/ast-analysis/engine.js +4 -3
  4. package/dist/ast-analysis/engine.js.map +1 -1
  5. package/dist/ast-analysis/rules/csharp.d.ts.map +1 -1
  6. package/dist/ast-analysis/rules/csharp.js +8 -1
  7. package/dist/ast-analysis/rules/csharp.js.map +1 -1
  8. package/dist/ast-analysis/rules/go.d.ts.map +1 -1
  9. package/dist/ast-analysis/rules/go.js +4 -1
  10. package/dist/ast-analysis/rules/go.js.map +1 -1
  11. package/dist/ast-analysis/rules/index.d.ts +6 -0
  12. package/dist/ast-analysis/rules/index.d.ts.map +1 -1
  13. package/dist/ast-analysis/rules/index.js +151 -4
  14. package/dist/ast-analysis/rules/index.js.map +1 -1
  15. package/dist/ast-analysis/rules/java.d.ts.map +1 -1
  16. package/dist/ast-analysis/rules/java.js +5 -1
  17. package/dist/ast-analysis/rules/java.js.map +1 -1
  18. package/dist/ast-analysis/rules/php.d.ts.map +1 -1
  19. package/dist/ast-analysis/rules/php.js +6 -1
  20. package/dist/ast-analysis/rules/php.js.map +1 -1
  21. package/dist/ast-analysis/rules/python.d.ts.map +1 -1
  22. package/dist/ast-analysis/rules/python.js +5 -1
  23. package/dist/ast-analysis/rules/python.js.map +1 -1
  24. package/dist/ast-analysis/rules/ruby.d.ts.map +1 -1
  25. package/dist/ast-analysis/rules/ruby.js +4 -1
  26. package/dist/ast-analysis/rules/ruby.js.map +1 -1
  27. package/dist/ast-analysis/rules/rust.d.ts.map +1 -1
  28. package/dist/ast-analysis/rules/rust.js +5 -1
  29. package/dist/ast-analysis/rules/rust.js.map +1 -1
  30. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts +2 -1
  31. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  32. package/dist/ast-analysis/visitors/ast-store-visitor.js +171 -37
  33. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  34. package/dist/domain/graph/builder/context.d.ts +10 -0
  35. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  36. package/dist/domain/graph/builder/context.js +10 -0
  37. package/dist/domain/graph/builder/context.js.map +1 -1
  38. package/dist/domain/graph/builder/helpers.d.ts +7 -2
  39. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  40. package/dist/domain/graph/builder/helpers.js +7 -2
  41. package/dist/domain/graph/builder/helpers.js.map +1 -1
  42. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  43. package/dist/domain/graph/builder/pipeline.js +210 -34
  44. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  45. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  46. package/dist/domain/graph/builder/stages/collect-files.js +8 -0
  47. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  48. package/dist/domain/graph/builder/stages/detect-changes.d.ts +24 -0
  49. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  50. package/dist/domain/graph/builder/stages/detect-changes.js +117 -3
  51. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  52. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/finalize.js +9 -6
  54. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/insert-nodes.d.ts +30 -0
  56. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  57. package/dist/domain/graph/builder/stages/insert-nodes.js +36 -13
  58. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  59. package/dist/domain/parser.d.ts +54 -1
  60. package/dist/domain/parser.d.ts.map +1 -1
  61. package/dist/domain/parser.js +181 -10
  62. package/dist/domain/parser.js.map +1 -1
  63. package/dist/domain/search/models.js +2 -2
  64. package/dist/domain/wasm-worker-entry.js +15 -14
  65. package/dist/domain/wasm-worker-entry.js.map +1 -1
  66. package/dist/features/ast.d.ts.map +1 -1
  67. package/dist/features/ast.js +11 -9
  68. package/dist/features/ast.js.map +1 -1
  69. package/dist/infrastructure/config.d.ts +1 -0
  70. package/dist/infrastructure/config.d.ts.map +1 -1
  71. package/dist/infrastructure/config.js +1 -0
  72. package/dist/infrastructure/config.js.map +1 -1
  73. package/dist/mcp/server.d.ts.map +1 -1
  74. package/dist/mcp/server.js +14 -8
  75. package/dist/mcp/server.js.map +1 -1
  76. package/dist/mcp/tool-registry.d.ts +1 -1
  77. package/dist/mcp/tool-registry.d.ts.map +1 -1
  78. package/dist/mcp/tool-registry.js +19 -5
  79. package/dist/mcp/tool-registry.js.map +1 -1
  80. package/dist/types.d.ts +1 -0
  81. package/dist/types.d.ts.map +1 -1
  82. package/grammars/tree-sitter-erlang.wasm +0 -0
  83. package/package.json +8 -7
  84. package/src/ast-analysis/engine.ts +14 -2
  85. package/src/ast-analysis/rules/csharp.ts +8 -1
  86. package/src/ast-analysis/rules/go.ts +4 -1
  87. package/src/ast-analysis/rules/index.ts +181 -4
  88. package/src/ast-analysis/rules/java.ts +5 -1
  89. package/src/ast-analysis/rules/php.ts +6 -1
  90. package/src/ast-analysis/rules/python.ts +5 -1
  91. package/src/ast-analysis/rules/ruby.ts +4 -1
  92. package/src/ast-analysis/rules/rust.ts +5 -1
  93. package/src/ast-analysis/visitors/ast-store-visitor.ts +165 -34
  94. package/src/domain/graph/builder/context.ts +10 -0
  95. package/src/domain/graph/builder/helpers.ts +8 -3
  96. package/src/domain/graph/builder/pipeline.ts +234 -36
  97. package/src/domain/graph/builder/stages/collect-files.ts +9 -0
  98. package/src/domain/graph/builder/stages/detect-changes.ts +130 -4
  99. package/src/domain/graph/builder/stages/finalize.ts +9 -6
  100. package/src/domain/graph/builder/stages/insert-nodes.ts +38 -14
  101. package/src/domain/parser.ts +205 -9
  102. package/src/domain/search/models.ts +2 -2
  103. package/src/domain/wasm-worker-entry.ts +23 -13
  104. package/src/features/ast.ts +22 -9
  105. package/src/infrastructure/config.ts +1 -0
  106. package/src/mcp/server.ts +16 -9
  107. package/src/mcp/tool-registry.ts +23 -5
  108. package/src/types.ts +1 -0
@@ -12,10 +12,12 @@ import path from 'node:path';
12
12
  import { performance } from 'node:perf_hooks';
13
13
  import { bulkNodeIdsByFile } from '../../../../db/index.js';
14
14
  import { debug } from '../../../../infrastructure/logger.js';
15
+ import { normalizePath } from '../../../../shared/constants.js';
15
16
  import { toErrorMessage } from '../../../../shared/errors.js';
16
17
  import type {
17
18
  BetterSqlite3Database,
18
19
  ExtractorOutput,
20
+ FileToParse,
19
21
  MetadataUpdate,
20
22
  SqliteStatement,
21
23
  } from '../../../../types.js';
@@ -90,16 +92,30 @@ function marshalSymbolBatches(allSymbols: Map<string, ExtractorOutput>): InsertN
90
92
  return batches;
91
93
  }
92
94
 
93
- /** Build file hash entries from parsed symbols and precomputed/metadata sources. */
94
- function buildFileHashes(
95
- allSymbols: Map<string, ExtractorOutput>,
95
+ /**
96
+ * Build file hash entries for every collected file, including those that
97
+ * produced zero symbols (empty files, parsers that silently no-op'd, or
98
+ * optional-language extensions whose grammar wasn't installed). Iterating the
99
+ * symbol map instead would skip such files and leave them missing from
100
+ * `file_hashes`, which permanently breaks the JS-side fast-skip pre-flight on
101
+ * any subsequent no-op rebuild (#1068).
102
+ *
103
+ * Exported for unit testing.
104
+ */
105
+ export function buildFileHashes(
106
+ filesToParse: FileToParse[],
96
107
  precomputedData: Map<string, PrecomputedFileData>,
97
108
  metadataUpdates: MetadataUpdate[],
98
109
  rootDir: string,
99
110
  ): Array<{ file: string; hash: string; mtime: number; size: number }> {
100
111
  const fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }> = [];
112
+ const seen = new Set<string>();
113
+
114
+ for (const item of filesToParse) {
115
+ const relPath = item.relPath ?? normalizePath(path.relative(rootDir, item.file));
116
+ if (seen.has(relPath)) continue;
117
+ seen.add(relPath);
101
118
 
102
- for (const [relPath] of allSymbols) {
103
119
  const precomputed = precomputedData.get(relPath);
104
120
  if (precomputed?._reverseDepOnly) {
105
121
  continue; // file unchanged, hash already correct
@@ -112,7 +128,7 @@ function buildFileHashes(
112
128
  size = precomputed.stat.size;
113
129
  } else {
114
130
  const rawStat = fileStat(path.join(rootDir, relPath));
115
- mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0;
131
+ mtime = rawStat ? rawStat.mtime : 0;
116
132
  size = rawStat ? rawStat.size : 0;
117
133
  }
118
134
  fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size });
@@ -127,7 +143,7 @@ function buildFileHashes(
127
143
  }
128
144
  if (code !== null) {
129
145
  const stat = fileStat(absPath);
130
- const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
146
+ const mtime = stat ? stat.mtime : 0;
131
147
  const size = stat ? stat.size : 0;
132
148
  fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size });
133
149
  }
@@ -136,7 +152,7 @@ function buildFileHashes(
136
152
 
137
153
  // Also include metadata-only updates (self-heal mtime/size without re-parse)
138
154
  for (const item of metadataUpdates) {
139
- const mtime = item.stat ? Math.floor(item.stat.mtime) : 0;
155
+ const mtime = item.stat ? item.stat.mtime : 0;
140
156
  const size = item.stat ? item.stat.size : 0;
141
157
  fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size });
142
158
  }
@@ -157,7 +173,7 @@ function tryNativeInsert(ctx: PipelineContext): boolean {
157
173
  for (const item of filesToParse) {
158
174
  if (item.relPath) precomputedData.set(item.relPath, item as PrecomputedFileData);
159
175
  }
160
- const fileHashes = buildFileHashes(allSymbols, precomputedData, metadataUpdates, rootDir);
176
+ const fileHashes = buildFileHashes(filesToParse, precomputedData, metadataUpdates, rootDir);
161
177
 
162
178
  // In native-first mode (single rusqlite connection), no WAL dance is needed.
163
179
  // In dual-connection mode, checkpoint JS side before native write, then
@@ -321,7 +337,7 @@ function insertChildrenAndEdges(
321
337
 
322
338
  function updateFileHashes(
323
339
  _db: BetterSqlite3Database,
324
- allSymbols: Map<string, ExtractorOutput>,
340
+ filesToParse: FileToParse[],
325
341
  precomputedData: Map<string, PrecomputedFileData>,
326
342
  metadataUpdates: MetadataUpdate[],
327
343
  rootDir: string,
@@ -329,7 +345,15 @@ function updateFileHashes(
329
345
  ): void {
330
346
  if (!upsertHash) return;
331
347
 
332
- for (const [relPath] of allSymbols) {
348
+ // Iterate every collected file (#1068): files that produced zero symbols
349
+ // (empty, parser no-op, or grammar-missing optional language) still need a
350
+ // hash row, otherwise the next no-op rebuild's fast-skip pre-flight rejects.
351
+ const seen = new Set<string>();
352
+ for (const item of filesToParse) {
353
+ const relPath = item.relPath ?? normalizePath(path.relative(rootDir, item.file));
354
+ if (seen.has(relPath)) continue;
355
+ seen.add(relPath);
356
+
333
357
  const precomputed = precomputedData.get(relPath);
334
358
  if (precomputed?._reverseDepOnly) {
335
359
  // no-op: file unchanged, hash already correct
@@ -341,7 +365,7 @@ function updateFileHashes(
341
365
  size = precomputed.stat.size;
342
366
  } else {
343
367
  const rawStat = fileStat(path.join(rootDir, relPath));
344
- mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0;
368
+ mtime = rawStat ? rawStat.mtime : 0;
345
369
  size = rawStat ? rawStat.size : 0;
346
370
  }
347
371
  upsertHash.run(relPath, precomputed.hash, mtime, size);
@@ -356,7 +380,7 @@ function updateFileHashes(
356
380
  }
357
381
  if (code !== null) {
358
382
  const stat = fileStat(absPath);
359
- const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
383
+ const mtime = stat ? stat.mtime : 0;
360
384
  const size = stat ? stat.size : 0;
361
385
  upsertHash.run(relPath, fileHash(code), mtime, size);
362
386
  }
@@ -365,7 +389,7 @@ function updateFileHashes(
365
389
 
366
390
  // Also update metadata-only entries (self-heal mtime/size without re-parse)
367
391
  for (const item of metadataUpdates) {
368
- const mtime = item.stat ? Math.floor(item.stat.mtime) : 0;
392
+ const mtime = item.stat ? item.stat.mtime : 0;
369
393
  const size = item.stat ? item.stat.size : 0;
370
394
  upsertHash.run(item.relPath, item.hash, mtime, size);
371
395
  }
@@ -415,7 +439,7 @@ export async function insertNodes(ctx: PipelineContext): Promise<void> {
415
439
  const insertAll = ctx.db.transaction(() => {
416
440
  insertDefinitionsAndExports(ctx.db, allSymbols);
417
441
  insertChildrenAndEdges(ctx.db, allSymbols);
418
- updateFileHashes(ctx.db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash);
442
+ updateFileHashes(ctx.db, filesToParse, precomputedData, metadataUpdates, rootDir, upsertHash);
419
443
  });
420
444
 
421
445
  insertAll();
@@ -316,16 +316,23 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
316
316
  *
317
317
  * Name is preserved for caller compatibility; the function now ensures
318
318
  * *analysis data* rather than *trees*.
319
+ *
320
+ * `needsFn` (optional): when provided, only files for which it returns true are
321
+ * re-parsed. Without it the function falls back to "any WASM-parseable file
322
+ * without _tree", which was the source of #1036 — a single file missing one
323
+ * analysis triggered a full-build re-parse of every WASM-parseable file.
319
324
  */
320
325
  export async function ensureWasmTrees(
321
326
  fileSymbols: Map<string, any>,
322
327
  rootDir: string,
328
+ needsFn?: (relPath: string, symbols: any) => boolean,
323
329
  ): Promise<void> {
324
330
  // Collect files that still need analysis data and are parseable by WASM.
325
331
  const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
326
332
  for (const [relPath, symbols] of fileSymbols) {
327
333
  if (symbols._tree) continue; // legacy path — leave existing trees alone
328
334
  if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
335
+ if (needsFn && !needsFn(relPath, symbols)) continue;
329
336
  pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
330
337
  }
331
338
  if (pending.length === 0) return;
@@ -412,6 +419,128 @@ export function getInstalledWasmExtensions(): Set<string> {
412
419
  return exts;
413
420
  }
414
421
 
422
+ /**
423
+ * Lowercase file extensions covered by the native Rust addon.
424
+ *
425
+ * Mirrors `LanguageKind::from_extension` in
426
+ * `crates/codegraph-core/src/parser_registry.rs`. Used to classify why the
427
+ * native orchestrator dropped a file: extensions outside this set are a
428
+ * legitimate parser limit (no Rust extractor exists), while extensions inside
429
+ * it indicate a real native bug (parse/read/extract failure).
430
+ *
431
+ * Keep this list in sync with the Rust enum — the native addon is a separate
432
+ * npm package, so JS has no runtime way to discover its language coverage.
433
+ */
434
+ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
435
+ '.js',
436
+ '.jsx',
437
+ '.mjs',
438
+ '.cjs',
439
+ '.ts',
440
+ '.tsx',
441
+ '.py',
442
+ '.pyi',
443
+ '.tf',
444
+ '.hcl',
445
+ '.go',
446
+ '.rs',
447
+ '.java',
448
+ '.cs',
449
+ '.rb',
450
+ '.rake',
451
+ '.gemspec',
452
+ '.php',
453
+ '.phtml',
454
+ '.c',
455
+ '.h',
456
+ '.cpp',
457
+ '.cc',
458
+ '.cxx',
459
+ '.hpp',
460
+ '.kt',
461
+ '.kts',
462
+ '.swift',
463
+ '.scala',
464
+ '.sh',
465
+ '.bash',
466
+ '.ex',
467
+ '.exs',
468
+ '.lua',
469
+ '.dart',
470
+ '.zig',
471
+ '.hs',
472
+ '.ml',
473
+ '.mli',
474
+ ]);
475
+
476
+ /**
477
+ * Classification for a file the native orchestrator dropped.
478
+ * - `unsupported-by-native`: extension has no Rust extractor (legitimate parser limit).
479
+ * - `native-extractor-failure`: extension is supported by native but the file was
480
+ * still dropped — points at a real bug (read error, parse failure, extractor crash).
481
+ */
482
+ export type NativeDropReason = 'unsupported-by-native' | 'native-extractor-failure';
483
+
484
+ export interface NativeDropClassification {
485
+ /** Per-reason → per-extension → list of relative paths that hit that bucket. */
486
+ byReason: Record<NativeDropReason, Map<string, string[]>>;
487
+ /** Total file count per reason. */
488
+ totals: Record<NativeDropReason, number>;
489
+ }
490
+
491
+ /**
492
+ * Group the missing files (relative paths) by drop reason and extension so the
493
+ * caller can log per-extension counts and a sample path. Pure function — no
494
+ * I/O, safe to unit-test independently of the build pipeline.
495
+ */
496
+ export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClassification {
497
+ const byReason: Record<NativeDropReason, Map<string, string[]>> = {
498
+ 'unsupported-by-native': new Map(),
499
+ 'native-extractor-failure': new Map(),
500
+ };
501
+ const totals: Record<NativeDropReason, number> = {
502
+ 'unsupported-by-native': 0,
503
+ 'native-extractor-failure': 0,
504
+ };
505
+ for (const rel of relPaths) {
506
+ const ext = path.extname(rel).toLowerCase();
507
+ const reason: NativeDropReason = NATIVE_SUPPORTED_EXTENSIONS.has(ext)
508
+ ? 'native-extractor-failure'
509
+ : 'unsupported-by-native';
510
+ const bucket = byReason[reason];
511
+ let list = bucket.get(ext);
512
+ if (!list) {
513
+ list = [];
514
+ bucket.set(ext, list);
515
+ }
516
+ list.push(rel);
517
+ totals[reason]++;
518
+ }
519
+ return { byReason, totals };
520
+ }
521
+
522
+ /**
523
+ * Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
524
+ * Caps at 3 sample paths per extension and 6 extensions total to keep warnings
525
+ * readable when many languages are dropped at once. Extensions are sorted by
526
+ * descending file count so the loudest offender shows up first; ties keep
527
+ * insertion order. Pure function — safe to unit-test independently.
528
+ */
529
+ export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
530
+ const MAX_EXTS = 6;
531
+ const MAX_SAMPLES = 3;
532
+ const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
533
+ const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
534
+ const sample = paths.slice(0, MAX_SAMPLES).join(', ');
535
+ const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
536
+ return `${ext} (${paths.length}: ${sample}${more})`;
537
+ });
538
+ if (entries.length > MAX_EXTS) {
539
+ shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
540
+ }
541
+ return shown.join('; ');
542
+ }
543
+
415
544
  // ── Unified API ──────────────────────────────────────────────────────────────
416
545
 
417
546
  function resolveEngine(opts: ParseEngineOpts = {}): ResolvedEngine {
@@ -938,6 +1067,71 @@ async function parseFilesWasm(
938
1067
  return result;
939
1068
  }
940
1069
 
1070
+ /**
1071
+ * Files at or below this count use the inline parse path (no worker spawn).
1072
+ *
1073
+ * Sized for typical engine-parity drops: a handful of fixture files in one
1074
+ * or two languages (the recurring HCL case is 4 files). Above this, the
1075
+ * worker-pool's IPC + crash-isolation cost (#965) is amortized over enough
1076
+ * parse work to be worth paying; below it, the ~1–2s cold-start dominates.
1077
+ */
1078
+ const INLINE_BACKFILL_THRESHOLD = 16;
1079
+
1080
+ /**
1081
+ * Inline WASM parse (no worker) for small file batches.
1082
+ *
1083
+ * Used by the engine-parity backfill path when the native engine drops a
1084
+ * handful of files (typically test fixtures). The worker pool's per-call
1085
+ * IPC + grammar-init overhead can cost 1–2s on slow CI runners — for a
1086
+ * 4-file backfill, that dwarfs the ~10ms of actual parse work.
1087
+ *
1088
+ * Returns symbols with `_tree` set so `runAnalyses` can run AST/CFG/dataflow
1089
+ * visitors via the unified walker (mirrors how WASM-engine results behaved
1090
+ * before the worker pool was introduced).
1091
+ */
1092
+ async function parseFilesWasmInline(
1093
+ filePaths: string[],
1094
+ rootDir: string,
1095
+ ): Promise<Map<string, ExtractorOutput>> {
1096
+ const result = new Map<string, ExtractorOutput>();
1097
+ if (filePaths.length === 0) return result;
1098
+ const parsers = await ensureParsersForFiles(filePaths);
1099
+ for (const filePath of filePaths) {
1100
+ if (!_extToLang.has(path.extname(filePath).toLowerCase())) continue;
1101
+ let code: string;
1102
+ try {
1103
+ code = fs.readFileSync(filePath, 'utf-8');
1104
+ } catch (err: unknown) {
1105
+ warn(`Skipping ${path.relative(rootDir, filePath)}: ${(err as Error).message}`);
1106
+ continue;
1107
+ }
1108
+ const extracted = wasmExtractSymbols(parsers, filePath, code);
1109
+ if (!extracted) continue;
1110
+ const relPath = path.relative(rootDir, filePath).split(path.sep).join('/');
1111
+ const symbols = extracted.symbols as ExtractorOutput & { _tree?: unknown; _langId?: string };
1112
+ symbols._tree = extracted.tree;
1113
+ symbols._langId = extracted.langId;
1114
+ result.set(relPath, symbols);
1115
+ }
1116
+ return result;
1117
+ }
1118
+
1119
+ /**
1120
+ * Backfill helper: small batches use the inline (main-thread) path; larger
1121
+ * batches keep the worker-pool isolation against tree-sitter WASM crashes
1122
+ * (#965). Threshold matches typical engine-parity drop sizes (a few fixture
1123
+ * files in one or two languages).
1124
+ */
1125
+ export async function parseFilesWasmForBackfill(
1126
+ filePaths: string[],
1127
+ rootDir: string,
1128
+ ): Promise<Map<string, ExtractorOutput>> {
1129
+ if (filePaths.length <= INLINE_BACKFILL_THRESHOLD) {
1130
+ return parseFilesWasmInline(filePaths, rootDir);
1131
+ }
1132
+ return parseFilesWasm(filePaths, rootDir);
1133
+ }
1134
+
941
1135
  /**
942
1136
  * Parse multiple files in bulk and return a Map<relPath, symbols>.
943
1137
  */
@@ -988,7 +1182,7 @@ export async function parseFilesAuto(
988
1182
  );
989
1183
  if (dropped.length > 0) {
990
1184
  warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
991
- const wasmResults = await parseFilesWasm(dropped, rootDir);
1185
+ const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
992
1186
  for (const [relPath, symbols] of wasmResults) {
993
1187
  result.set(relPath, symbols);
994
1188
  }
@@ -1003,15 +1197,17 @@ export async function parseFilesAuto(
1003
1197
  export function getActiveEngine(opts: ParseEngineOpts = {}): {
1004
1198
  name: 'native' | 'wasm';
1005
1199
  version: string | null;
1200
+ binaryVersion: string | null;
1006
1201
  } {
1007
1202
  const { name, native } = resolveEngine(opts);
1008
- let version: string | null = native
1009
- ? typeof native.engineVersion === 'function'
1010
- ? native.engineVersion()
1011
- : null
1012
- : null;
1013
- // Prefer platform package.json version over binary-embedded version
1014
- // to handle stale binaries that weren't recompiled during a release
1203
+ const binaryVersion: string | null =
1204
+ native && typeof native.engineVersion === 'function' ? native.engineVersion() : null;
1205
+ // The display version prefers the platform package.json so the "Using native
1206
+ // engine (vX)" log matches the npm release the user installed. The Rust
1207
+ // orchestrator's check_version_mismatch compares against CARGO_PKG_VERSION
1208
+ // (the binary's own value), so build_meta writes must use `binaryVersion`,
1209
+ // not this display value — see pipeline.ts and finalize.ts (#1066).
1210
+ let version: string | null = binaryVersion;
1015
1211
  if (native) {
1016
1212
  try {
1017
1213
  version = getNativePackageVersion() ?? version;
@@ -1019,7 +1215,7 @@ export function getActiveEngine(opts: ParseEngineOpts = {}): {
1019
1215
  debug(`getNativePackageVersion failed: ${(e as Error).message}`);
1020
1216
  }
1021
1217
  }
1022
- return { name, version };
1218
+ return { name, version, binaryVersion };
1023
1219
  }
1024
1220
 
1025
1221
  /**
@@ -42,7 +42,7 @@ export const MODELS: Record<string, ModelConfig> = {
42
42
  quantized: false,
43
43
  },
44
44
  'jina-code': {
45
- name: 'Xenova/jina-embeddings-v2-base-code',
45
+ name: 'jinaai/jina-embeddings-v2-base-code',
46
46
  dim: 768,
47
47
  contextWindow: 8192,
48
48
  desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
@@ -253,7 +253,7 @@ export async function embed(
253
253
  }
254
254
 
255
255
  if (texts.length > batchSize) {
256
- process.stdout.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
256
+ process.stderr.write(` Embedded ${Math.min(i + batchSize, texts.length)}/${texts.length}\r`);
257
257
  }
258
258
  }
259
259
 
@@ -28,7 +28,9 @@ import type { Tree } from 'web-tree-sitter';
28
28
  import { Language, Parser, Query } from 'web-tree-sitter';
29
29
  import { computeLOCMetrics, computeMaintainabilityIndex } from '../ast-analysis/metrics.js';
30
30
  import {
31
+ AST_STRING_CONFIGS,
31
32
  AST_TYPE_MAPS,
33
+ astStopRecurseKinds,
32
34
  CFG_RULES,
33
35
  COMPLEXITY_RULES,
34
36
  DATAFLOW_RULES,
@@ -584,7 +586,15 @@ function setupVisitorsLocal(
584
586
  if (opts.ast) {
585
587
  const astTypeMap = AST_TYPE_MAPS.get(langId);
586
588
  if (astTypeMap) {
587
- astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, new Map<string, number>());
589
+ const stringConfig = AST_STRING_CONFIGS.get(langId);
590
+ astVisitor = createAstStoreVisitor(
591
+ astTypeMap,
592
+ defs,
593
+ relPath,
594
+ new Map<string, number>(),
595
+ stringConfig,
596
+ astStopRecurseKinds(langId),
597
+ );
588
598
  visitors.push(astVisitor);
589
599
  }
590
600
  }
@@ -698,18 +708,18 @@ async function handleParse(msg: WorkerParseRequest): Promise<SerializedExtractor
698
708
  file?: string;
699
709
  parentNodeId?: number | null;
700
710
  }>;
701
- if (astRows.length > 0) {
702
- // Strip `file` and `parentNodeId` — main thread re-resolves parent IDs
703
- // against its DB in features/ast.ts::collectFileAstRows, and `file` is
704
- // known from the map key.
705
- serializedAstNodes = astRows.map((n) => ({
706
- line: n.line,
707
- kind: n.kind,
708
- name: n.name ?? '',
709
- text: n.text ?? undefined,
710
- receiver: n.receiver ?? undefined,
711
- }));
712
- }
711
+ // Always set an array (even empty) — leaving astNodes undefined makes
712
+ // engine.ts::fileNeedsWasmTree treat the file as un-walked and trigger
713
+ // a full ensureWasmTrees re-parse of every WASM-parseable file (#1036).
714
+ // Strip `file` and `parentNodeId` — main thread re-resolves both in
715
+ // features/ast.ts::collectFileAstRows.
716
+ serializedAstNodes = astRows.map((n) => ({
717
+ line: n.line,
718
+ kind: n.kind,
719
+ name: n.name ?? '',
720
+ text: n.text ?? undefined,
721
+ receiver: n.receiver ?? undefined,
722
+ }));
713
723
  }
714
724
 
715
725
  if (complexityVisitor) storeComplexityResults(results, defs, entry.id);
@@ -1,5 +1,9 @@
1
1
  import path from 'node:path';
2
- import { AST_TYPE_MAPS } from '../ast-analysis/rules/index.js';
2
+ import {
3
+ AST_STRING_CONFIGS,
4
+ AST_TYPE_MAPS,
5
+ astStopRecurseKinds,
6
+ } from '../ast-analysis/rules/index.js';
3
7
  import { buildExtensionSet } from '../ast-analysis/shared.js';
4
8
  import { walkWithVisitors } from '../ast-analysis/visitor.js';
5
9
  import { createAstStoreVisitor } from '../ast-analysis/visitors/ast-store-visitor.js';
@@ -22,8 +26,6 @@ const KIND_ICONS: Record<string, string> = {
22
26
  await: '\u22B3', // ⊳
23
27
  };
24
28
 
25
- const JS_TS_AST_TYPES = AST_TYPE_MAPS.get('javascript');
26
-
27
29
  const WALK_EXTENSIONS = buildExtensionSet(AST_TYPE_MAPS);
28
30
 
29
31
  // ─── Helpers ──────────────────────────────────────────────────────────
@@ -171,9 +173,10 @@ function collectFileAstRows(
171
173
 
172
174
  // WASM fallback — walk tree if available
173
175
  const ext = path.extname(relPath).toLowerCase();
174
- if (WALK_EXTENSIONS.has(ext) && symbols._tree) {
176
+ const langId = symbols._langId || '';
177
+ if ((WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(langId)) && symbols._tree) {
175
178
  const rows: AstRow[] = [];
176
- walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap);
179
+ walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap, langId);
177
180
  return rows;
178
181
  }
179
182
 
@@ -226,13 +229,23 @@ function walkAst(
226
229
  relPath: string,
227
230
  rows: AstRow[],
228
231
  nodeIdMap: Map<string, number>,
232
+ langId: string,
229
233
  ): void {
230
- if (!JS_TS_AST_TYPES) {
231
- debug('ast-store: JS_TS_AST_TYPES not available — skipping walk');
234
+ const astTypeMap = AST_TYPE_MAPS.get(langId);
235
+ if (!astTypeMap) {
236
+ debug(`ast-store: no astTypes for langId=${langId} — skipping walk`);
232
237
  return;
233
238
  }
234
- const visitor = createAstStoreVisitor(JS_TS_AST_TYPES, defs, relPath, nodeIdMap);
235
- const results = walkWithVisitors(rootNode, [visitor], 'javascript');
239
+ const stringConfig = AST_STRING_CONFIGS.get(langId);
240
+ const visitor = createAstStoreVisitor(
241
+ astTypeMap,
242
+ defs,
243
+ relPath,
244
+ nodeIdMap,
245
+ stringConfig,
246
+ astStopRecurseKinds(langId),
247
+ );
248
+ const results = walkWithVisitors(rootNode, [visitor], langId);
236
249
  const collected = (results['ast-store'] || []) as AstRow[];
237
250
  rows.push(...collected);
238
251
  }
@@ -147,6 +147,7 @@ export const DEFAULTS = {
147
147
  implementations: 50,
148
148
  interfaces: 50,
149
149
  },
150
+ disabledTools: [] as string[],
150
151
  },
151
152
  } satisfies CodegraphConfig;
152
153
 
package/src/mcp/server.ts CHANGED
@@ -98,17 +98,15 @@ async function resolveDbPath(
98
98
  return dbPath;
99
99
  }
100
100
 
101
- function validateMultiRepoAccess(multiRepo: boolean, name: string, args: { repo?: string }): void {
101
+ function validateMultiRepoAccess(multiRepo: boolean, args: { repo?: string }): void {
102
102
  if (!multiRepo && args.repo) {
103
103
  throw new ConfigError(
104
104
  'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to access other repositories.',
105
105
  );
106
106
  }
107
- if (!multiRepo && name === 'list_repos') {
108
- throw new ConfigError(
109
- 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to list repositories.',
110
- );
111
- }
107
+ // Note: the `list_repos` tool is excluded from `enabledToolNames` when
108
+ // `multiRepo` is false (see `buildToolList`), so any call to it is rejected
109
+ // earlier in `createCallToolHandler` with an "Unknown tool" error.
112
110
  }
113
111
 
114
112
  /**
@@ -163,11 +161,17 @@ function createCallToolHandler(
163
161
  customDbPath: string | undefined,
164
162
  allowedRepos: string[] | undefined,
165
163
  getQueries: () => Promise<unknown>,
164
+ enabledToolNames: Set<string>,
166
165
  ) {
167
166
  return async (request: any) => {
168
167
  const { name, arguments: args } = request.params;
169
168
  try {
170
- validateMultiRepoAccess(multiRepo, name, args);
169
+ if (!enabledToolNames.has(name)) {
170
+ return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
171
+ }
172
+
173
+ validateMultiRepoAccess(multiRepo, args);
174
+
171
175
  const dbPath = await resolveDbPath(customDbPath, args, allowedRepos);
172
176
 
173
177
  const toolEntry = TOOL_HANDLERS.get(name);
@@ -209,6 +213,9 @@ export async function startMCPServer(
209
213
  // Apply config-based MCP page-size overrides
210
214
  const config = options.config || loadConfig();
211
215
  initMcpDefaults(config.mcp?.defaults ? { ...config.mcp.defaults } : undefined);
216
+ const disabledTools = [...(config.mcp?.disabledTools ?? [])];
217
+ const enabledTools = buildToolList(multiRepo, disabledTools);
218
+ const enabledToolNames = new Set(enabledTools.map((tool) => tool.name));
212
219
 
213
220
  const { Server, StdioServerTransport, ListToolsRequestSchema, CallToolRequestSchema } =
214
221
  await loadMCPSdk();
@@ -225,12 +232,12 @@ export async function startMCPServer(
225
232
  );
226
233
 
227
234
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
228
- tools: buildToolList(multiRepo),
235
+ tools: enabledTools,
229
236
  }));
230
237
 
231
238
  server.setRequestHandler(
232
239
  CallToolRequestSchema,
233
- createCallToolHandler(multiRepo, customDbPath, allowedRepos, getQueries),
240
+ createCallToolHandler(multiRepo, customDbPath, allowedRepos, getQueries, enabledToolNames),
234
241
  );
235
242
 
236
243
  const transport = new (StdioServerTransport as any)();
@@ -29,6 +29,17 @@ const PAGINATION_PROPS: Record<string, unknown> = {
29
29
  offset: { type: 'number', description: 'Skip this many results (pagination, default: 0)' },
30
30
  };
31
31
 
32
+ function normalizeToolName(name: string): string {
33
+ return name
34
+ .trim()
35
+ .toLowerCase()
36
+ .replace(/^codegraph\d+_/, '');
37
+ }
38
+
39
+ function buildDisabledToolSet(disabledTools?: string[]): Set<string> {
40
+ return new Set((disabledTools || []).map((name) => normalizeToolName(name)).filter(Boolean));
41
+ }
42
+
32
43
  const BASE_TOOLS: ToolSchema[] = [
33
44
  {
34
45
  name: 'query',
@@ -849,18 +860,25 @@ const LIST_REPOS_TOOL: ToolSchema = {
849
860
  /**
850
861
  * Build the tool list based on multi-repo mode.
851
862
  */
852
- export function buildToolList(multiRepo: boolean): ToolSchema[] {
853
- if (!multiRepo) return BASE_TOOLS;
854
- return [
855
- ...BASE_TOOLS.map((tool) => ({
863
+ export function buildToolList(multiRepo: boolean, disabledTools?: string[]): ToolSchema[] {
864
+ const disabled = buildDisabledToolSet(disabledTools);
865
+ const includeTool = (tool: ToolSchema): boolean => !disabled.has(normalizeToolName(tool.name));
866
+ const baseTools = BASE_TOOLS.filter(includeTool);
867
+
868
+ if (!multiRepo) return baseTools;
869
+
870
+ const tools: ToolSchema[] = [
871
+ ...baseTools.map((tool) => ({
856
872
  ...tool,
857
873
  inputSchema: {
858
874
  ...tool.inputSchema,
859
875
  properties: { ...tool.inputSchema.properties, ...REPO_PROP },
860
876
  },
861
877
  })),
862
- LIST_REPOS_TOOL,
863
878
  ];
879
+
880
+ if (includeTool(LIST_REPOS_TOOL)) tools.push(LIST_REPOS_TOOL);
881
+ return tools;
864
882
  }
865
883
 
866
884
  // Backward-compatible export: full multi-repo tool list
package/src/types.ts CHANGED
@@ -1201,6 +1201,7 @@ export interface CodegraphConfig {
1201
1201
 
1202
1202
  mcp: {
1203
1203
  defaults: McpDefaults;
1204
+ disabledTools?: string[];
1204
1205
  };
1205
1206
  }
1206
1207