@colbymchenry/codegraph-darwin-x64 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/lib/dist/bin/codegraph.js +111 -11
  2. package/lib/dist/bin/codegraph.js.map +1 -1
  3. package/lib/dist/db/index.d.ts +22 -1
  4. package/lib/dist/db/index.d.ts.map +1 -1
  5. package/lib/dist/db/index.js +46 -1
  6. package/lib/dist/db/index.js.map +1 -1
  7. package/lib/dist/db/queries.d.ts +14 -0
  8. package/lib/dist/db/queries.d.ts.map +1 -1
  9. package/lib/dist/db/queries.js +25 -0
  10. package/lib/dist/db/queries.js.map +1 -1
  11. package/lib/dist/directory.d.ts +43 -0
  12. package/lib/dist/directory.d.ts.map +1 -1
  13. package/lib/dist/directory.js +121 -0
  14. package/lib/dist/directory.js.map +1 -1
  15. package/lib/dist/extraction/grammars.d.ts +11 -3
  16. package/lib/dist/extraction/grammars.d.ts.map +1 -1
  17. package/lib/dist/extraction/grammars.js +14 -5
  18. package/lib/dist/extraction/grammars.js.map +1 -1
  19. package/lib/dist/extraction/index.d.ts +6 -3
  20. package/lib/dist/extraction/index.d.ts.map +1 -1
  21. package/lib/dist/extraction/index.js +229 -62
  22. package/lib/dist/extraction/index.js.map +1 -1
  23. package/lib/dist/extraction/languages/c-cpp.d.ts.map +1 -1
  24. package/lib/dist/extraction/languages/c-cpp.js +47 -2
  25. package/lib/dist/extraction/languages/c-cpp.js.map +1 -1
  26. package/lib/dist/extraction/languages/csharp.d.ts.map +1 -1
  27. package/lib/dist/extraction/languages/csharp.js +20 -0
  28. package/lib/dist/extraction/languages/csharp.js.map +1 -1
  29. package/lib/dist/extraction/languages/dart.d.ts.map +1 -1
  30. package/lib/dist/extraction/languages/dart.js +22 -0
  31. package/lib/dist/extraction/languages/dart.js.map +1 -1
  32. package/lib/dist/extraction/languages/java.d.ts.map +1 -1
  33. package/lib/dist/extraction/languages/java.js +213 -9
  34. package/lib/dist/extraction/languages/java.js.map +1 -1
  35. package/lib/dist/extraction/languages/kotlin.d.ts.map +1 -1
  36. package/lib/dist/extraction/languages/kotlin.js +51 -0
  37. package/lib/dist/extraction/languages/kotlin.js.map +1 -1
  38. package/lib/dist/extraction/languages/scala.d.ts.map +1 -1
  39. package/lib/dist/extraction/languages/scala.js +19 -9
  40. package/lib/dist/extraction/languages/scala.js.map +1 -1
  41. package/lib/dist/extraction/parse-worker.js +4 -1
  42. package/lib/dist/extraction/parse-worker.js.map +1 -1
  43. package/lib/dist/extraction/tree-sitter-types.d.ts +13 -0
  44. package/lib/dist/extraction/tree-sitter-types.d.ts.map +1 -1
  45. package/lib/dist/extraction/tree-sitter.d.ts +119 -0
  46. package/lib/dist/extraction/tree-sitter.d.ts.map +1 -1
  47. package/lib/dist/extraction/tree-sitter.js +890 -11
  48. package/lib/dist/extraction/tree-sitter.js.map +1 -1
  49. package/lib/dist/index.d.ts +33 -0
  50. package/lib/dist/index.d.ts.map +1 -1
  51. package/lib/dist/index.js +68 -7
  52. package/lib/dist/index.js.map +1 -1
  53. package/lib/dist/installer/index.d.ts.map +1 -1
  54. package/lib/dist/installer/index.js +33 -67
  55. package/lib/dist/installer/index.js.map +1 -1
  56. package/lib/dist/installer/instructions-template.d.ts +3 -3
  57. package/lib/dist/installer/instructions-template.d.ts.map +1 -1
  58. package/lib/dist/installer/instructions-template.js +4 -4
  59. package/lib/dist/installer/targets/claude.d.ts +18 -12
  60. package/lib/dist/installer/targets/claude.d.ts.map +1 -1
  61. package/lib/dist/installer/targets/claude.js +78 -6
  62. package/lib/dist/installer/targets/claude.js.map +1 -1
  63. package/lib/dist/installer/targets/shared.d.ts +12 -2
  64. package/lib/dist/installer/targets/shared.d.ts.map +1 -1
  65. package/lib/dist/installer/targets/shared.js +13 -12
  66. package/lib/dist/installer/targets/shared.js.map +1 -1
  67. package/lib/dist/installer/targets/types.d.ts +7 -0
  68. package/lib/dist/installer/targets/types.d.ts.map +1 -1
  69. package/lib/dist/mcp/daemon.d.ts.map +1 -1
  70. package/lib/dist/mcp/daemon.js +16 -0
  71. package/lib/dist/mcp/daemon.js.map +1 -1
  72. package/lib/dist/mcp/engine.d.ts.map +1 -1
  73. package/lib/dist/mcp/engine.js +8 -0
  74. package/lib/dist/mcp/engine.js.map +1 -1
  75. package/lib/dist/mcp/proxy.d.ts.map +1 -1
  76. package/lib/dist/mcp/proxy.js +14 -1
  77. package/lib/dist/mcp/proxy.js.map +1 -1
  78. package/lib/dist/mcp/server-instructions.d.ts +18 -14
  79. package/lib/dist/mcp/server-instructions.d.ts.map +1 -1
  80. package/lib/dist/mcp/server-instructions.js +57 -52
  81. package/lib/dist/mcp/server-instructions.js.map +1 -1
  82. package/lib/dist/mcp/session.d.ts.map +1 -1
  83. package/lib/dist/mcp/session.js +23 -18
  84. package/lib/dist/mcp/session.js.map +1 -1
  85. package/lib/dist/mcp/tools.d.ts +51 -1
  86. package/lib/dist/mcp/tools.d.ts.map +1 -1
  87. package/lib/dist/mcp/tools.js +585 -151
  88. package/lib/dist/mcp/tools.js.map +1 -1
  89. package/lib/dist/project-config.d.ts +36 -0
  90. package/lib/dist/project-config.d.ts.map +1 -0
  91. package/lib/dist/project-config.js +235 -0
  92. package/lib/dist/project-config.js.map +1 -0
  93. package/lib/dist/reasoning/config.d.ts +45 -0
  94. package/lib/dist/reasoning/config.d.ts.map +1 -0
  95. package/lib/dist/reasoning/config.js +171 -0
  96. package/lib/dist/reasoning/config.js.map +1 -0
  97. package/lib/dist/reasoning/credentials.d.ts +5 -0
  98. package/lib/dist/reasoning/credentials.d.ts.map +1 -0
  99. package/lib/dist/reasoning/credentials.js +83 -0
  100. package/lib/dist/reasoning/credentials.js.map +1 -0
  101. package/lib/dist/reasoning/login.d.ts +21 -0
  102. package/lib/dist/reasoning/login.d.ts.map +1 -0
  103. package/lib/dist/reasoning/login.js +85 -0
  104. package/lib/dist/reasoning/login.js.map +1 -0
  105. package/lib/dist/reasoning/reasoner.d.ts +43 -0
  106. package/lib/dist/reasoning/reasoner.d.ts.map +1 -0
  107. package/lib/dist/reasoning/reasoner.js +308 -0
  108. package/lib/dist/reasoning/reasoner.js.map +1 -0
  109. package/lib/dist/resolution/c-fnptr-synthesizer.d.ts +33 -0
  110. package/lib/dist/resolution/c-fnptr-synthesizer.d.ts.map +1 -0
  111. package/lib/dist/resolution/c-fnptr-synthesizer.js +352 -0
  112. package/lib/dist/resolution/c-fnptr-synthesizer.js.map +1 -0
  113. package/lib/dist/resolution/callback-synthesizer.d.ts +6 -1
  114. package/lib/dist/resolution/callback-synthesizer.d.ts.map +1 -1
  115. package/lib/dist/resolution/callback-synthesizer.js +1109 -1
  116. package/lib/dist/resolution/callback-synthesizer.js.map +1 -1
  117. package/lib/dist/resolution/frameworks/goframe.d.ts +41 -0
  118. package/lib/dist/resolution/frameworks/goframe.d.ts.map +1 -0
  119. package/lib/dist/resolution/frameworks/goframe.js +112 -0
  120. package/lib/dist/resolution/frameworks/goframe.js.map +1 -0
  121. package/lib/dist/resolution/frameworks/index.d.ts +1 -0
  122. package/lib/dist/resolution/frameworks/index.d.ts.map +1 -1
  123. package/lib/dist/resolution/frameworks/index.js +5 -1
  124. package/lib/dist/resolution/frameworks/index.js.map +1 -1
  125. package/lib/dist/resolution/frameworks/react.d.ts.map +1 -1
  126. package/lib/dist/resolution/frameworks/react.js +17 -60
  127. package/lib/dist/resolution/frameworks/react.js.map +1 -1
  128. package/lib/dist/resolution/goframe-synthesizer.d.ts +28 -0
  129. package/lib/dist/resolution/goframe-synthesizer.d.ts.map +1 -0
  130. package/lib/dist/resolution/goframe-synthesizer.js +158 -0
  131. package/lib/dist/resolution/goframe-synthesizer.js.map +1 -0
  132. package/lib/dist/resolution/name-matcher.d.ts.map +1 -1
  133. package/lib/dist/resolution/name-matcher.js +48 -8
  134. package/lib/dist/resolution/name-matcher.js.map +1 -1
  135. package/lib/dist/resolution/strip-comments.d.ts +1 -1
  136. package/lib/dist/resolution/strip-comments.d.ts.map +1 -1
  137. package/lib/dist/resolution/strip-comments.js +2 -0
  138. package/lib/dist/resolution/strip-comments.js.map +1 -1
  139. package/lib/dist/sync/watcher.d.ts +68 -1
  140. package/lib/dist/sync/watcher.d.ts.map +1 -1
  141. package/lib/dist/sync/watcher.js +212 -14
  142. package/lib/dist/sync/watcher.js.map +1 -1
  143. package/lib/dist/telemetry/index.d.ts +0 -3
  144. package/lib/dist/telemetry/index.d.ts.map +1 -1
  145. package/lib/dist/telemetry/index.js +4 -7
  146. package/lib/dist/telemetry/index.js.map +1 -1
  147. package/lib/dist/upgrade/index.d.ts.map +1 -1
  148. package/lib/dist/upgrade/index.js +40 -4
  149. package/lib/dist/upgrade/index.js.map +1 -1
  150. package/lib/dist/utils.d.ts +14 -1
  151. package/lib/dist/utils.d.ts.map +1 -1
  152. package/lib/dist/utils.js +20 -2
  153. package/lib/dist/utils.js.map +1 -1
  154. package/lib/node_modules/.package-lock.json +1 -1
  155. package/lib/package.json +2 -2
  156. package/package.json +1 -1
@@ -55,6 +55,7 @@ const crypto = __importStar(require("crypto"));
55
55
  const child_process_1 = require("child_process");
56
56
  const tree_sitter_1 = require("./tree-sitter");
57
57
  const grammars_1 = require("./grammars");
58
+ const project_config_1 = require("../project-config");
58
59
  const directory_1 = require("../directory");
59
60
  const errors_1 = require("../errors");
60
61
  const utils_1 = require("../utils");
@@ -65,6 +66,17 @@ const frameworks_1 = require("../resolution/frameworks");
65
66
  * File reads are I/O-bound; batching overlaps I/O wait with CPU parse work.
66
67
  */
67
68
  const FILE_IO_BATCH_SIZE = 10;
69
+ /**
70
+ * How many files the `sync()` reconcile processes between cooperative yields to
71
+ * the event loop. The reconcile runs two O(files) loops of synchronous `fs`
72
+ * calls (existsSync for removals, statSync for adds/mods); on a very large repo
73
+ * (~100k files) an un-yielded run wedges the main thread for minutes, which both
74
+ * trips the liveness watchdog (it SIGKILLs a process whose loop stops turning)
75
+ * and blocks the first MCP tool call behind the catch-up gate (issue #905).
76
+ * Yielding every N files keeps the socket, the watchdog heartbeat, and any
77
+ * concurrent read query responsive while the reconcile runs.
78
+ */
79
+ const SYNC_RECONCILE_YIELD_INTERVAL = 1000;
68
80
  // PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
69
81
  /**
70
82
  * Maximum time (ms) to wait for a single file to parse in the worker thread.
@@ -240,6 +252,33 @@ function buildDefaultIgnore(rootDir) {
240
252
  function defaultsOnlyIgnore() {
241
253
  return (0, ignore_1.default)().add(DEFAULT_IGNORE_PATTERNS);
242
254
  }
255
+ /**
256
+ * Matcher for the project's `codegraph.json` `includeIgnored` patterns — the
257
+ * explicit opt-in to index embedded git repos living inside gitignored
258
+ * directories (#622, #699). Returns `null` when the project opted in nothing,
259
+ * which is the zero-config DEFAULT: `.gitignore` is then fully respected and a
260
+ * gitignored directory (even one holding nested repos) is never walked or
261
+ * indexed (#970, #976). Built once per scan/sync/scope operation from the scan
262
+ * root and threaded down — never global, so multi-project daemons stay isolated.
263
+ */
264
+ function loadIncludeIgnoredMatcher(rootDir) {
265
+ const patterns = (0, project_config_1.loadIncludeIgnoredPatterns)(rootDir);
266
+ return patterns.length > 0 ? (0, ignore_1.default)().add(patterns) : null;
267
+ }
268
+ /**
269
+ * `git ls-files --directory` collapses a wholly-untracked/ignored directory into
270
+ * one entry — and when the command's own cwd is such a directory (the indexed
271
+ * root is itself a git-ignored subdir of an enclosing repo), git emits the
272
+ * literal `./` meaning "this entire directory". That sentinel is not a real
273
+ * nested path: feeding it to the `ignore` matcher throws ("path should be a
274
+ * `path.relative()`d string, but got "./""), which used to abort `buildScopeIgnore`
275
+ * and so break the MCP daemon's watcher/auto-sync on connect; and joining it back
276
+ * onto `repoDir` would just re-point at the cwd. Drop it wherever we consume
277
+ * `--directory` output. (#936)
278
+ */
279
+ function isWholeCwdEntry(entry) {
280
+ return entry === './' || entry === '.' || entry === '';
281
+ }
243
282
  /**
244
283
  * List the gitignored DIRECTORIES of a repo (collapsed, trailing-slash form),
245
284
  * relative to `repoDir`. These are invisible to every other `git ls-files` /
@@ -250,7 +289,7 @@ function defaultsOnlyIgnore() {
250
289
  function listIgnoredDirs(repoDir) {
251
290
  try {
252
291
  const out = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', '-o', '-i', '--exclude-standard', '--directory'], { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
253
- return out.split('\0').filter((e) => e.endsWith('/'));
292
+ return out.split('\0').filter((e) => e.endsWith('/') && !isWholeCwdEntry(e));
254
293
  }
255
294
  catch {
256
295
  return [];
@@ -268,8 +307,10 @@ const EMBEDDED_REPO_SEARCH_ENTRIES = 2000;
268
307
  * - A `.git` **file** is a pointer (`gitdir: …`). A git **worktree** points into
269
308
  * the host repo's own `.git/worktrees/<name>`, so it is a second working view
270
309
  * of a repo CodeGraph already indexes — indexing it just duplicates the whole
271
- * graph N times; skip it (#848). A **submodule** points into `.git/modules/`
272
- * and is distinct code, so index it as before.
310
+ * graph N times; skip it (#848). A **submodule worktree** points into
311
+ * `.git/modules/<module>/worktrees/<name>` — same duplication, so skip it too
312
+ * (#945). A **submodule** checkout points into `.git/modules/<module>` (no
313
+ * `worktrees/` segment) and is distinct code, so index it as before.
273
314
  *
274
315
  * Returns `'none'` when there is no `.git` entry here.
275
316
  */
@@ -287,9 +328,12 @@ function classifyGitDir(absDir) {
287
328
  return 'none';
288
329
  try {
289
330
  const gitdir = fs.readFileSync(path.join(absDir, '.git'), 'utf8').match(/^gitdir:\s*(.+)$/m)?.[1]?.trim();
290
- // A linked worktree's gitdir lives under some repo's `.git/worktrees/`.
331
+ // A worktree's gitdir lives under some repo's `.git/worktrees/<name>` —
332
+ // either the top-level repo's (`.git/worktrees/`) or, for a worktree of a
333
+ // submodule, that submodule's gitdir (`.git/modules/<module>/worktrees/`).
334
+ // The optional `modules/<module>` segment covers the submodule case (#945).
291
335
  // Match both separators so a Windows-style pointer is recognized too.
292
- if (gitdir && /(^|[\\/])\.git[\\/]worktrees[\\/]/.test(gitdir))
336
+ if (gitdir && /(^|[\\/])\.git[\\/](modules[\\/][^\\/]+[\\/])?worktrees[\\/]/.test(gitdir))
293
337
  return 'worktree';
294
338
  }
295
339
  catch {
@@ -398,9 +442,12 @@ function buildScopeIgnore(rootDir, embeddedRoots) {
398
442
  }
399
443
  /**
400
444
  * Standalone discovery of every embedded repo root under `rootDir` (relative,
401
- * trailing-slashed) — both the untracked kind (#193) and the gitignored kind
402
- * (#514), recursively (an embedded repo can embed further repos). Returns []
403
- * for non-git roots: the filesystem walk handles nested repos there already.
445
+ * trailing-slashed) — the untracked kind (#193) always, and the gitignored kind
446
+ * (#514) only for directories the project opted in via `codegraph.json`
447
+ * `includeIgnored` (#622, #699); otherwise `.gitignore` is respected and they
448
+ * are not discovered (#970, #976). Recursive (an embedded repo can embed further
449
+ * repos). Returns [] for non-git roots: the filesystem walk handles nested repos
450
+ * there already.
404
451
  */
405
452
  function discoverEmbeddedRepoRoots(rootDir) {
406
453
  try {
@@ -411,18 +458,19 @@ function discoverEmbeddedRepoRoots(rootDir) {
411
458
  }
412
459
  const out = [];
413
460
  const defaults = defaultsOnlyIgnore();
461
+ const includeIgnored = loadIncludeIgnoredMatcher(rootDir);
414
462
  const visit = (repoAbs, prefix) => {
415
463
  const candidates = [];
416
464
  try {
417
465
  const o = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', '-o', '--exclude-standard', '--directory'], { cwd: repoAbs, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
418
466
  for (const e of o.split('\0')) {
419
- if (e.endsWith('/') && !defaults.ignores(e)) {
467
+ if (e.endsWith('/') && !isWholeCwdEntry(e) && !defaults.ignores(e)) {
420
468
  candidates.push(...findNestedGitRepos(path.join(repoAbs, e), e));
421
469
  }
422
470
  }
423
471
  }
424
472
  catch { /* untracked listing failed — ignored-side discovery still runs */ }
425
- candidates.push(...findIgnoredEmbeddedRepos(repoAbs));
473
+ candidates.push(...findIgnoredEmbeddedRepos(repoAbs, includeIgnored, prefix));
426
474
  for (const rel of candidates) {
427
475
  const full = (0, utils_1.normalizePath)(prefix + rel);
428
476
  out.push(full);
@@ -433,16 +481,30 @@ function discoverEmbeddedRepoRoots(rootDir) {
433
481
  return out;
434
482
  }
435
483
  /**
436
- * Discover embedded repos hidden by `repoDir`'s OWN ignore rules: for each
437
- * gitignored directory (skipping built-in default excludes), search for nested
438
- * `.git` roots. Returns repo paths relative to `repoDir`, trailing-slashed.
484
+ * Discover embedded repos hidden by `repoDir`'s OWN gitignore rules: for each
485
+ * gitignored directory, search for nested `.git` roots. Returns repo paths
486
+ * relative to `repoDir`, trailing-slashed.
487
+ *
488
+ * OPT-IN ONLY. Walking into a gitignored directory contradicts what every other
489
+ * tool (and CodeGraph's own `git ls-files` foundation) does — `.gitignore`
490
+ * excludes. So this returns `[]` unless the project opted the directory in via
491
+ * `codegraph.json` `includeIgnored`; without that, a gitignored dir — including
492
+ * a huge reference/data dir full of nested clones — is left untouched (#970,
493
+ * #976). When opted in, it restores the super-repo-of-clones behavior (#622,
494
+ * #699). `prefix` is the scan-root-relative path of `repoDir`, so a pattern like
495
+ * `services/` opts that whole subtree in at any recursion depth. Built-in
496
+ * default excludes (`node_modules`, …) are always skipped.
439
497
  */
440
- function findIgnoredEmbeddedRepos(repoDir) {
498
+ function findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix) {
499
+ if (!includeIgnored)
500
+ return [];
441
501
  const defaults = defaultsOnlyIgnore();
442
502
  const repos = [];
443
503
  for (const dir of listIgnoredDirs(repoDir)) {
444
504
  if (defaults.ignores(dir))
445
505
  continue;
506
+ if (!includeIgnored.ignores((0, utils_1.normalizePath)(prefix + dir)))
507
+ continue;
446
508
  repos.push(...findNestedGitRepos(path.join(repoDir, dir), dir));
447
509
  }
448
510
  return repos;
@@ -458,12 +520,15 @@ function findIgnoredEmbeddedRepos(repoDir) {
458
520
  * skips them entirely, and untracked output reports them only as an opaque
459
521
  * "subdir/" entry (trailing slash) rather than expanding their files. Each
460
522
  * embedded repo is its own git boundary, so we re-run `git ls-files` inside it.
461
- * (See issue #193.) GITIGNORED embedded repos are invisible even to that —
462
- * they're discovered separately via `findIgnoredEmbeddedRepos` (#514); every
463
- * embedded repo root (however found) is recorded in `embeddedRoots` so callers
464
- * can exempt its files from the parent's own gitignore rules.
523
+ * (See issue #193.) GITIGNORED embedded repos are invisible even to that; they
524
+ * are discovered separately via `findIgnoredEmbeddedRepos` (#514) but ONLY for
525
+ * directories the project opted in through `codegraph.json` `includeIgnored`
526
+ * (`includeIgnored` here, threaded from the scan root) — by default `.gitignore`
527
+ * is respected and they stay out (#970, #976). Every embedded repo root (however
528
+ * found) is recorded in `embeddedRoots` so callers can exempt its files from the
529
+ * parent's own gitignore rules.
465
530
  */
466
- function collectGitFiles(repoDir, prefix, files, embeddedRoots) {
531
+ function collectGitFiles(repoDir, prefix, files, embeddedRoots, includeIgnored = null) {
467
532
  const gitOpts = { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true };
468
533
  // Tracked files. --recurse-submodules pulls in files from active submodules,
469
534
  // which the index would otherwise represent only as a commit pointer.
@@ -497,18 +562,20 @@ function collectGitFiles(repoDir, prefix, files, embeddedRoots) {
497
562
  // it's a duplicate working view of an already-indexed repo (#848).
498
563
  if (classifyGitDir(childDir) === 'embedded' && !defaultsOnlyIgnore().ignores(rel)) {
499
564
  embeddedRoots?.add((0, utils_1.normalizePath)(prefix + rel));
500
- collectGitFiles(childDir, prefix + rel, files, embeddedRoots);
565
+ collectGitFiles(childDir, prefix + rel, files, embeddedRoots, includeIgnored);
501
566
  }
502
567
  continue;
503
568
  }
504
569
  files.add((0, utils_1.normalizePath)(prefix + rel));
505
570
  }
506
571
  // Embedded repos hidden by THIS repo's ignore rules (`/packages/` in a
507
- // super-repo .gitignore) never appear in any listing above — discover and
508
- // recurse into them too. (#514)
509
- for (const rel of findIgnoredEmbeddedRepos(repoDir)) {
572
+ // super-repo .gitignore) never appear in any listing above. By default they
573
+ // stay hidden — `.gitignore` is respected (#970, #976). They are recursed into
574
+ // only when the project opted the directory in via `codegraph.json`
575
+ // `includeIgnored` (#622, #699), which `findIgnoredEmbeddedRepos` enforces.
576
+ for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
510
577
  embeddedRoots?.add((0, utils_1.normalizePath)(prefix + rel));
511
- collectGitFiles(path.join(repoDir, rel), prefix + rel, files, embeddedRoots);
578
+ collectGitFiles(path.join(repoDir, rel), prefix + rel, files, embeddedRoots, includeIgnored);
512
579
  }
513
580
  }
514
581
  /**
@@ -536,7 +603,7 @@ function getGitVisibleFiles(rootDir) {
536
603
  }
537
604
  const files = new Set();
538
605
  const embeddedRoots = new Set();
539
- collectGitFiles(rootDir, '', files, embeddedRoots);
606
+ collectGitFiles(rootDir, '', files, embeddedRoots, loadIncludeIgnoredMatcher(rootDir));
540
607
  // Apply built-in default ignores uniformly — to tracked files too, since
541
608
  // committing a dependency/build dir doesn't make it project code. A
542
609
  // `.gitignore` negation (e.g. `!vendor/`) is the explicit opt-in. (issue #407)
@@ -554,26 +621,42 @@ function getGitVisibleFiles(rootDir) {
554
621
  * Use `git status` to detect changed files instead of scanning every file.
555
622
  * Returns null on failure so callers fall back to full scan.
556
623
  *
557
- * Recurses into embedded repos — both the untracked kind (#193: the parent's
558
- * status collapses them to an opaque `?? subdir/` entry) and the gitignored
559
- * kind (#514: they never appear in the parent's status at all) — running
560
- * `git status` inside each, so changes in a multi-repo workspace sync without
561
- * a full rescan. Deleting an ENTIRE embedded repo dir is the one case this
562
- * cannot see (the child status that would report the deletions is gone with
563
- * it); a full `codegraph index` reconciles that.
624
+ * Recurses into embedded repos — the untracked kind (#193: the parent's status
625
+ * collapses them to an opaque `?? subdir/` entry) always, and the gitignored
626
+ * kind (#514: they never appear in the parent's status at all) only for
627
+ * directories opted in via `codegraph.json` `includeIgnored` (#622, #699) —
628
+ * running `git status` inside each, so changes in a multi-repo workspace sync
629
+ * without a full rescan. By default a gitignored dir is left alone, matching the
630
+ * full-index scan (#970, #976). Deleting an ENTIRE embedded repo dir is the one
631
+ * case this cannot see (the child status that would report the deletions is gone
632
+ * with it); a full `codegraph index` reconciles that.
564
633
  */
565
634
  function getGitChangedFiles(rootDir) {
566
635
  try {
567
636
  const changes = { modified: [], added: [], deleted: [] };
568
- collectGitStatus(rootDir, '', changes);
637
+ // Custom extension → language overrides from the project's codegraph.json,
638
+ // so change detection sees the same custom-extension files the full index does.
639
+ const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
640
+ collectGitStatus(rootDir, '', changes, overrides, loadIncludeIgnoredMatcher(rootDir));
569
641
  return changes;
570
642
  }
571
643
  catch {
572
644
  return null;
573
645
  }
574
646
  }
575
- function collectGitStatus(repoDir, prefix, out) {
647
+ function collectGitStatus(repoDir, prefix, out, overrides, includeIgnored = null) {
576
648
  const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: repoDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true });
649
+ // This repo's own ignore rules — built-in defaults (#407) plus its .gitignore.
650
+ // Change detection must exclude the SAME files the full index does, but git
651
+ // status hides neither: it ignores nothing for *tracked* paths, and the
652
+ // built-in defaults aren't gitignore at all. Without this filter a committed
653
+ // vendor/ dir, or a tracked file under a .gitignored dir, surfaces here as a
654
+ // change — so `codegraph status` (which reads getChangedFiles) reports a
655
+ // pending edit the full index never tracks and `sync` never clears. Matching
656
+ // repo-relative `rel` at each recursion level mirrors getGitVisibleFiles'
657
+ // ScopeIgnore: every embedded repo is judged by ITS OWN rules, never the
658
+ // parent's. (#766)
659
+ const ig = buildDefaultIgnore(repoDir);
577
660
  const untrackedDirs = [];
578
661
  for (const line of output.split('\n')) {
579
662
  if (line.length < 4)
@@ -587,29 +670,38 @@ function collectGitStatus(repoDir, prefix, out) {
587
670
  continue;
588
671
  }
589
672
  const filePath = (0, utils_1.normalizePath)(prefix + rel);
590
- // Skip non-source files (git status already omits .gitignored paths).
591
- if (!(0, grammars_1.isSourceFile)(filePath))
673
+ if (!(0, grammars_1.isSourceFile)(filePath, overrides))
674
+ continue;
675
+ if (statusCode.includes('D')) {
676
+ // Deletions stay unfiltered: getChangedFiles acts on one only when the
677
+ // path is already tracked in the DB, where removal is always correct — and
678
+ // that lets a newly-excluded dir's stale rows clean themselves up. (#766)
679
+ out.deleted.push(filePath);
680
+ continue;
681
+ }
682
+ // Added (`??`) / modified files inside an excluded dir must not enter the
683
+ // index — match against the repo-relative path, same as the full scan. (#766)
684
+ if (ig.ignores(rel))
592
685
  continue;
593
686
  if (statusCode === '??') {
594
687
  out.added.push(filePath);
595
688
  }
596
- else if (statusCode.includes('D')) {
597
- out.deleted.push(filePath);
598
- }
599
689
  else {
600
690
  // M, MM, AM, A (staged), etc. — treat as modified
601
691
  out.modified.push(filePath);
602
692
  }
603
693
  }
604
694
  // Recurse embedded repos found under untracked dirs (at the dir itself or
605
- // nested deeper) and under this repo's gitignored dirs.
695
+ // nested deeper). Gitignored dirs are walked only for the directories the
696
+ // project opted in via `includeIgnored`; by default `.gitignore` is respected
697
+ // and they are left alone (#970, #976), mirroring the full-index scan.
606
698
  for (const rel of untrackedDirs) {
607
699
  for (const repoRel of findNestedGitRepos(path.join(repoDir, rel), rel)) {
608
- collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out);
700
+ collectGitStatus(path.join(repoDir, repoRel), prefix + repoRel, out, overrides, includeIgnored);
609
701
  }
610
702
  }
611
- for (const rel of findIgnoredEmbeddedRepos(repoDir)) {
612
- collectGitStatus(path.join(repoDir, rel), prefix + rel, out);
703
+ for (const rel of findIgnoredEmbeddedRepos(repoDir, includeIgnored, prefix)) {
704
+ collectGitStatus(path.join(repoDir, rel), prefix + rel, out, overrides, includeIgnored);
613
705
  }
614
706
  }
615
707
  /**
@@ -620,13 +712,15 @@ function collectGitStatus(repoDir, prefix, out) {
620
712
  * projects, falls back to a filesystem walk that parses .gitignore itself.
621
713
  */
622
714
  function scanDirectory(rootDir, onProgress) {
715
+ // Custom extension → language overrides from the project's codegraph.json.
716
+ const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
623
717
  // Fast path: use git to get all visible files (respects .gitignore everywhere)
624
718
  const gitFiles = getGitVisibleFiles(rootDir);
625
719
  if (gitFiles) {
626
720
  const files = [];
627
721
  let count = 0;
628
722
  for (const filePath of gitFiles) {
629
- if ((0, grammars_1.isSourceFile)(filePath)) {
723
+ if ((0, grammars_1.isSourceFile)(filePath, overrides)) {
630
724
  files.push(filePath);
631
725
  count++;
632
726
  onProgress?.(count, filePath);
@@ -642,12 +736,14 @@ function scanDirectory(rootDir, onProgress) {
642
736
  * allowing worker threads to receive and render progress messages.
643
737
  */
644
738
  async function scanDirectoryAsync(rootDir, onProgress) {
739
+ // Custom extension → language overrides from the project's codegraph.json.
740
+ const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
645
741
  const gitFiles = getGitVisibleFiles(rootDir);
646
742
  if (gitFiles) {
647
743
  const files = [];
648
744
  let count = 0;
649
745
  for (const filePath of gitFiles) {
650
- if ((0, grammars_1.isSourceFile)(filePath)) {
746
+ if ((0, grammars_1.isSourceFile)(filePath, overrides)) {
651
747
  files.push(filePath);
652
748
  count++;
653
749
  onProgress?.(count, filePath);
@@ -668,6 +764,8 @@ function scanDirectoryWalk(rootDir, onProgress) {
668
764
  const files = [];
669
765
  let count = 0;
670
766
  const visitedDirs = new Set();
767
+ // Custom extension → language overrides from the project's codegraph.json.
768
+ const overrides = (0, project_config_1.loadExtensionOverrides)(rootDir);
671
769
  const loadIgnore = (dir) => {
672
770
  const giPath = path.join(dir, '.gitignore');
673
771
  if (!fs.existsSync(giPath))
@@ -734,7 +832,7 @@ function scanDirectoryWalk(rootDir, onProgress) {
734
832
  }
735
833
  }
736
834
  else if (stat.isFile()) {
737
- if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
835
+ if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath, overrides)) {
738
836
  files.push(relativePath);
739
837
  count++;
740
838
  onProgress?.(count, relativePath);
@@ -752,7 +850,7 @@ function scanDirectoryWalk(rootDir, onProgress) {
752
850
  }
753
851
  }
754
852
  else if (entry.isFile()) {
755
- if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
853
+ if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath, overrides)) {
756
854
  files.push(relativePath);
757
855
  count++;
758
856
  onProgress?.(count, relativePath);
@@ -866,6 +964,10 @@ class ExtractionOrchestrator {
866
964
  let filesErrored = 0;
867
965
  let totalNodes = 0;
868
966
  let totalEdges = 0;
967
+ // Custom extension → language overrides from the project's codegraph.json.
968
+ // Threaded into language detection so custom-extension files load the right
969
+ // grammar and store under the mapped language.
970
+ const overrides = (0, project_config_1.loadExtensionOverrides)(this.rootDir);
869
971
  const log = verbose
870
972
  ? (msg) => { console.log(`[worker] ${msg}`); }
871
973
  : (_msg) => { };
@@ -915,7 +1017,7 @@ class ExtractionOrchestrator {
915
1017
  });
916
1018
  await new Promise(resolve => setImmediate(resolve));
917
1019
  // Detect needed languages and load grammars in the parse worker
918
- const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
1020
+ const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f, undefined, overrides)))];
919
1021
  // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded when c is needed
920
1022
  if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
921
1023
  neededLanguages.push('cpp');
@@ -1014,9 +1116,13 @@ class ExtractionOrchestrator {
1014
1116
  w.terminate().catch(() => { });
1015
1117
  }
1016
1118
  async function requestParse(filePath, content) {
1119
+ // Resolve the language on the main thread (where the project's
1120
+ // codegraph.json overrides are loaded) and hand it to the worker, so the
1121
+ // worker never needs the override map itself.
1122
+ const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1017
1123
  if (!WorkerClass) {
1018
1124
  // In-process fallback
1019
- return (0, tree_sitter_1.extractFromSource)(filePath, content, (0, grammars_1.detectLanguage)(filePath, content), frameworkNames);
1125
+ return (0, tree_sitter_1.extractFromSource)(filePath, content, language, frameworkNames);
1020
1126
  }
1021
1127
  // Recycle the worker before the next parse if we've hit the threshold.
1022
1128
  // This destroys the WASM linear memory (which can grow but never shrink)
@@ -1041,7 +1147,7 @@ class ExtractionOrchestrator {
1041
1147
  worker.terminate().catch(() => { });
1042
1148
  }, timeoutMs);
1043
1149
  pendingParses.set(id, { resolve, reject, timer });
1044
- worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames });
1150
+ worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames, language });
1045
1151
  });
1046
1152
  }
1047
1153
  for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
@@ -1063,7 +1169,10 @@ class ExtractionOrchestrator {
1063
1169
  // Read files in parallel (with path validation before any I/O)
1064
1170
  const fileContents = await Promise.all(batch.map(async (fp) => {
1065
1171
  try {
1066
- const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, fp);
1172
+ // Indexing read: follow in-root symlinks the directory walk already
1173
+ // descended into (the `../` guard still applies) so files reached
1174
+ // via an in-root symlink-to-outside still index (#935).
1175
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, fp, { allowSymlinkEscape: true });
1067
1176
  if (!fullPath) {
1068
1177
  (0, errors_1.logWarn)('Path traversal blocked in batch reader', { filePath: fp });
1069
1178
  return { filePath: fp, content: null, stats: null, error: new Error('Path traversal blocked') };
@@ -1147,7 +1256,7 @@ class ExtractionOrchestrator {
1147
1256
  processed++;
1148
1257
  // Store in database on main thread (SQLite is not thread-safe)
1149
1258
  if (result.nodes.length > 0 || result.errors.length === 0) {
1150
- const language = (0, grammars_1.detectLanguage)(filePath, content);
1259
+ const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1151
1260
  this.storeExtractionResult(filePath, content, language, stats, result);
1152
1261
  }
1153
1262
  if (result.errors.length > 0) {
@@ -1169,7 +1278,7 @@ class ExtractionOrchestrator {
1169
1278
  // Files with no symbols but no errors (yaml, twig, properties) are
1170
1279
  // tracked at the file level — count them as indexed so the CLI
1171
1280
  // doesn't misleadingly report "No files found to index".
1172
- const lang = (0, grammars_1.detectLanguage)(filePath, content);
1281
+ const lang = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1173
1282
  if ((0, grammars_1.isFileLevelOnlyLanguage)(lang)) {
1174
1283
  filesIndexed++;
1175
1284
  }
@@ -1222,7 +1331,7 @@ class ExtractionOrchestrator {
1222
1331
  continue;
1223
1332
  }
1224
1333
  if (result.nodes.length > 0 || result.errors.length === 0) {
1225
- const language = (0, grammars_1.detectLanguage)(filePath, content);
1334
+ const language = (0, grammars_1.detectLanguage)(filePath, content, overrides);
1226
1335
  const stats = await fsp.stat(path.join(this.rootDir, filePath));
1227
1336
  this.storeExtractionResult(filePath, content, language, stats, result);
1228
1337
  const idx = errors.indexOf(errEntry);
@@ -1270,7 +1379,7 @@ class ExtractionOrchestrator {
1270
1379
  continue;
1271
1380
  }
1272
1381
  if (result.nodes.length > 0 || result.errors.length === 0) {
1273
- const language = (0, grammars_1.detectLanguage)(filePath, fullContent);
1382
+ const language = (0, grammars_1.detectLanguage)(filePath, fullContent, overrides);
1274
1383
  const stats = await fsp.stat(path.join(this.rootDir, filePath));
1275
1384
  this.storeExtractionResult(filePath, fullContent, language, stats, result);
1276
1385
  const idx = errors.indexOf(errEntry);
@@ -1350,7 +1459,8 @@ class ExtractionOrchestrator {
1350
1459
  * Index a single file
1351
1460
  */
1352
1461
  async indexFile(relativePath) {
1353
- const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath);
1462
+ // Indexing read: follow in-root symlinks (the `../` guard still applies), #935.
1463
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath, { allowSymlinkEscape: true });
1354
1464
  if (!fullPath) {
1355
1465
  return {
1356
1466
  nodes: [],
@@ -1390,8 +1500,8 @@ class ExtractionOrchestrator {
1390
1500
  * Used by the parallel batch reader to avoid redundant file I/O.
1391
1501
  */
1392
1502
  async indexFileWithContent(relativePath, content, stats) {
1393
- // Prevent path traversal
1394
- const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath);
1503
+ // Prevent `../` traversal; follow in-root symlinks like the directory walk (#935).
1504
+ const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath, { allowSymlinkEscape: true });
1395
1505
  if (!fullPath) {
1396
1506
  (0, errors_1.logWarn)('Path traversal blocked in indexFileWithContent', { relativePath });
1397
1507
  return {
@@ -1419,8 +1529,8 @@ class ExtractionOrchestrator {
1419
1529
  durationMs: 0,
1420
1530
  };
1421
1531
  }
1422
- // Detect language
1423
- const language = (0, grammars_1.detectLanguage)(relativePath, content);
1532
+ // Detect language (honoring the project's codegraph.json extension overrides)
1533
+ const language = (0, grammars_1.detectLanguage)(relativePath, content, (0, project_config_1.loadExtensionOverrides)(this.rootDir));
1424
1534
  if (!(0, grammars_1.isLanguageSupported)(language)) {
1425
1535
  return {
1426
1536
  nodes: [],
@@ -1451,6 +1561,25 @@ class ExtractionOrchestrator {
1451
1561
  if (existingFile && existingFile.contentHash === contentHash) {
1452
1562
  return; // No changes
1453
1563
  }
1564
+ // Snapshot incoming cross-file edges BEFORE deleting this file's nodes.
1565
+ // `deleteFile` cascades to delete every edge whose source OR target is a
1566
+ // node in this file (edges.FK ... ON DELETE CASCADE). Edges whose SOURCE is
1567
+ // in this file are re-emitted by the extractor below, but edges whose SOURCE
1568
+ // is in a *different* (unchanged) file are not — they would be silently
1569
+ // dropped, which is issue #899: re-indexing a callee file severs `calls`/
1570
+ // `references` edges from callers that import it via module-attribute
1571
+ // access (`pkg.mod.fn(...)`).
1572
+ //
1573
+ // We snapshot the edge plus the target node's (name, kind) so we can
1574
+ // re-resolve to the re-indexed target's NEW id. Node ids are
1575
+ // `sha256(filePath:kind:name:line)`, so any line shift in the callee file
1576
+ // (e.g. a docstring-only edit above the symbol) changes every target id and
1577
+ // a naive re-insert by old id would silently drop every edge. Matching by
1578
+ // (filePath, kind, name) is stable across line shifts; if the symbol was
1579
+ // renamed/removed, no match is found and the edge stays dropped (correct).
1580
+ const crossFileIncomingEdges = existingFile
1581
+ ? this.queries.getCrossFileIncomingEdgesWithTarget(filePath)
1582
+ : [];
1454
1583
  // Delete existing data for this file
1455
1584
  if (existingFile) {
1456
1585
  this.queries.deleteFile(filePath);
@@ -1471,6 +1600,31 @@ class ExtractionOrchestrator {
1471
1600
  this.queries.insertEdges(validEdges);
1472
1601
  }
1473
1602
  }
1603
+ // Re-insert cross-file incoming edges snapshotted before the delete,
1604
+ // re-resolving each edge's target to the re-indexed node's new id by
1605
+ // (filePath, kind, name). Node ids include the source line, so any line
1606
+ // shift in the callee file (e.g. a docstring-only edit above the symbol)
1607
+ // changes every target id and a naive re-insert by old id would drop them
1608
+ // all. `insertEdges` still filters to endpoints that exist, so edges whose
1609
+ // caller (source) was deleted, or whose callee (target) was renamed/removed
1610
+ // during the re-index (no match in `newNodesByKindName`), are dropped. This
1611
+ // closes the #899 edge-drop on `sync`.
1612
+ if (crossFileIncomingEdges.length > 0) {
1613
+ const newNodesByKindName = new Map();
1614
+ for (const n of validNodes) {
1615
+ newNodesByKindName.set(`${n.kind}\0${n.name}`, n.id);
1616
+ }
1617
+ const reinserted = [];
1618
+ for (const e of crossFileIncomingEdges) {
1619
+ const newTargetId = newNodesByKindName.get(`${e.targetKind}\0${e.targetName}`);
1620
+ if (newTargetId) {
1621
+ reinserted.push({ source: e.source, target: newTargetId, kind: e.kind, metadata: e.metadata, line: e.line, column: e.column, provenance: e.provenance });
1622
+ }
1623
+ }
1624
+ if (reinserted.length > 0) {
1625
+ this.queries.insertEdges(reinserted);
1626
+ }
1627
+ }
1474
1628
  // Insert unresolved references in batch with denormalized filePath/language
1475
1629
  if (result.unresolvedReferences.length > 0) {
1476
1630
  const insertedIds = new Set(validNodes.map((n) => n.id));
@@ -1530,7 +1684,7 @@ class ExtractionOrchestrator {
1530
1684
  // whether or not the project uses git, and crucially also catches committed
1531
1685
  // changes from `git pull`/`checkout`/`merge`/`rebase` — which `git status`
1532
1686
  // cannot see, because the working tree is clean afterward.
1533
- const currentFiles = scanDirectory(this.rootDir);
1687
+ const currentFiles = await scanDirectoryAsync(this.rootDir);
1534
1688
  filesChecked = currentFiles.length;
1535
1689
  const currentSet = new Set(currentFiles);
1536
1690
  const trackedFiles = this.queries.getAllFiles();
@@ -1541,14 +1695,26 @@ class ExtractionOrchestrator {
1541
1695
  // Removals: tracked in the DB but no longer a present source file. Check the
1542
1696
  // filesystem directly — `scanDirectory` (via `git ls-files`) still lists a
1543
1697
  // file deleted from disk but not yet staged, so set membership alone misses it.
1698
+ // `reconcileChecks` drives the cooperative yield shared with the adds/mods loop
1699
+ // below (see SYNC_RECONCILE_YIELD_INTERVAL / issue #905).
1700
+ let reconcileChecks = 0;
1544
1701
  for (const tracked of trackedFiles) {
1545
1702
  if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) {
1546
1703
  this.queries.deleteFile(tracked.path);
1547
1704
  filesRemoved++;
1548
1705
  }
1706
+ if (++reconcileChecks % SYNC_RECONCILE_YIELD_INTERVAL === 0) {
1707
+ await new Promise((resolve) => setImmediate(resolve));
1708
+ }
1549
1709
  }
1550
1710
  // Adds / modifications.
1551
1711
  for (const filePath of currentFiles) {
1712
+ // Same cooperative yield as the removals loop — this is the other O(files)
1713
+ // synchronous-stat loop that wedges the main thread on a large repo (#905).
1714
+ // Yield at the top of the body so the `continue` fast-paths below still hit it.
1715
+ if (++reconcileChecks % SYNC_RECONCILE_YIELD_INTERVAL === 0) {
1716
+ await new Promise((resolve) => setImmediate(resolve));
1717
+ }
1552
1718
  const fullPath = path.join(this.rootDir, filePath);
1553
1719
  const tracked = trackedMap.get(filePath);
1554
1720
  // Cheap pre-filter: an already-indexed file whose size AND mtime both match
@@ -1591,7 +1757,8 @@ class ExtractionOrchestrator {
1591
1757
  }
1592
1758
  // Load only grammars needed for changed files
1593
1759
  if (filesToIndex.length > 0) {
1594
- const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f)))];
1760
+ const overrides = (0, project_config_1.loadExtensionOverrides)(this.rootDir);
1761
+ const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f, undefined, overrides)))];
1595
1762
  // .h files default to 'c' but may be C++ — ensure cpp grammar is loaded
1596
1763
  if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
1597
1764
  neededLanguages.push('cpp');