@optave/codegraph 3.9.4 → 3.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/README.md +10 -10
  2. package/dist/cli/commands/watch.d.ts.map +1 -1
  3. package/dist/cli/commands/watch.js +2 -0
  4. package/dist/cli/commands/watch.js.map +1 -1
  5. package/dist/cli.js +24 -1
  6. package/dist/cli.js.map +1 -1
  7. package/dist/domain/graph/builder/context.d.ts +2 -0
  8. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  9. package/dist/domain/graph/builder/context.js.map +1 -1
  10. package/dist/domain/graph/builder/helpers.d.ts +13 -2
  11. package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
  12. package/dist/domain/graph/builder/helpers.js +30 -4
  13. package/dist/domain/graph/builder/helpers.js.map +1 -1
  14. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  15. package/dist/domain/graph/builder/pipeline.js +129 -3
  16. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  17. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  18. package/dist/domain/graph/builder/stages/collect-files.js +58 -26
  19. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  20. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  21. package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
  22. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  23. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  24. package/dist/domain/graph/builder/stages/finalize.js +17 -0
  25. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  26. package/dist/domain/graph/journal.d.ts +15 -0
  27. package/dist/domain/graph/journal.d.ts.map +1 -1
  28. package/dist/domain/graph/journal.js +283 -28
  29. package/dist/domain/graph/journal.js.map +1 -1
  30. package/dist/domain/graph/watcher.d.ts +17 -0
  31. package/dist/domain/graph/watcher.d.ts.map +1 -1
  32. package/dist/domain/graph/watcher.js +23 -7
  33. package/dist/domain/graph/watcher.js.map +1 -1
  34. package/dist/domain/parser.d.ts +13 -4
  35. package/dist/domain/parser.d.ts.map +1 -1
  36. package/dist/domain/parser.js +174 -80
  37. package/dist/domain/parser.js.map +1 -1
  38. package/dist/domain/search/generator.d.ts.map +1 -1
  39. package/dist/domain/search/generator.js +28 -2
  40. package/dist/domain/search/generator.js.map +1 -1
  41. package/dist/domain/wasm-worker-entry.d.ts +24 -0
  42. package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
  43. package/dist/domain/wasm-worker-entry.js +643 -0
  44. package/dist/domain/wasm-worker-entry.js.map +1 -0
  45. package/dist/domain/wasm-worker-pool.d.ts +59 -0
  46. package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
  47. package/dist/domain/wasm-worker-pool.js +312 -0
  48. package/dist/domain/wasm-worker-pool.js.map +1 -0
  49. package/dist/domain/wasm-worker-protocol.d.ts +65 -0
  50. package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
  51. package/dist/domain/wasm-worker-protocol.js +13 -0
  52. package/dist/domain/wasm-worker-protocol.js.map +1 -0
  53. package/dist/extractors/javascript.js +146 -2
  54. package/dist/extractors/javascript.js.map +1 -1
  55. package/dist/features/boundaries.d.ts +2 -2
  56. package/dist/features/boundaries.d.ts.map +1 -1
  57. package/dist/features/boundaries.js +2 -31
  58. package/dist/features/boundaries.js.map +1 -1
  59. package/dist/features/snapshot.d.ts.map +1 -1
  60. package/dist/features/snapshot.js +99 -13
  61. package/dist/features/snapshot.js.map +1 -1
  62. package/dist/graph/algorithms/louvain.d.ts.map +1 -1
  63. package/dist/graph/algorithms/louvain.js +2 -4
  64. package/dist/graph/algorithms/louvain.js.map +1 -1
  65. package/dist/infrastructure/config.d.ts.map +1 -1
  66. package/dist/infrastructure/config.js +12 -2
  67. package/dist/infrastructure/config.js.map +1 -1
  68. package/dist/shared/globs.d.ts +40 -0
  69. package/dist/shared/globs.d.ts.map +1 -0
  70. package/dist/shared/globs.js +126 -0
  71. package/dist/shared/globs.js.map +1 -0
  72. package/dist/types.d.ts +26 -1
  73. package/dist/types.d.ts.map +1 -1
  74. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  75. package/package.json +7 -7
  76. package/src/cli/commands/watch.ts +2 -0
  77. package/src/cli.ts +31 -8
  78. package/src/domain/graph/builder/context.ts +2 -0
  79. package/src/domain/graph/builder/helpers.ts +53 -3
  80. package/src/domain/graph/builder/pipeline.ts +142 -3
  81. package/src/domain/graph/builder/stages/collect-files.ts +56 -26
  82. package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
  83. package/src/domain/graph/builder/stages/finalize.ts +16 -0
  84. package/src/domain/graph/journal.ts +284 -27
  85. package/src/domain/graph/watcher.ts +29 -9
  86. package/src/domain/parser.ts +166 -73
  87. package/src/domain/search/generator.ts +34 -2
  88. package/src/domain/wasm-worker-entry.ts +788 -0
  89. package/src/domain/wasm-worker-pool.ts +330 -0
  90. package/src/domain/wasm-worker-protocol.ts +81 -0
  91. package/src/extractors/javascript.ts +149 -2
  92. package/src/features/boundaries.ts +2 -27
  93. package/src/features/snapshot.ts +93 -14
  94. package/src/graph/algorithms/louvain.ts +2 -4
  95. package/src/infrastructure/config.ts +12 -2
  96. package/src/shared/globs.ts +121 -0
  97. package/src/types.ts +26 -1
package/src/cli.ts CHANGED
@@ -1,14 +1,37 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  import { run } from './cli/index.js';
4
+ import { disposeParsers } from './domain/parser.js';
4
5
  import { CodegraphError, toErrorMessage } from './shared/errors.js';
5
6
 
6
- run().catch((err: unknown) => {
7
- if (err instanceof CodegraphError) {
8
- console.error(`codegraph [${err.code}]: ${err.message}`);
9
- if (err.file) console.error(` file: ${err.file}`);
10
- } else {
11
- console.error(`codegraph: fatal error${toErrorMessage(err)}`);
7
+ /**
8
+ * After the CLI command finishes, tear down any cached WASM parsers and the
9
+ * worker thread pool. The WASM parse worker (see `domain/wasm-worker-pool.ts`)
10
+ * keeps the event loop alive until `worker.terminate()` is called, so without
11
+ * this teardown short-lived commands like `codegraph build` would hang for
12
+ * minutes before Node gives up — surfacing in CI as `spawnSync ETIMEDOUT`
13
+ * even though the command's work is already complete.
14
+ *
15
+ * `disposeParsers` is safe to call when the pool was never instantiated
16
+ * (e.g. native engine, or commands that never parse): it no-ops cleanly.
17
+ */
18
+ async function shutdown(): Promise<void> {
19
+ try {
20
+ await disposeParsers();
21
+ } catch {
22
+ /* don't mask the real exit status over a teardown failure */
12
23
  }
13
- process.exit(1);
14
- });
24
+ }
25
+
26
+ run()
27
+ .then(shutdown)
28
+ .catch(async (err: unknown) => {
29
+ if (err instanceof CodegraphError) {
30
+ console.error(`codegraph [${err.code}]: ${err.message}`);
31
+ if (err.file) console.error(` file: ${err.file}`);
32
+ } else {
33
+ console.error(`codegraph: fatal error — ${toErrorMessage(err)}`);
34
+ }
35
+ await shutdown();
36
+ process.exit(1);
37
+ });
package/src/domain/graph/builder/context.ts CHANGED
@@ -87,6 +87,8 @@ export class PipelineContext {
87
87
  // ── Phase timing ───────────────────────────────────────────────────
88
88
  timing: {
89
89
  setupMs?: number;
90
+ collectMs?: number;
91
+ detectMs?: number;
90
92
  parseMs?: number;
91
93
  insertMs?: number;
92
94
  resolveMs?: number;
package/src/domain/graph/builder/helpers.ts CHANGED
@@ -8,7 +8,8 @@ import fs from 'node:fs';
8
8
  import path from 'node:path';
9
9
  import { purgeFilesData } from '../../../db/index.js';
10
10
  import { warn } from '../../../infrastructure/logger.js';
11
- import { EXTENSIONS, IGNORE_DIRS } from '../../../shared/constants.js';
11
+ import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js';
12
+ import { compileGlobs, matchesAny } from '../../../shared/globs.js';
12
13
  import type {
13
14
  BetterSqlite3Database,
14
15
  CodegraphConfig,
@@ -58,9 +59,29 @@ function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set<string> | null): boo
58
59
  return false;
59
60
  }
60
61
 
62
+ /**
63
+ * Check whether a source file passes the configured include/exclude globs.
64
+ *
65
+ * Patterns are matched against the path relative to the project root,
66
+ * normalized to forward slashes (e.g. `src/foo/bar.ts`). When both lists
67
+ * are set, a file must match at least one include and no exclude.
68
+ */
69
+ export function passesIncludeExclude(
70
+ relPath: string,
71
+ includeRegexes: readonly RegExp[],
72
+ excludeRegexes: readonly RegExp[],
73
+ ): boolean {
74
+ if (includeRegexes.length > 0 && !matchesAny(includeRegexes, relPath)) return false;
75
+ if (excludeRegexes.length > 0 && matchesAny(excludeRegexes, relPath)) return false;
76
+ return true;
77
+ }
78
+
61
79
  /**
62
80
  * Recursively collect all source files under `dir`.
63
81
  * When `directories` is a Set, also tracks which directories contain files.
82
+ *
83
+ * The first invocation establishes `dir` as the project root against which
84
+ * `config.include` / `config.exclude` globs are matched.
64
85
  */
65
86
  export function collectFiles(
66
87
  dir: string,
@@ -68,6 +89,9 @@ export function collectFiles(
68
89
  config: Partial<CodegraphConfig>,
69
90
  directories: Set<string>,
70
91
  _visited?: Set<string>,
92
+ _rootDir?: string,
93
+ _includeRegexes?: readonly RegExp[],
94
+ _excludeRegexes?: readonly RegExp[],
71
95
  ): { files: string[]; directories: Set<string> };
72
96
  export function collectFiles(
73
97
  dir: string,
@@ -75,6 +99,9 @@ export function collectFiles(
75
99
  config?: Partial<CodegraphConfig>,
76
100
  directories?: null,
77
101
  _visited?: Set<string>,
102
+ _rootDir?: string,
103
+ _includeRegexes?: readonly RegExp[],
104
+ _excludeRegexes?: readonly RegExp[],
78
105
  ): string[];
79
106
  export function collectFiles(
80
107
  dir: string,
@@ -82,10 +109,20 @@ export function collectFiles(
82
109
  config: Partial<CodegraphConfig> = {},
83
110
  directories: Set<string> | null = null,
84
111
  _visited: Set<string> = new Set(),
112
+ _rootDir?: string,
113
+ _includeRegexes?: readonly RegExp[],
114
+ _excludeRegexes?: readonly RegExp[],
85
115
  ): string[] | { files: string[]; directories: Set<string> } {
86
116
  const trackDirs = directories instanceof Set;
87
117
  let hasFiles = false;
88
118
 
119
+ // First call: compute root and compile include/exclude patterns once,
120
+ // then pass them down recursive calls so we don't recompile per directory.
121
+ const rootDir = _rootDir ?? dir;
122
+ const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
123
+ const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);
124
+ const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
125
+
89
126
  // Merge config ignoreDirs with defaults
90
127
  const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
91
128
 
@@ -116,11 +153,24 @@ export function collectFiles(
116
153
  const full = path.join(dir, entry.name);
117
154
  if (entry.isDirectory()) {
118
155
  if (trackDirs) {
119
- collectFiles(full, files, config, directories as Set<string>, _visited);
156
+ collectFiles(
157
+ full,
158
+ files,
159
+ config,
160
+ directories as Set<string>,
161
+ _visited,
162
+ rootDir,
163
+ includeRegexes,
164
+ excludeRegexes,
165
+ );
120
166
  } else {
121
- collectFiles(full, files, config, null, _visited);
167
+ collectFiles(full, files, config, null, _visited, rootDir, includeRegexes, excludeRegexes);
122
168
  }
123
169
  } else if (EXTENSIONS.has(path.extname(entry.name))) {
170
+ if (hasGlobFilters) {
171
+ const rel = normalizePath(path.relative(rootDir, full));
172
+ if (!passesIncludeExclude(rel, includeRegexes, excludeRegexes)) continue;
173
+ }
124
174
  files.push(full);
125
175
  hasFiles = true;
126
176
  }
package/src/domain/graph/builder/pipeline.ts CHANGED
@@ -21,6 +21,7 @@ import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js'
21
21
  import { debug, info, warn } from '../../../infrastructure/logger.js';
22
22
  import { loadNative } from '../../../infrastructure/native.js';
23
23
  import { semverCompare } from '../../../infrastructure/update-check.js';
24
+ import { normalizePath } from '../../../shared/constants.js';
24
25
  import { toErrorMessage } from '../../../shared/errors.js';
25
26
  import { CODEGRAPH_VERSION } from '../../../shared/version.js';
26
27
  import type {
@@ -29,11 +30,12 @@ import type {
29
30
  BuildResult,
30
31
  Definition,
31
32
  ExtractorOutput,
33
+ SqliteStatement,
32
34
  } from '../../../types.js';
33
- import { getActiveEngine } from '../../parser.js';
35
+ import { getActiveEngine, getInstalledWasmExtensions, parseFilesAuto } from '../../parser.js';
34
36
  import { setWorkspaces } from '../resolve.js';
35
37
  import { PipelineContext } from './context.js';
36
- import { loadPathAliases } from './helpers.js';
38
+ import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
37
39
  import { NativeDbProxy } from './native-db-proxy.js';
38
40
  import { buildEdges } from './stages/build-edges.js';
39
41
  import { buildStructure } from './stages/build-structure.js';
@@ -104,6 +106,21 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
104
106
  }
105
107
  }
106
108
 
109
+ function warnOnEmbeddingsWipe(ctx: PipelineContext): void {
110
+ const willBeFullBuild = !ctx.incremental || ctx.forceFullRebuild;
111
+ if (!willBeFullBuild) return;
112
+ let count = 0;
113
+ try {
114
+ count = (ctx.db.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number }).c;
115
+ } catch {
116
+ return; // embeddings table missing — nothing to warn about
117
+ }
118
+ if (count === 0) return;
119
+ warn(
120
+ `Full rebuild will discard ${count} embedding${count === 1 ? '' : 's'}; re-run \`codegraph embed\` after the build.`,
121
+ );
122
+ }
123
+
107
124
  function loadAliases(ctx: PipelineContext): void {
108
125
  ctx.aliases = loadPathAliases(ctx.rootDir);
109
126
  if (ctx.config.aliases) {
@@ -149,6 +166,7 @@ function setupPipeline(ctx: PipelineContext): void {
149
166
 
150
167
  initializeEngine(ctx);
151
168
  checkEngineSchemaMismatch(ctx);
169
+ warnOnEmbeddingsWipe(ctx);
152
170
  loadAliases(ctx);
153
171
 
154
172
  // Workspace packages (monorepo)
@@ -166,6 +184,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
166
184
  return {
167
185
  phases: {
168
186
  setupMs: +(t.setupMs ?? 0).toFixed(1),
187
+ collectMs: +(t.collectMs ?? 0).toFixed(1),
188
+ detectMs: +(t.detectMs ?? 0).toFixed(1),
169
189
  parseMs: +(t.parseMs ?? 0).toFixed(1),
170
190
  insertMs: +(t.insertMs ?? 0).toFixed(1),
171
191
  resolveMs: +(t.resolveMs ?? 0).toFixed(1),
@@ -540,7 +560,9 @@ function formatNativeTimingResult(
540
560
  ): BuildResult {
541
561
  return {
542
562
  phases: {
543
- setupMs: +((p.setupMs ?? 0) + (p.collectMs ?? 0) + (p.detectMs ?? 0)).toFixed(1),
563
+ setupMs: +(p.setupMs ?? 0).toFixed(1),
564
+ collectMs: +(p.collectMs ?? 0).toFixed(1),
565
+ detectMs: +(p.detectMs ?? 0).toFixed(1),
544
566
  parseMs: +(p.parseMs ?? 0).toFixed(1),
545
567
  insertMs: +(p.insertMs ?? 0).toFixed(1),
546
568
  resolveMs: +(p.resolveMs ?? 0).toFixed(1),
@@ -696,10 +718,123 @@ async function tryNativeOrchestrator(
696
718
  }
697
719
  }
698
720
 
721
+ // Engine parity: the native orchestrator silently drops files whose
722
+ // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
723
+ // stale native binaries). WASM handles those — backfill via WASM so both
724
+ // engines process the same file set (#967).
725
+ //
726
+ // Only runs on full builds: incremental builds only touch changed files,
727
+ // which are parsed through parseFilesAuto (which has its own per-file
728
+ // backfill), so a full filesystem scan here would be wasted work.
729
+ if (result.isFullBuild) {
730
+ await backfillNativeDroppedFiles(ctx);
731
+ }
732
+
699
733
  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
700
734
  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
701
735
  }
702
736
 
737
+ /**
738
+ * Backfill files that the native orchestrator silently dropped during parse.
739
+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
740
+ */
741
+ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
742
+ // Needs a real better-sqlite3 connection for INSERT.
743
+ if (ctx.nativeFirstProxy) {
744
+ closeNativeDb(ctx, 'pre-parity-backfill');
745
+ ctx.db = openDb(ctx.dbPath);
746
+ ctx.nativeFirstProxy = false;
747
+ }
748
+
749
+ const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
750
+ const expected = new Set(
751
+ collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
752
+ );
753
+
754
+ const existingRows = ctx.db
755
+ .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
756
+ .all() as Array<{ file: string }>;
757
+ const existing = new Set(existingRows.map((r) => r.file));
758
+
759
+ // Restrict backfill to files with an installed WASM grammar. Extensions in
760
+ // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
761
+ // minimal installs) can't be parsed by either engine, so they're not a
762
+ // native regression — excluding them keeps the warn count meaningful.
763
+ const installedExts = getInstalledWasmExtensions();
764
+ const missingAbs: string[] = [];
765
+ for (const rel of expected) {
766
+ if (existing.has(rel)) continue;
767
+ const ext = path.extname(rel).toLowerCase();
768
+ if (!installedExts.has(ext)) continue;
769
+ missingAbs.push(path.join(ctx.rootDir, rel));
770
+ }
771
+ if (missingAbs.length === 0) return;
772
+
773
+ warn(
774
+ `Native orchestrator dropped ${missingAbs.length} file(s); backfilling via WASM for engine parity`,
775
+ );
776
+ const wasmResults = await parseFilesAuto(missingAbs, ctx.rootDir, { engine: 'wasm' });
777
+
778
+ const rows: unknown[][] = [];
779
+ const exportKeys: unknown[][] = [];
780
+ for (const [relPath, symbols] of wasmResults) {
781
+ // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
782
+ rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
783
+ for (const def of symbols.definitions ?? []) {
784
+ // Populate qualified_name/scope the same way the JS fallback does so
785
+ // downstream queries (cross-file references, "go to definition") find
786
+ // these symbols.
787
+ const dotIdx = def.name.lastIndexOf('.');
788
+ const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
789
+ rows.push([
790
+ def.name,
791
+ def.kind,
792
+ relPath,
793
+ def.line,
794
+ def.endLine ?? null,
795
+ null,
796
+ def.name,
797
+ scope,
798
+ def.visibility ?? null,
799
+ ]);
800
+ }
801
+ // Exports: insert the row (INSERT OR IGNORE — a matching definition row
802
+ // is a no-op) and queue a key for the second-pass exported=1 update, so
803
+ // queries filtering on exported=1 find backfilled symbols (#970).
804
+ for (const exp of symbols.exports ?? []) {
805
+ rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
806
+ exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
807
+ }
808
+ }
809
+ const db = ctx.db as unknown as BetterSqlite3Database;
810
+ batchInsertNodes(db, rows);
811
+
812
+ // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
813
+ if (exportKeys.length > 0) {
814
+ const EXPORT_CHUNK = 500;
815
+ const exportStmtCache = new Map<number, SqliteStatement>();
816
+ for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
817
+ const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
818
+ const chunkSize = end - i;
819
+ let updateStmt = exportStmtCache.get(chunkSize);
820
+ if (!updateStmt) {
821
+ const conditions = Array.from(
822
+ { length: chunkSize },
823
+ () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
824
+ ).join(' OR ');
825
+ updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
826
+ exportStmtCache.set(chunkSize, updateStmt);
827
+ }
828
+ const vals: unknown[] = [];
829
+ for (let j = i; j < end; j++) {
830
+ const k = exportKeys[j] as unknown[];
831
+ vals.push(k[0], k[1], k[2], k[3]);
832
+ }
833
+ updateStmt.run(...vals);
834
+ }
835
+ }
836
+ }
837
+
703
838
  // ── Pipeline stages execution ───────────────────────────────────────────
704
839
 
705
840
  async function runPipelineStages(ctx: PipelineContext): Promise<void> {
@@ -845,6 +980,10 @@ export async function buildGraph(
845
980
  `Codegraph version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`,
846
981
  );
847
982
  ctx.forceFullRebuild = true;
983
+ // Re-check embeddings: the initial warnOnEmbeddingsWipe ran before
984
+ // forceFullRebuild was set here, so the silent-data-loss guard
985
+ // would otherwise miss this late-promotion path (#986 follow-up).
986
+ warnOnEmbeddingsWipe(ctx);
848
987
  }
849
988
  }
850
989
  }
package/src/domain/graph/builder/stages/collect-files.ts CHANGED
@@ -7,11 +7,13 @@
7
7
  */
8
8
  import fs from 'node:fs';
9
9
  import path from 'node:path';
10
+ import { performance } from 'node:perf_hooks';
10
11
  import { debug, info } from '../../../../infrastructure/logger.js';
11
12
  import { normalizePath } from '../../../../shared/constants.js';
13
+ import { compileGlobs } from '../../../../shared/globs.js';
12
14
  import { readJournal } from '../../journal.js';
13
15
  import type { PipelineContext } from '../context.js';
14
- import { collectFiles as collectFilesUtil } from '../helpers.js';
16
+ import { collectFiles as collectFilesUtil, passesIncludeExclude } from '../helpers.js';
15
17
 
16
18
  /**
17
19
  * Reconstruct allFiles from DB file_hashes + journal deltas.
@@ -20,7 +22,7 @@ import { collectFiles as collectFilesUtil } from '../helpers.js';
20
22
  function tryFastCollect(
21
23
  ctx: PipelineContext,
22
24
  ): { files: string[]; directories: Set<string> } | null {
23
- const { db, rootDir } = ctx;
25
+ const { db, rootDir, config } = ctx;
24
26
  const useNative = ctx.engineName === 'native' && !!ctx.nativeDb?.getCollectFilesData;
25
27
 
26
28
  // 1. Check that file_hashes table exists and has entries
@@ -70,10 +72,20 @@ function tryFastCollect(
70
72
  }
71
73
  }
72
74
 
73
- // 5. Convert to absolute paths and compute directories
75
+ // 5. Convert to absolute paths and compute directories, honoring
76
+ // config.include / config.exclude globs so incremental builds reflect
77
+ // config changes (paths from the DB were collected under older config).
78
+ const includeRegexes = compileGlobs(config?.include);
79
+ const excludeRegexes = compileGlobs(config?.exclude);
80
+ const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
81
+
74
82
  const files: string[] = [];
75
83
  const directories = new Set<string>();
76
84
  for (const relPath of fileSet) {
85
+ if (hasGlobFilters) {
86
+ const normRel = normalizePath(relPath);
87
+ if (!passesIncludeExclude(normRel, includeRegexes, excludeRegexes)) continue;
88
+ }
77
89
  const absPath = path.join(rootDir, relPath);
78
90
  files.push(absPath);
79
91
  directories.add(path.dirname(absPath));
@@ -89,42 +101,60 @@ export async function collectFiles(ctx: PipelineContext): Promise<void> {
89
101
  const { rootDir, config, opts } = ctx;
90
102
 
91
103
  if (opts.scope) {
92
- // Scoped rebuild: rebuild only specified files
104
+ // Scoped rebuild: rebuild only specified files.
105
+ //
106
+ // Timer only wraps the filesystem-walk portion (existence checks + file
107
+ // list construction). Change-detection outputs (parseChanges, removed,
108
+ // isFullBuild) are attributed to detectMs for semantic consistency with
109
+ // the non-scoped path, even though this stage computes them.
110
+ const start = performance.now();
93
111
  const scopedFiles = opts.scope.map((f: string) => normalizePath(f));
94
112
  const existing: Array<{ file: string; relPath: string }> = [];
95
113
  const missing: string[] = [];
96
- for (const rel of scopedFiles) {
97
- const abs = path.join(rootDir, rel);
98
- if (fs.existsSync(abs)) {
99
- existing.push({ file: abs, relPath: rel });
100
- } else {
101
- missing.push(rel);
114
+ try {
115
+ for (const rel of scopedFiles) {
116
+ const abs = path.join(rootDir, rel);
117
+ if (fs.existsSync(abs)) {
118
+ existing.push({ file: abs, relPath: rel });
119
+ } else {
120
+ missing.push(rel);
121
+ }
102
122
  }
123
+ ctx.allFiles = existing.map((e) => e.file);
124
+ ctx.discoveredDirs = new Set(existing.map((e) => path.dirname(e.file)));
125
+ } finally {
126
+ ctx.timing.collectMs = performance.now() - start;
103
127
  }
104
- ctx.allFiles = existing.map((e) => e.file);
105
- ctx.discoveredDirs = new Set(existing.map((e) => path.dirname(e.file)));
128
+ // Change-detection outputs timed under detectMs for semantic parity.
129
+ const detectStart = performance.now();
106
130
  ctx.parseChanges = existing;
107
131
  ctx.metadataUpdates = [];
108
132
  ctx.removed = missing;
109
133
  ctx.isFullBuild = false;
134
+ ctx.timing.detectMs = (ctx.timing.detectMs ?? 0) + (performance.now() - detectStart);
110
135
  info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`);
111
136
  return;
112
137
  }
113
138
 
114
- // Incremental fast path: reconstruct file list from DB + journal deltas
115
- // instead of full recursive filesystem scan (~8ms savings on 473 files).
116
- if (ctx.incremental && !ctx.forceFullRebuild) {
117
- const fast = tryFastCollect(ctx);
118
- if (fast) {
119
- ctx.allFiles = fast.files;
120
- ctx.discoveredDirs = fast.directories;
121
- info(`Found ${ctx.allFiles.length} files (cached)`);
122
- return;
139
+ const start = performance.now();
140
+ try {
141
+ // Incremental fast path: reconstruct file list from DB + journal deltas
142
+ // instead of full recursive filesystem scan (~8ms savings on 473 files).
143
+ if (ctx.incremental && !ctx.forceFullRebuild) {
144
+ const fast = tryFastCollect(ctx);
145
+ if (fast) {
146
+ ctx.allFiles = fast.files;
147
+ ctx.discoveredDirs = fast.directories;
148
+ info(`Found ${ctx.allFiles.length} files (cached)`);
149
+ return;
150
+ }
123
151
  }
124
- }
125
152
 
126
- const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
127
- ctx.allFiles = collected.files;
128
- ctx.discoveredDirs = collected.directories;
129
- info(`Found ${ctx.allFiles.length} files to parse`);
153
+ const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
154
+ ctx.allFiles = collected.files;
155
+ ctx.discoveredDirs = collected.directories;
156
+ info(`Found ${ctx.allFiles.length} files to parse`);
157
+ } finally {
158
+ ctx.timing.collectMs = performance.now() - start;
159
+ }
130
160
  }
package/src/domain/graph/builder/stages/detect-changes.ts CHANGED
@@ -7,6 +7,7 @@
7
7
  */
8
8
  import fs from 'node:fs';
9
9
  import path from 'node:path';
10
+ import { performance } from 'node:perf_hooks';
10
11
  import { closeDb } from '../../../../db/index.js';
11
12
  import { debug, info } from '../../../../infrastructure/logger.js';
12
13
  import { normalizePath } from '../../../../shared/constants.js';
@@ -512,59 +513,66 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
512
513
  }
513
514
 
514
515
  export async function detectChanges(ctx: PipelineContext): Promise<void> {
515
- const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx;
516
- if ((opts as Record<string, unknown>).scope) {
517
- handleScopedBuild(ctx);
518
- return;
519
- }
520
- const increResult =
521
- incremental && !forceFullRebuild
522
- ? getChangedFiles(db, allFiles, rootDir)
523
- : {
524
- changed: allFiles.map((f): ChangedFile => ({ file: f })),
525
- removed: [] as string[],
526
- isFullBuild: true,
527
- };
528
- ctx.removed = increResult.removed;
529
- ctx.isFullBuild = increResult.isFullBuild;
530
- ctx.parseChanges = increResult.changed
531
- .filter((c) => !c.metadataOnly)
532
- .map((c) => ({
533
- file: c.file,
534
- relPath: c.relPath,
535
- content: c.content,
536
- hash: c.hash,
537
- stat: c.stat ? { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size } : undefined,
538
- _reverseDepOnly: c._reverseDepOnly,
539
- }));
540
- ctx.metadataUpdates = increResult.changed
541
- .filter(
542
- (c): c is ChangedFile & { relPath: string; hash: string; stat: FileStat } =>
543
- !!c.metadataOnly && !!c.relPath && !!c.hash && !!c.stat,
544
- )
545
- .map((c) => ({
546
- relPath: c.relPath,
547
- hash: c.hash,
548
- stat: { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size },
549
- }));
550
- if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) {
551
- const ranAnalysis = await runPendingAnalysis(ctx);
552
- if (ranAnalysis) {
516
+ const start = performance.now();
517
+ try {
518
+ const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx;
519
+ if ((opts as Record<string, unknown>).scope) {
520
+ handleScopedBuild(ctx);
521
+ return;
522
+ }
523
+ const increResult =
524
+ incremental && !forceFullRebuild
525
+ ? getChangedFiles(db, allFiles, rootDir)
526
+ : {
527
+ changed: allFiles.map((f): ChangedFile => ({ file: f })),
528
+ removed: [] as string[],
529
+ isFullBuild: true,
530
+ };
531
+ ctx.removed = increResult.removed;
532
+ ctx.isFullBuild = increResult.isFullBuild;
533
+ ctx.parseChanges = increResult.changed
534
+ .filter((c) => !c.metadataOnly)
535
+ .map((c) => ({
536
+ file: c.file,
537
+ relPath: c.relPath,
538
+ content: c.content,
539
+ hash: c.hash,
540
+ stat: c.stat ? { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size } : undefined,
541
+ _reverseDepOnly: c._reverseDepOnly,
542
+ }));
543
+ ctx.metadataUpdates = increResult.changed
544
+ .filter(
545
+ (c): c is ChangedFile & { relPath: string; hash: string; stat: FileStat } =>
546
+ !!c.metadataOnly && !!c.relPath && !!c.hash && !!c.stat,
547
+ )
548
+ .map((c) => ({
549
+ relPath: c.relPath,
550
+ hash: c.hash,
551
+ stat: { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size },
552
+ }));
553
+ if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) {
554
+ const ranAnalysis = await runPendingAnalysis(ctx);
555
+ if (ranAnalysis) {
556
+ closeDb(db);
557
+ writeJournalHeader(rootDir, Date.now());
558
+ ctx.earlyExit = true;
559
+ return;
560
+ }
561
+ healMetadata(ctx);
562
+ info('No changes detected. Graph is up to date.');
553
563
  closeDb(db);
554
564
  writeJournalHeader(rootDir, Date.now());
555
565
  ctx.earlyExit = true;
556
566
  return;
557
567
  }
558
- healMetadata(ctx);
559
- info('No changes detected. Graph is up to date.');
560
- closeDb(db);
561
- writeJournalHeader(rootDir, Date.now());
562
- ctx.earlyExit = true;
563
- return;
564
- }
565
- if (ctx.isFullBuild) {
566
- handleFullBuild(ctx);
567
- } else {
568
- handleIncrementalBuild(ctx);
568
+ if (ctx.isFullBuild) {
569
+ handleFullBuild(ctx);
570
+ } else {
571
+ handleIncrementalBuild(ctx);
572
+ }
573
+ } finally {
574
+ // Additive to respect any partial detectMs contribution from collectFiles
575
+ // (scoped-rebuild path splits change-detection outputs across both stages).
576
+ ctx.timing.detectMs = (ctx.timing.detectMs ?? 0) + (performance.now() - start);
569
577
  }
570
578
  }
package/src/domain/graph/builder/stages/finalize.ts CHANGED
@@ -3,6 +3,7 @@
3
3
  *
4
4
  * WASM cleanup, stats logging, drift detection, build metadata, registry, journal.
5
5
  */
6
+ import fs from 'node:fs';
6
7
  import { tmpdir } from 'node:os';
7
8
  import path from 'node:path';
8
9
  import { performance } from 'node:perf_hooks';
@@ -88,6 +89,19 @@ function persistBuildMetadata(
88
89
  // subsequent build to be a full rebuild.
89
90
  const codeVersionToWrite =
90
91
  ctx.engineName === 'native' && ctx.engineVersion ? ctx.engineVersion : CODEGRAPH_VERSION;
92
+ // Persist the repo root so downstream commands (e.g. `codegraph embed`)
93
+ // can resolve relative file paths regardless of the invoking cwd.
94
+ // Use realpathSync (symlink-resolving) to match the Rust engine's
95
+ // std::fs::canonicalize — otherwise the JS write here would overwrite the
96
+ // canonical path Rust wrote for native full builds and could re-introduce
97
+ // a non-canonical path when the project root is behind a symlink.
98
+ const resolvedRootDir = path.resolve(ctx.rootDir);
99
+ let rootDirToWrite = resolvedRootDir;
100
+ try {
101
+ rootDirToWrite = fs.realpathSync(resolvedRootDir);
102
+ } catch {
103
+ /* realpath can fail (e.g. path no longer exists); fall back to resolve() */
104
+ }
91
105
  try {
92
106
  if (useNativeDb) {
93
107
  ctx.nativeDb!.setBuildMeta(
@@ -99,6 +113,7 @@ function persistBuildMetadata(
99
113
  built_at: buildNow.toISOString(),
100
114
  node_count: String(nodeCount),
101
115
  edge_count: String(actualEdgeCount),
116
+ root_dir: rootDirToWrite,
102
117
  }).map(([key, value]) => ({ key, value: String(value) })),
103
118
  );
104
119
  } else {
@@ -110,6 +125,7 @@ function persistBuildMetadata(
110
125
  built_at: buildNow.toISOString(),
111
126
  node_count: nodeCount,
112
127
  edge_count: actualEdgeCount,
128
+ root_dir: rootDirToWrite,
113
129
  });
114
130
  }
115
131
  } catch (err) {