@optave/codegraph 3.4.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +22 -21
  2. package/dist/ast-analysis/rules/javascript.d.ts.map +1 -1
  3. package/dist/ast-analysis/rules/javascript.js +1 -0
  4. package/dist/ast-analysis/rules/javascript.js.map +1 -1
  5. package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
  6. package/dist/ast-analysis/visitors/ast-store-visitor.js +103 -35
  7. package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
  8. package/dist/db/better-sqlite3.d.ts +3 -0
  9. package/dist/db/better-sqlite3.d.ts.map +1 -0
  10. package/dist/db/better-sqlite3.js +19 -0
  11. package/dist/db/better-sqlite3.js.map +1 -0
  12. package/dist/db/connection.d.ts +13 -2
  13. package/dist/db/connection.d.ts.map +1 -1
  14. package/dist/db/connection.js +76 -2
  15. package/dist/db/connection.js.map +1 -1
  16. package/dist/db/index.d.ts +2 -2
  17. package/dist/db/index.d.ts.map +1 -1
  18. package/dist/db/index.js +1 -1
  19. package/dist/db/index.js.map +1 -1
  20. package/dist/db/migrations.d.ts.map +1 -1
  21. package/dist/db/migrations.js +2 -0
  22. package/dist/db/migrations.js.map +1 -1
  23. package/dist/db/query-builder.d.ts +5 -5
  24. package/dist/db/query-builder.d.ts.map +1 -1
  25. package/dist/db/query-builder.js +20 -4
  26. package/dist/db/query-builder.js.map +1 -1
  27. package/dist/db/repository/index.d.ts +1 -0
  28. package/dist/db/repository/index.d.ts.map +1 -1
  29. package/dist/db/repository/index.js +1 -0
  30. package/dist/db/repository/index.js.map +1 -1
  31. package/dist/db/repository/native-repository.d.ts +58 -0
  32. package/dist/db/repository/native-repository.d.ts.map +1 -0
  33. package/dist/db/repository/native-repository.js +261 -0
  34. package/dist/db/repository/native-repository.js.map +1 -0
  35. package/dist/db/repository/nodes.d.ts +4 -4
  36. package/dist/db/repository/nodes.d.ts.map +1 -1
  37. package/dist/db/repository/nodes.js +6 -6
  38. package/dist/db/repository/nodes.js.map +1 -1
  39. package/dist/domain/graph/builder/context.d.ts +2 -1
  40. package/dist/domain/graph/builder/context.d.ts.map +1 -1
  41. package/dist/domain/graph/builder/context.js +1 -0
  42. package/dist/domain/graph/builder/context.js.map +1 -1
  43. package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
  44. package/dist/domain/graph/builder/pipeline.js +29 -7
  45. package/dist/domain/graph/builder/pipeline.js.map +1 -1
  46. package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
  47. package/dist/domain/graph/builder/stages/build-edges.js +103 -15
  48. package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
  49. package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
  50. package/dist/domain/graph/builder/stages/build-structure.js +33 -5
  51. package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
  52. package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
  53. package/dist/domain/graph/builder/stages/collect-files.js +71 -7
  54. package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
  55. package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
  56. package/dist/domain/graph/builder/stages/detect-changes.js +36 -14
  57. package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
  58. package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
  59. package/dist/domain/graph/builder/stages/finalize.js +43 -20
  60. package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
  61. package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
  62. package/dist/domain/graph/builder/stages/insert-nodes.js +104 -9
  63. package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
  64. package/dist/domain/parser.d.ts.map +1 -1
  65. package/dist/domain/parser.js +1 -0
  66. package/dist/domain/parser.js.map +1 -1
  67. package/dist/extractors/javascript.js +53 -38
  68. package/dist/extractors/javascript.js.map +1 -1
  69. package/dist/extractors/rust.js +2 -1
  70. package/dist/extractors/rust.js.map +1 -1
  71. package/dist/features/ast.d.ts +14 -1
  72. package/dist/features/ast.d.ts.map +1 -1
  73. package/dist/features/ast.js +38 -1
  74. package/dist/features/ast.js.map +1 -1
  75. package/dist/features/branch-compare.d.ts.map +1 -1
  76. package/dist/features/branch-compare.js +3 -1
  77. package/dist/features/branch-compare.js.map +1 -1
  78. package/dist/features/snapshot.d.ts.map +1 -1
  79. package/dist/features/snapshot.js +2 -1
  80. package/dist/features/snapshot.js.map +1 -1
  81. package/dist/mcp/server.d.ts.map +1 -1
  82. package/dist/mcp/server.js +2 -9
  83. package/dist/mcp/server.js.map +1 -1
  84. package/dist/types.d.ts +228 -0
  85. package/dist/types.d.ts.map +1 -1
  86. package/package.json +61 -11
  87. package/src/ast-analysis/rules/javascript.ts +1 -0
  88. package/src/ast-analysis/visitors/ast-store-visitor.ts +102 -33
  89. package/src/db/better-sqlite3.ts +20 -0
  90. package/src/db/connection.ts +94 -3
  91. package/src/db/index.ts +3 -1
  92. package/src/db/migrations.ts +2 -0
  93. package/src/db/query-builder.ts +30 -5
  94. package/src/db/repository/index.ts +1 -0
  95. package/src/db/repository/native-repository.ts +361 -0
  96. package/src/db/repository/nodes.ts +7 -3
  97. package/src/domain/graph/builder/context.ts +2 -0
  98. package/src/domain/graph/builder/pipeline.ts +30 -6
  99. package/src/domain/graph/builder/stages/build-edges.ts +117 -19
  100. package/src/domain/graph/builder/stages/build-structure.ts +47 -11
  101. package/src/domain/graph/builder/stages/collect-files.ts +84 -7
  102. package/src/domain/graph/builder/stages/detect-changes.ts +39 -21
  103. package/src/domain/graph/builder/stages/finalize.ts +49 -20
  104. package/src/domain/graph/builder/stages/insert-nodes.ts +129 -10
  105. package/src/domain/parser.ts +1 -0
  106. package/src/extractors/javascript.ts +54 -36
  107. package/src/extractors/rust.ts +2 -1
  108. package/src/features/ast.ts +66 -1
  109. package/src/features/branch-compare.ts +3 -1
  110. package/src/features/snapshot.ts +2 -1
  111. package/src/mcp/server.ts +2 -10
  112. package/src/types.ts +273 -0
@@ -6,8 +6,9 @@
6
6
  */
7
7
  import path from 'node:path';
8
8
  import { performance } from 'node:perf_hooks';
9
- import { getNodeId } from '../../../../db/index.js';
10
- import { loadNative } from '../../../../infrastructure/native.js';
9
+ import { getNodeId } from '#db/index.js';
10
+ import { debug } from '#infrastructure/logger.js';
11
+ import { loadNative } from '#infrastructure/native.js';
11
12
  import type {
12
13
  BetterSqlite3Database,
13
14
  Call,
@@ -17,7 +18,7 @@ import type {
17
18
  NativeAddon,
18
19
  NodeRow,
19
20
  TypeMapEntry,
20
- } from '../../../../types.js';
21
+ } from '#types';
21
22
  import { computeConfidence } from '../../resolve.js';
22
23
  import type { PipelineContext } from '../context.js';
23
24
  import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js';
@@ -561,22 +562,94 @@ function buildClassHierarchyEdges(
561
562
 
562
563
  // ── Main entry point ────────────────────────────────────────────────────
563
564
 
/**
 * Load the symbol nodes used to build lookup tables for edge construction.
 *
 * A scoped query is used only when all of the following hold: the build is not
 * a full build, at most 5 files are present in `ctx.fileSymbols`, and the
 * `nodes` table already holds more than 20 `file` nodes. In that case only
 * nodes belonging to the changed files, the values of `ctx.batchResolved`
 * (when present), and `ctx.barrelOnlyFiles` are selected. In every other case
 * all symbol nodes are loaded.
 *
 * @param ctx - Pipeline context supplying the database handle and build state.
 * @returns The selected rows plus `scoped`, which is true only when the
 *   file-restricted query was used.
 */
function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
  const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
  const symbolKinds = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
  const selectSymbols = `SELECT id, name, kind, file, line FROM nodes WHERE ${symbolKinds}`;

  // Unscoped path: every symbol node in the database.
  const loadEverything = (): { rows: QueryNodeRow[]; scoped: boolean } => ({
    rows: db.prepare(selectSymbols).all() as QueryNodeRow[],
    scoped: false,
  });

  // Gate 1: full builds and larger incremental builds always load everything.
  if (isFullBuild || fileSymbols.size > 5) return loadEverything();

  // Gate 2: scoping is only used once the graph already tracks more than 20 files.
  const fileCountRow = db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as {
    c: number;
  };
  if (!(fileCountRow.c > 20)) return loadEverything();

  // Relevant files, in insertion order: changed files, resolved import
  // targets, then barrel-only files.
  const relevantFiles = new Set<string>([
    ...fileSymbols.keys(),
    ...(batchResolved ? batchResolved.values() : []),
    ...ctx.barrelOnlyFiles,
  ]);

  // One positional placeholder per relevant file, bound in the same order.
  const fileList = [...relevantFiles];
  const marks = fileList.map(() => '?').join(',');
  const scopedRows = db
    .prepare(`${selectSymbols} AND file IN (${marks})`)
    .all(...fileList) as QueryNodeRow[];
  return { rows: scopedRows, scoped: true };
}
608
+
/**
 * Install a lazy SQL fallback on `ctx.nodesByName.get` after a scoped node load.
 *
 * When only a subset of nodes was loaded, global name-only lookups
 * (resolveByMethodOrGlobal, supplementReceiverEdges) would otherwise miss
 * symbols defined outside that subset. The patched `get` consults the
 * in-memory map first and, on a miss, queries the `nodes` table by name.
 * Hits are stored back into the map; misses are remembered so the same
 * unresolvable name is not queried again.
 *
 * The fallback is restricted to the same symbol kinds the eager node load
 * selects, so a lazily resolved name can only yield node kinds that a full
 * load would also have placed in the map.
 *
 * @param ctx - Pipeline context; `ctx.db` is queried and `ctx.nodesByName.get`
 *   is replaced in place.
 * @param scopedLoad - True when nodes were loaded for a subset of files; when
 *   false this function does nothing.
 */
function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void {
  if (!scopedLoad) return;
  const { db } = ctx;
  // Kind list mirrors the eager node-loading query used by the edge builder.
  const fallbackStmt = db.prepare(
    `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`,
  );
  const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName);
  // Names already looked up in the database without a match.
  const knownMisses = new Set<string>();
  ctx.nodesByName.get = (name: string) => {
    // The in-memory map always wins, including entries added after a miss.
    const cached = originalGet(name);
    if (cached !== undefined) return cached;
    if (knownMisses.has(name)) return undefined;

    const rows = fallbackStmt.all(name) as unknown as NodeRow[];
    if (rows.length === 0) {
      knownMisses.add(name);
      return undefined;
    }
    // Promote the hit into the map so later lookups skip the database.
    ctx.nodesByName.set(name, rows);
    return rows;
  };
}
632
+
564
633
  export async function buildEdges(ctx: PipelineContext): Promise<void> {
565
634
  const { db, engineName } = ctx;
566
635
 
567
636
  const getNodeIdStmt = makeGetNodeIdStmt(db);
568
637
 
569
- const allNodes = db
570
- .prepare(
571
- `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`,
572
- )
573
- .all() as QueryNodeRow[];
574
- setupNodeLookups(ctx, allNodes);
638
+ const { rows: allNodesBefore, scoped: scopedLoad } = loadNodes(ctx);
639
+ setupNodeLookups(ctx, allNodesBefore);
640
+ addLazyFallback(ctx, scopedLoad);
575
641
 
576
642
  const t0 = performance.now();
577
- const buildEdgesTx = db.transaction(() => {
578
- // Delete stale outgoing edges for barrel-only files inside the transaction
579
- // so that deletion and re-creation are atomic (no edge loss on mid-build crash).
643
+ const native = engineName === 'native' ? loadNative() : null;
644
+
645
+ // Phase 1: Compute edges inside a better-sqlite3 transaction.
646
+ // Barrel-edge deletion lives here so that the JS path (which also inserts
647
+ // edges in this transaction) keeps deletion + insertion atomic.
648
+ // When using the native rusqlite path, insertion happens in Phase 2 on a
649
+ // separate connection — a crash between Phase 1 and Phase 2 would leave
650
+ // barrel edges missing until the next incremental rebuild re-creates them.
651
+ const allEdgeRows: EdgeRowTuple[] = [];
652
+ const computeEdgesTx = db.transaction(() => {
580
653
  if (ctx.barrelOnlyFiles.size > 0) {
581
654
  const deleteOutgoingEdges = db.prepare(
582
655
  'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
@@ -586,19 +659,44 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
586
659
  }
587
660
  }
588
661
 
589
- const allEdgeRows: EdgeRowTuple[] = [];
590
-
591
662
  buildImportEdges(ctx, getNodeIdStmt, allEdgeRows);
592
663
 
593
- const native = engineName === 'native' ? loadNative() : null;
594
- if (native?.buildCallEdges) {
595
- buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native);
664
+ // Skip native call-edge path for small incremental builds (≤3 files):
665
+ // napi-rs marshaling overhead for allNodes exceeds computation savings.
666
+ const useNativeCallEdges =
667
+ native?.buildCallEdges && (ctx.isFullBuild || ctx.fileSymbols.size > 3);
668
+ if (useNativeCallEdges) {
669
+ buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!);
596
670
  } else {
597
671
  buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows);
598
672
  }
599
673
 
600
- batchInsertEdges(db, allEdgeRows);
674
+ // When using native edge insert, skip JS insert here — do it after tx commits.
675
+ // Otherwise insert edges within this transaction for atomicity.
676
+ const useNativeEdgeInsert = !!ctx.nativeDb?.bulkInsertEdges;
677
+ if (!useNativeEdgeInsert) {
678
+ batchInsertEdges(db, allEdgeRows);
679
+ }
601
680
  });
602
- buildEdgesTx();
681
+ computeEdgesTx();
682
+
683
+ // Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction
684
+ // to avoid SQLITE_BUSY contention). Uses NativeDatabase persistent connection.
685
+ // Standalone napi functions were removed in 6.17.
686
+ if (ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0) {
687
+ const nativeEdges = allEdgeRows.map((r) => ({
688
+ sourceId: r[0],
689
+ targetId: r[1],
690
+ kind: r[2],
691
+ confidence: r[3],
692
+ dynamic: r[4],
693
+ }));
694
+ const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges);
695
+ if (!ok) {
696
+ debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges');
697
+ batchInsertEdges(db, allEdgeRows);
698
+ }
699
+ }
700
+
603
701
  ctx.timing.edgesMs = performance.now() - t0;
604
702
  }
@@ -5,9 +5,9 @@
5
5
  */
6
6
  import path from 'node:path';
7
7
  import { performance } from 'node:perf_hooks';
8
- import { debug } from '../../../../infrastructure/logger.js';
9
- import { normalizePath } from '../../../../shared/constants.js';
10
- import type { ExtractorOutput } from '../../../../types.js';
8
+ import { debug } from '#infrastructure/logger.js';
9
+ import { normalizePath } from '#shared/constants.js';
10
+ import type { ExtractorOutput } from '#types';
11
11
  import type { PipelineContext } from '../context.js';
12
12
  import { readFileSafe } from '../helpers.js';
13
13
 
@@ -40,7 +40,13 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
40
40
  // Gate: ≤5 changed files AND significantly more existing files (>20) to
41
41
  // avoid triggering on small test fixtures where directory metrics matter.
42
42
  const existingFileCount = !isFullBuild
43
- ? (db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { c: number }).c
43
+ ? (
44
+ (ctx.nativeDb
45
+ ? ctx.nativeDb.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
46
+ : db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()) as {
47
+ c: number;
48
+ }
49
+ ).c
44
50
  : 0;
45
51
  const useSmallIncrementalFastPath =
46
52
  !isFullBuild &&
@@ -86,13 +92,43 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
86
92
  // Classify node roles (incremental: only reclassify changed files' nodes)
87
93
  const t1 = performance.now();
88
94
  try {
89
- const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
90
- classifyNodeRoles: (
91
- db: PipelineContext['db'],
92
- changedFiles?: string[] | null,
93
- ) => Record<string, number>;
94
- };
95
- const roleSummary = classifyNodeRoles(db, changedFileList);
95
+ let roleSummary: Record<string, number> | null = null;
96
+
97
+ // Use NativeDatabase persistent connection (Phase 6.15+).
98
+ // Standalone napi functions were removed in 6.17 — falls through to JS if nativeDb unavailable.
99
+ if (ctx.nativeDb?.classifyRolesFull) {
100
+ const nativeResult =
101
+ changedFileList && changedFileList.length > 0
102
+ ? ctx.nativeDb.classifyRolesIncremental(changedFileList)
103
+ : ctx.nativeDb.classifyRolesFull();
104
+ if (nativeResult) {
105
+ roleSummary = {
106
+ entry: nativeResult.entry,
107
+ core: nativeResult.core,
108
+ utility: nativeResult.utility,
109
+ adapter: nativeResult.adapter,
110
+ dead: nativeResult.dead,
111
+ 'dead-leaf': nativeResult.deadLeaf,
112
+ 'dead-entry': nativeResult.deadEntry,
113
+ 'dead-ffi': nativeResult.deadFfi,
114
+ 'dead-unresolved': nativeResult.deadUnresolved,
115
+ 'test-only': nativeResult.testOnly,
116
+ leaf: nativeResult.leaf,
117
+ };
118
+ }
119
+ }
120
+
121
+ // Fall back to JS path
122
+ if (!roleSummary) {
123
+ const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
124
+ classifyNodeRoles: (
125
+ db: PipelineContext['db'],
126
+ changedFiles?: string[] | null,
127
+ ) => Record<string, number>;
128
+ };
129
+ roleSummary = classifyNodeRoles(db, changedFileList);
130
+ }
131
+
96
132
  debug(
97
133
  `Roles${changedFileList ? ` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries(
98
134
  roleSummary,
@@ -2,14 +2,78 @@
2
2
  * Stage: collectFiles
3
3
  *
4
4
  * Collects all source files to process. Handles both normal and scoped rebuilds.
5
+ * For incremental builds with a valid journal, reconstructs the file list from
6
+ * the DB's file_hashes table + journal deltas, skipping the filesystem scan.
5
7
  */
6
8
  import fs from 'node:fs';
7
9
  import path from 'node:path';
8
- import { info } from '../../../../infrastructure/logger.js';
9
- import { normalizePath } from '../../../../shared/constants.js';
10
+ import { debug, info } from '#infrastructure/logger.js';
11
+ import { normalizePath } from '#shared/constants.js';
12
+ import { readJournal } from '../../journal.js';
10
13
  import type { PipelineContext } from '../context.js';
11
14
  import { collectFiles as collectFilesUtil } from '../helpers.js';
12
15
 
/**
 * Rebuild the list of source files from the `file_hashes` table plus the
 * change journal, avoiding a filesystem scan.
 *
 * @param ctx - Pipeline context supplying the database handle and root directory.
 * @returns Absolute file paths and the set of their parent directories, or
 *   `null` when the fast path does not apply: `file_hashes` cannot be queried
 *   or is empty, the journal is not valid, or the journal lists no changed
 *   and no removed files.
 */
function tryFastCollect(
  ctx: PipelineContext,
): { files: string[]; directories: Set<string> } | null {
  const { db, rootDir } = ctx;

  // Step 1: file_hashes must be queryable and non-empty.
  let trackedCount: number;
  try {
    const countRow = db.prepare('SELECT COUNT(*) as c FROM file_hashes').get() as { c: number };
    trackedCount = countRow.c;
  } catch {
    return null;
  }
  if (trackedCount === 0) return null;

  // Step 2: require a valid journal that actually recorded something. An
  // empty-but-valid journal (no watcher running) could miss file deletions.
  const journal = readJournal(rootDir);
  if (!journal.valid) return null;
  const changedPaths = journal.changed || [];
  const removedPaths = journal.removed || [];
  if (changedPaths.length === 0 && removedPaths.length === 0) return null;

  // Step 3: start from the files recorded in file_hashes (relative paths).
  const hashRows = db.prepare('SELECT file FROM file_hashes').all() as Array<{ file: string }>;
  const tracked = new Set<string>();
  for (const row of hashRows) tracked.add(row.file);

  // Step 4: apply journal deltas — removals first, then changed/new files.
  for (const gone of removedPaths) tracked.delete(gone);
  for (const touched of changedPaths) tracked.add(touched);

  // Step 5: resolve to absolute paths and collect each file's parent directory.
  const files = Array.from(tracked, (rel) => path.join(rootDir, rel));
  const directories = new Set<string>(files.map((abs) => path.dirname(abs)));

  debug(
    `collectFiles fast path: ${hashRows.length} from DB, journal: +${changedPaths.length}/-${removedPaths.length} → ${files.length} files`,
  );
  return { files, directories };
}
76
+
13
77
  export async function collectFiles(ctx: PipelineContext): Promise<void> {
14
78
  const { rootDir, config, opts } = ctx;
15
79
 
@@ -33,10 +97,23 @@ export async function collectFiles(ctx: PipelineContext): Promise<void> {
33
97
  ctx.removed = missing;
34
98
  ctx.isFullBuild = false;
35
99
  info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`);
36
- } else {
37
- const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
38
- ctx.allFiles = collected.files;
39
- ctx.discoveredDirs = collected.directories;
40
- info(`Found ${ctx.allFiles.length} files to parse`);
100
+ return;
41
101
  }
102
+
103
+ // Incremental fast path: reconstruct file list from DB + journal deltas
104
+ // instead of full recursive filesystem scan (~8ms savings on 473 files).
105
+ if (ctx.incremental && !ctx.forceFullRebuild) {
106
+ const fast = tryFastCollect(ctx);
107
+ if (fast) {
108
+ ctx.allFiles = fast.files;
109
+ ctx.discoveredDirs = fast.directories;
110
+ info(`Found ${ctx.allFiles.length} files (cached)`);
111
+ return;
112
+ }
113
+ }
114
+
115
+ const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
116
+ ctx.allFiles = collected.files;
117
+ ctx.discoveredDirs = collected.directories;
118
+ info(`Found ${ctx.allFiles.length} files to parse`);
42
119
  }
@@ -10,7 +10,7 @@ import path from 'node:path';
10
10
  import { closeDb } from '../../../../db/index.js';
11
11
  import { debug, info } from '../../../../infrastructure/logger.js';
12
12
  import { normalizePath } from '../../../../shared/constants.js';
13
- import type { BetterSqlite3Database, ExtractorOutput } from '../../../../types.js';
13
+ import type { BetterSqlite3Database, ExtractorOutput, NativeDatabase } from '../../../../types.js';
14
14
  import { parseFilesAuto } from '../../../parser.js';
15
15
  import { readJournal, writeJournalHeader } from '../../journal.js';
16
16
  import type { PipelineContext } from '../context.js';
@@ -58,10 +58,16 @@ function getChangedFiles(
58
58
  db: BetterSqlite3Database,
59
59
  allFiles: string[],
60
60
  rootDir: string,
61
+ nativeDb?: NativeDatabase,
61
62
  ): ChangeResult {
62
63
  let hasTable = false;
63
64
  try {
64
- db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
65
+ if (nativeDb) {
66
+ nativeDb.queryGet('SELECT 1 FROM file_hashes LIMIT 1', []);
67
+ } else {
68
+ db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
69
+ }
70
+ // Query succeeded → table exists (result may be undefined if table is empty)
65
71
  hasTable = true;
66
72
  } catch {
67
73
  /* table doesn't exist */
@@ -75,11 +81,11 @@ function getChangedFiles(
75
81
  };
76
82
  }
77
83
 
78
- const existing = new Map<string, FileHashRow>(
79
- (db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[]).map(
80
- (r) => [r.file, r],
81
- ),
82
- );
84
+ const sql = 'SELECT file, hash, mtime, size FROM file_hashes';
85
+ const rows = nativeDb
86
+ ? (nativeDb.queryAll(sql, []) as unknown as FileHashRow[])
87
+ : (db.prepare(sql).all() as FileHashRow[]);
88
+ const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
83
89
 
84
90
  const removed = detectRemovedFiles(existing, allFiles, rootDir);
85
91
  const journalResult = tryJournalTier(db, existing, rootDir, removed);
@@ -325,21 +331,33 @@ function purgeAndAddReverseDeps(
325
331
  reverseDeps: Set<string>,
326
332
  ): void {
327
333
  const { db, rootDir } = ctx;
328
- if (changePaths.length > 0 || ctx.removed.length > 0) {
329
- purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false });
330
- }
331
- if (reverseDeps.size > 0) {
332
- const deleteOutgoingEdgesForFile = db.prepare(
333
- 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
334
- );
335
- for (const relPath of reverseDeps) {
336
- deleteOutgoingEdgesForFile.run(relPath);
337
- }
338
- for (const relPath of reverseDeps) {
339
- const absPath = path.join(rootDir, relPath);
340
- ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
334
+ const hasPurge = changePaths.length > 0 || ctx.removed.length > 0;
335
+ const hasReverseDeps = reverseDeps.size > 0;
336
+ const reverseDepList = hasReverseDeps ? [...reverseDeps] : [];
337
+
338
+ if (hasPurge || hasReverseDeps) {
339
+ const filesToPurge = hasPurge ? [...ctx.removed, ...changePaths] : [];
340
+ // Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670)
341
+ if (ctx.nativeDb?.purgeFilesData) {
342
+ ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined);
343
+ } else {
344
+ if (hasPurge) {
345
+ purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false });
346
+ }
347
+ if (hasReverseDeps) {
348
+ const deleteOutgoingEdgesForFile = db.prepare(
349
+ 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
350
+ );
351
+ for (const relPath of reverseDepList) {
352
+ deleteOutgoingEdgesForFile.run(relPath);
353
+ }
354
+ }
341
355
  }
342
356
  }
357
+ for (const relPath of reverseDeps) {
358
+ const absPath = path.join(rootDir, relPath);
359
+ ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
360
+ }
343
361
  }
344
362
 
345
363
  function detectHasEmbeddings(db: BetterSqlite3Database): boolean {
@@ -415,7 +433,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
415
433
  }
416
434
  const increResult =
417
435
  incremental && !forceFullRebuild
418
- ? getChangedFiles(db, allFiles, rootDir)
436
+ ? getChangedFiles(db, allFiles, rootDir, ctx.nativeDb)
419
437
  : {
420
438
  changed: allFiles.map((f): ChangedFile => ({ file: f })),
421
439
  removed: [] as string[],
@@ -6,7 +6,12 @@
6
6
  import { tmpdir } from 'node:os';
7
7
  import path from 'node:path';
8
8
  import { performance } from 'node:perf_hooks';
9
- import { closeDb, closeDbDeferred, getBuildMeta, setBuildMeta } from '../../../../db/index.js';
9
+ import {
10
+ closeDbPair,
11
+ closeDbPairDeferred,
12
+ getBuildMeta,
13
+ setBuildMeta,
14
+ } from '../../../../db/index.js';
10
15
  import { debug, info, warn } from '../../../../infrastructure/logger.js';
11
16
  import { CODEGRAPH_VERSION } from '../../../../shared/version.js';
12
17
  import { writeJournalHeader } from '../../journal.js';
@@ -43,8 +48,12 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
43
48
  // Incremental drift detection — skip for small incremental changes where
44
49
  // count fluctuation is expected (reverse-dep edge churn).
45
50
  if (!isFullBuild && allSymbols.size > 3) {
46
- const prevNodes = getBuildMeta(db, 'node_count');
47
- const prevEdges = getBuildMeta(db, 'edge_count');
51
+ const prevNodes = ctx.nativeDb
52
+ ? ctx.nativeDb.getBuildMeta('node_count')
53
+ : getBuildMeta(db, 'node_count');
54
+ const prevEdges = ctx.nativeDb
55
+ ? ctx.nativeDb.getBuildMeta('edge_count')
56
+ : getBuildMeta(db, 'edge_count');
48
57
  if (prevNodes && prevEdges) {
49
58
  const prevN = Number(prevNodes);
50
59
  const prevE = Number(prevEdges);
@@ -67,17 +76,33 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
67
76
  // built_at is only used by stale-embeddings check (skipped for incremental),
68
77
  // and counts are only used by drift detection (skipped for ≤3 files).
69
78
  // This avoids a transaction commit + WAL fsync (~15-30ms).
70
- if (isFullBuild || allSymbols.size > 5) {
79
+ // Threshold aligned with drift detection gate (allSymbols.size > 3) so stored
80
+ // counts stay fresh whenever drift detection reads them.
81
+ if (isFullBuild || allSymbols.size > 3) {
71
82
  try {
72
- setBuildMeta(db, {
73
- engine: ctx.engineName,
74
- engine_version: ctx.engineVersion || '',
75
- codegraph_version: CODEGRAPH_VERSION,
76
- schema_version: String(schemaVersion),
77
- built_at: buildNow.toISOString(),
78
- node_count: nodeCount,
79
- edge_count: actualEdgeCount,
80
- });
83
+ if (ctx.nativeDb) {
84
+ ctx.nativeDb.setBuildMeta(
85
+ Object.entries({
86
+ engine: ctx.engineName,
87
+ engine_version: ctx.engineVersion || '',
88
+ codegraph_version: CODEGRAPH_VERSION,
89
+ schema_version: String(schemaVersion),
90
+ built_at: buildNow.toISOString(),
91
+ node_count: String(nodeCount),
92
+ edge_count: String(actualEdgeCount),
93
+ }).map(([key, value]) => ({ key, value: String(value) })),
94
+ );
95
+ } else {
96
+ setBuildMeta(db, {
97
+ engine: ctx.engineName,
98
+ engine_version: ctx.engineVersion || '',
99
+ codegraph_version: CODEGRAPH_VERSION,
100
+ schema_version: String(schemaVersion),
101
+ built_at: buildNow.toISOString(),
102
+ node_count: nodeCount,
103
+ edge_count: actualEdgeCount,
104
+ });
105
+ }
81
106
  } catch (err) {
82
107
  warn(`Failed to write build metadata: ${(err as Error).message}`);
83
108
  }
@@ -157,17 +182,22 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
157
182
  }
158
183
  }
159
184
 
185
+ // Intentionally measured before closeDb / writeJournalHeader / auto-registration:
186
+ // for the deferred-close path the close is async (setImmediate), and for full
187
+ // builds the metric captures finalize logic only — DB close cost is tracked
188
+ // separately via timing.closeDbMs when available.
160
189
  ctx.timing.finalizeMs = performance.now() - t0;
161
190
 
162
- // For small incremental builds, defer db.close() to the next event loop tick.
163
- // The WAL checkpoint in db.close() costs ~250ms on Windows NTFS due to fsync.
164
- // Deferring lets buildGraph() return immediately; the checkpoint runs after.
165
- // Skip for temp directories (tests) — they rmSync immediately after build.
191
+ // Close NativeDatabase (fast, ~1ms) then better-sqlite3 (WAL checkpoint).
192
+ // For small incremental builds, defer the expensive WAL checkpoint to the
193
+ // next event loop tick. Skip for temp directories (tests) — they rmSync
194
+ // immediately after build.
195
+ const pair = { db, nativeDb: ctx.nativeDb };
166
196
  const isTempDir = path.resolve(rootDir).startsWith(path.resolve(tmpdir()));
167
197
  if (!isFullBuild && allSymbols.size <= 5 && !isTempDir) {
168
- closeDbDeferred(db);
198
+ closeDbPairDeferred(pair);
169
199
  } else {
170
- closeDb(db);
200
+ closeDbPair(pair);
171
201
  }
172
202
 
173
203
  // Write journal header after successful build
@@ -177,7 +207,6 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
177
207
  // registered during the initial full build. The dynamic import + file I/O
178
208
  // costs ~100ms which dominates incremental finalize time.
179
209
  if (!opts.skipRegistry && isFullBuild) {
180
- const { tmpdir } = await import('node:os');
181
210
  const tmpDir = path.resolve(tmpdir());
182
211
  const resolvedRoot = path.resolve(rootDir);
183
212
  if (resolvedRoot.startsWith(tmpDir)) {