@optave/codegraph 3.4.1 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -21
- package/dist/ast-analysis/rules/javascript.d.ts.map +1 -1
- package/dist/ast-analysis/rules/javascript.js +1 -0
- package/dist/ast-analysis/rules/javascript.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +103 -35
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/db/better-sqlite3.d.ts +3 -0
- package/dist/db/better-sqlite3.d.ts.map +1 -0
- package/dist/db/better-sqlite3.js +19 -0
- package/dist/db/better-sqlite3.js.map +1 -0
- package/dist/db/connection.d.ts +13 -2
- package/dist/db/connection.d.ts.map +1 -1
- package/dist/db/connection.js +76 -2
- package/dist/db/connection.js.map +1 -1
- package/dist/db/index.d.ts +2 -2
- package/dist/db/index.d.ts.map +1 -1
- package/dist/db/index.js +1 -1
- package/dist/db/index.js.map +1 -1
- package/dist/db/migrations.d.ts.map +1 -1
- package/dist/db/migrations.js +2 -0
- package/dist/db/migrations.js.map +1 -1
- package/dist/db/query-builder.d.ts +5 -5
- package/dist/db/query-builder.d.ts.map +1 -1
- package/dist/db/query-builder.js +20 -4
- package/dist/db/query-builder.js.map +1 -1
- package/dist/db/repository/index.d.ts +1 -0
- package/dist/db/repository/index.d.ts.map +1 -1
- package/dist/db/repository/index.js +1 -0
- package/dist/db/repository/index.js.map +1 -1
- package/dist/db/repository/native-repository.d.ts +58 -0
- package/dist/db/repository/native-repository.d.ts.map +1 -0
- package/dist/db/repository/native-repository.js +261 -0
- package/dist/db/repository/native-repository.js.map +1 -0
- package/dist/db/repository/nodes.d.ts +4 -4
- package/dist/db/repository/nodes.d.ts.map +1 -1
- package/dist/db/repository/nodes.js +6 -6
- package/dist/db/repository/nodes.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +2 -1
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js +1 -0
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +29 -7
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.js +103 -15
- package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.js +33 -5
- package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +71 -7
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +36 -14
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +43 -20
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +104 -9
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +1 -0
- package/dist/domain/parser.js.map +1 -1
- package/dist/extractors/javascript.js +53 -38
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/extractors/rust.js +2 -1
- package/dist/extractors/rust.js.map +1 -1
- package/dist/features/ast.d.ts +14 -1
- package/dist/features/ast.d.ts.map +1 -1
- package/dist/features/ast.js +38 -1
- package/dist/features/ast.js.map +1 -1
- package/dist/features/branch-compare.d.ts.map +1 -1
- package/dist/features/branch-compare.js +3 -1
- package/dist/features/branch-compare.js.map +1 -1
- package/dist/features/snapshot.d.ts.map +1 -1
- package/dist/features/snapshot.js +2 -1
- package/dist/features/snapshot.js.map +1 -1
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/mcp/server.js +2 -9
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +228 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +61 -11
- package/src/ast-analysis/rules/javascript.ts +1 -0
- package/src/ast-analysis/visitors/ast-store-visitor.ts +102 -33
- package/src/db/better-sqlite3.ts +20 -0
- package/src/db/connection.ts +94 -3
- package/src/db/index.ts +3 -1
- package/src/db/migrations.ts +2 -0
- package/src/db/query-builder.ts +30 -5
- package/src/db/repository/index.ts +1 -0
- package/src/db/repository/native-repository.ts +361 -0
- package/src/db/repository/nodes.ts +7 -3
- package/src/domain/graph/builder/context.ts +2 -0
- package/src/domain/graph/builder/pipeline.ts +30 -6
- package/src/domain/graph/builder/stages/build-edges.ts +117 -19
- package/src/domain/graph/builder/stages/build-structure.ts +47 -11
- package/src/domain/graph/builder/stages/collect-files.ts +84 -7
- package/src/domain/graph/builder/stages/detect-changes.ts +39 -21
- package/src/domain/graph/builder/stages/finalize.ts +49 -20
- package/src/domain/graph/builder/stages/insert-nodes.ts +129 -10
- package/src/domain/parser.ts +1 -0
- package/src/extractors/javascript.ts +54 -36
- package/src/extractors/rust.ts +2 -1
- package/src/features/ast.ts +66 -1
- package/src/features/branch-compare.ts +3 -1
- package/src/features/snapshot.ts +2 -1
- package/src/mcp/server.ts +2 -10
- package/src/types.ts +273 -0
|
@@ -6,8 +6,9 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import path from 'node:path';
|
|
8
8
|
import { performance } from 'node:perf_hooks';
|
|
9
|
-
import { getNodeId } from '
|
|
10
|
-
import {
|
|
9
|
+
import { getNodeId } from '#db/index.js';
|
|
10
|
+
import { debug } from '#infrastructure/logger.js';
|
|
11
|
+
import { loadNative } from '#infrastructure/native.js';
|
|
11
12
|
import type {
|
|
12
13
|
BetterSqlite3Database,
|
|
13
14
|
Call,
|
|
@@ -17,7 +18,7 @@ import type {
|
|
|
17
18
|
NativeAddon,
|
|
18
19
|
NodeRow,
|
|
19
20
|
TypeMapEntry,
|
|
20
|
-
} from '
|
|
21
|
+
} from '#types';
|
|
21
22
|
import { computeConfidence } from '../../resolve.js';
|
|
22
23
|
import type { PipelineContext } from '../context.js';
|
|
23
24
|
import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js';
|
|
@@ -561,22 +562,94 @@ function buildClassHierarchyEdges(
|
|
|
561
562
|
|
|
562
563
|
// ── Main entry point ────────────────────────────────────────────────────
|
|
563
564
|
|
|
565
|
+
/**
|
|
566
|
+
* For small incremental builds (≤5 changed files on a large codebase), scope
|
|
567
|
+
* the node loading query to only files that are relevant: changed files +
|
|
568
|
+
* their import targets. Falls back to loading ALL nodes for full builds or
|
|
569
|
+
* larger incremental changes.
|
|
570
|
+
*/
|
|
571
|
+
function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
|
|
572
|
+
const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
|
|
573
|
+
const nodeKindFilter = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
|
|
574
|
+
|
|
575
|
+
// Gate: only scope for small incremental on large codebases
|
|
576
|
+
if (!isFullBuild && fileSymbols.size <= 5) {
|
|
577
|
+
const existingFileCount = (
|
|
578
|
+
db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { c: number }
|
|
579
|
+
).c;
|
|
580
|
+
if (existingFileCount > 20) {
|
|
581
|
+
// Collect relevant files: changed files + their import targets
|
|
582
|
+
const relevantFiles = new Set<string>(fileSymbols.keys());
|
|
583
|
+
if (batchResolved) {
|
|
584
|
+
for (const resolvedPath of batchResolved.values()) {
|
|
585
|
+
relevantFiles.add(resolvedPath);
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
// Also add barrel-only files
|
|
589
|
+
for (const barrelPath of ctx.barrelOnlyFiles) {
|
|
590
|
+
relevantFiles.add(barrelPath);
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
const placeholders = [...relevantFiles].map(() => '?').join(',');
|
|
594
|
+
const rows = db
|
|
595
|
+
.prepare(
|
|
596
|
+
`SELECT id, name, kind, file, line FROM nodes WHERE ${nodeKindFilter} AND file IN (${placeholders})`,
|
|
597
|
+
)
|
|
598
|
+
.all(...relevantFiles) as QueryNodeRow[];
|
|
599
|
+
return { rows, scoped: true };
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
const rows = db
|
|
604
|
+
.prepare(`SELECT id, name, kind, file, line FROM nodes WHERE ${nodeKindFilter}`)
|
|
605
|
+
.all() as QueryNodeRow[];
|
|
606
|
+
return { rows, scoped: false };
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
/**
|
|
610
|
+
* For scoped node loading, patch nodesByName.get with a lazy SQL fallback
|
|
611
|
+
* so global name-only lookups (resolveByMethodOrGlobal, supplementReceiverEdges)
|
|
612
|
+
* can still find nodes outside the scoped set.
|
|
613
|
+
*/
|
|
614
|
+
function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void {
|
|
615
|
+
if (!scopedLoad) return;
|
|
616
|
+
const { db } = ctx;
|
|
617
|
+
const fallbackStmt = db.prepare(
|
|
618
|
+
`SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND kind != 'file'`,
|
|
619
|
+
);
|
|
620
|
+
const originalGet = ctx.nodesByName.get.bind(ctx.nodesByName);
|
|
621
|
+
ctx.nodesByName.get = (name: string) => {
|
|
622
|
+
const result = originalGet(name);
|
|
623
|
+
if (result !== undefined) return result;
|
|
624
|
+
const rows = fallbackStmt.all(name) as unknown as NodeRow[];
|
|
625
|
+
if (rows.length > 0) {
|
|
626
|
+
ctx.nodesByName.set(name, rows);
|
|
627
|
+
return rows;
|
|
628
|
+
}
|
|
629
|
+
return undefined;
|
|
630
|
+
};
|
|
631
|
+
}
|
|
632
|
+
|
|
564
633
|
export async function buildEdges(ctx: PipelineContext): Promise<void> {
|
|
565
634
|
const { db, engineName } = ctx;
|
|
566
635
|
|
|
567
636
|
const getNodeIdStmt = makeGetNodeIdStmt(db);
|
|
568
637
|
|
|
569
|
-
const
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
)
|
|
573
|
-
.all() as QueryNodeRow[];
|
|
574
|
-
setupNodeLookups(ctx, allNodes);
|
|
638
|
+
const { rows: allNodesBefore, scoped: scopedLoad } = loadNodes(ctx);
|
|
639
|
+
setupNodeLookups(ctx, allNodesBefore);
|
|
640
|
+
addLazyFallback(ctx, scopedLoad);
|
|
575
641
|
|
|
576
642
|
const t0 = performance.now();
|
|
577
|
-
const
|
|
578
|
-
|
|
579
|
-
|
|
643
|
+
const native = engineName === 'native' ? loadNative() : null;
|
|
644
|
+
|
|
645
|
+
// Phase 1: Compute edges inside a better-sqlite3 transaction.
|
|
646
|
+
// Barrel-edge deletion lives here so that the JS path (which also inserts
|
|
647
|
+
// edges in this transaction) keeps deletion + insertion atomic.
|
|
648
|
+
// When using the native rusqlite path, insertion happens in Phase 2 on a
|
|
649
|
+
// separate connection — a crash between Phase 1 and Phase 2 would leave
|
|
650
|
+
// barrel edges missing until the next incremental rebuild re-creates them.
|
|
651
|
+
const allEdgeRows: EdgeRowTuple[] = [];
|
|
652
|
+
const computeEdgesTx = db.transaction(() => {
|
|
580
653
|
if (ctx.barrelOnlyFiles.size > 0) {
|
|
581
654
|
const deleteOutgoingEdges = db.prepare(
|
|
582
655
|
'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
@@ -586,19 +659,44 @@ export async function buildEdges(ctx: PipelineContext): Promise<void> {
|
|
|
586
659
|
}
|
|
587
660
|
}
|
|
588
661
|
|
|
589
|
-
const allEdgeRows: EdgeRowTuple[] = [];
|
|
590
|
-
|
|
591
662
|
buildImportEdges(ctx, getNodeIdStmt, allEdgeRows);
|
|
592
663
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
664
|
+
// Skip native call-edge path for small incremental builds (≤3 files):
|
|
665
|
+
// napi-rs marshaling overhead for allNodes exceeds computation savings.
|
|
666
|
+
const useNativeCallEdges =
|
|
667
|
+
native?.buildCallEdges && (ctx.isFullBuild || ctx.fileSymbols.size > 3);
|
|
668
|
+
if (useNativeCallEdges) {
|
|
669
|
+
buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!);
|
|
596
670
|
} else {
|
|
597
671
|
buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows);
|
|
598
672
|
}
|
|
599
673
|
|
|
600
|
-
|
|
674
|
+
// When using native edge insert, skip JS insert here — do it after tx commits.
|
|
675
|
+
// Otherwise insert edges within this transaction for atomicity.
|
|
676
|
+
const useNativeEdgeInsert = !!ctx.nativeDb?.bulkInsertEdges;
|
|
677
|
+
if (!useNativeEdgeInsert) {
|
|
678
|
+
batchInsertEdges(db, allEdgeRows);
|
|
679
|
+
}
|
|
601
680
|
});
|
|
602
|
-
|
|
681
|
+
computeEdgesTx();
|
|
682
|
+
|
|
683
|
+
// Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction
|
|
684
|
+
// to avoid SQLITE_BUSY contention). Uses NativeDatabase persistent connection.
|
|
685
|
+
// Standalone napi functions were removed in 6.17.
|
|
686
|
+
if (ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0) {
|
|
687
|
+
const nativeEdges = allEdgeRows.map((r) => ({
|
|
688
|
+
sourceId: r[0],
|
|
689
|
+
targetId: r[1],
|
|
690
|
+
kind: r[2],
|
|
691
|
+
confidence: r[3],
|
|
692
|
+
dynamic: r[4],
|
|
693
|
+
}));
|
|
694
|
+
const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges);
|
|
695
|
+
if (!ok) {
|
|
696
|
+
debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges');
|
|
697
|
+
batchInsertEdges(db, allEdgeRows);
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
|
|
603
701
|
ctx.timing.edgesMs = performance.now() - t0;
|
|
604
702
|
}
|
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import path from 'node:path';
|
|
7
7
|
import { performance } from 'node:perf_hooks';
|
|
8
|
-
import { debug } from '
|
|
9
|
-
import { normalizePath } from '
|
|
10
|
-
import type { ExtractorOutput } from '
|
|
8
|
+
import { debug } from '#infrastructure/logger.js';
|
|
9
|
+
import { normalizePath } from '#shared/constants.js';
|
|
10
|
+
import type { ExtractorOutput } from '#types';
|
|
11
11
|
import type { PipelineContext } from '../context.js';
|
|
12
12
|
import { readFileSafe } from '../helpers.js';
|
|
13
13
|
|
|
@@ -40,7 +40,13 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
|
|
|
40
40
|
// Gate: ≤5 changed files AND significantly more existing files (>20) to
|
|
41
41
|
// avoid triggering on small test fixtures where directory metrics matter.
|
|
42
42
|
const existingFileCount = !isFullBuild
|
|
43
|
-
? (
|
|
43
|
+
? (
|
|
44
|
+
(ctx.nativeDb
|
|
45
|
+
? ctx.nativeDb.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
|
|
46
|
+
: db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()) as {
|
|
47
|
+
c: number;
|
|
48
|
+
}
|
|
49
|
+
).c
|
|
44
50
|
: 0;
|
|
45
51
|
const useSmallIncrementalFastPath =
|
|
46
52
|
!isFullBuild &&
|
|
@@ -86,13 +92,43 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
|
|
|
86
92
|
// Classify node roles (incremental: only reclassify changed files' nodes)
|
|
87
93
|
const t1 = performance.now();
|
|
88
94
|
try {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
95
|
+
let roleSummary: Record<string, number> | null = null;
|
|
96
|
+
|
|
97
|
+
// Use NativeDatabase persistent connection (Phase 6.15+).
|
|
98
|
+
// Standalone napi functions were removed in 6.17 — falls through to JS if nativeDb unavailable.
|
|
99
|
+
if (ctx.nativeDb?.classifyRolesFull) {
|
|
100
|
+
const nativeResult =
|
|
101
|
+
changedFileList && changedFileList.length > 0
|
|
102
|
+
? ctx.nativeDb.classifyRolesIncremental(changedFileList)
|
|
103
|
+
: ctx.nativeDb.classifyRolesFull();
|
|
104
|
+
if (nativeResult) {
|
|
105
|
+
roleSummary = {
|
|
106
|
+
entry: nativeResult.entry,
|
|
107
|
+
core: nativeResult.core,
|
|
108
|
+
utility: nativeResult.utility,
|
|
109
|
+
adapter: nativeResult.adapter,
|
|
110
|
+
dead: nativeResult.dead,
|
|
111
|
+
'dead-leaf': nativeResult.deadLeaf,
|
|
112
|
+
'dead-entry': nativeResult.deadEntry,
|
|
113
|
+
'dead-ffi': nativeResult.deadFfi,
|
|
114
|
+
'dead-unresolved': nativeResult.deadUnresolved,
|
|
115
|
+
'test-only': nativeResult.testOnly,
|
|
116
|
+
leaf: nativeResult.leaf,
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// Fall back to JS path
|
|
122
|
+
if (!roleSummary) {
|
|
123
|
+
const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
|
|
124
|
+
classifyNodeRoles: (
|
|
125
|
+
db: PipelineContext['db'],
|
|
126
|
+
changedFiles?: string[] | null,
|
|
127
|
+
) => Record<string, number>;
|
|
128
|
+
};
|
|
129
|
+
roleSummary = classifyNodeRoles(db, changedFileList);
|
|
130
|
+
}
|
|
131
|
+
|
|
96
132
|
debug(
|
|
97
133
|
`Roles${changedFileList ? ` (incremental, ${changedFileList.length} files)` : ''}: ${Object.entries(
|
|
98
134
|
roleSummary,
|
|
@@ -2,14 +2,78 @@
|
|
|
2
2
|
* Stage: collectFiles
|
|
3
3
|
*
|
|
4
4
|
* Collects all source files to process. Handles both normal and scoped rebuilds.
|
|
5
|
+
* For incremental builds with a valid journal, reconstructs the file list from
|
|
6
|
+
* the DB's file_hashes table + journal deltas, skipping the filesystem scan.
|
|
5
7
|
*/
|
|
6
8
|
import fs from 'node:fs';
|
|
7
9
|
import path from 'node:path';
|
|
8
|
-
import { info } from '
|
|
9
|
-
import { normalizePath } from '
|
|
10
|
+
import { debug, info } from '#infrastructure/logger.js';
|
|
11
|
+
import { normalizePath } from '#shared/constants.js';
|
|
12
|
+
import { readJournal } from '../../journal.js';
|
|
10
13
|
import type { PipelineContext } from '../context.js';
|
|
11
14
|
import { collectFiles as collectFilesUtil } from '../helpers.js';
|
|
12
15
|
|
|
16
|
+
/**
|
|
17
|
+
* Reconstruct allFiles from DB file_hashes + journal deltas.
|
|
18
|
+
* Returns null when the fast path isn't applicable (first build, no journal, etc).
|
|
19
|
+
*/
|
|
20
|
+
function tryFastCollect(
|
|
21
|
+
ctx: PipelineContext,
|
|
22
|
+
): { files: string[]; directories: Set<string> } | null {
|
|
23
|
+
const { db, rootDir } = ctx;
|
|
24
|
+
|
|
25
|
+
// 1. Check that file_hashes table exists and has entries
|
|
26
|
+
let dbFileCount: number;
|
|
27
|
+
try {
|
|
28
|
+
dbFileCount = (db.prepare('SELECT COUNT(*) as c FROM file_hashes').get() as { c: number }).c;
|
|
29
|
+
} catch {
|
|
30
|
+
return null;
|
|
31
|
+
}
|
|
32
|
+
if (dbFileCount === 0) return null;
|
|
33
|
+
|
|
34
|
+
// 2. Read the journal — only use fast path when journal has entries,
|
|
35
|
+
// proving the watcher was active and tracking changes. An empty-but-valid
|
|
36
|
+
// journal (no watcher) could miss file deletions.
|
|
37
|
+
const journal = readJournal(rootDir);
|
|
38
|
+
if (!journal.valid) return null;
|
|
39
|
+
const hasEntries =
|
|
40
|
+
(journal.changed && journal.changed.length > 0) ||
|
|
41
|
+
(journal.removed && journal.removed.length > 0);
|
|
42
|
+
if (!hasEntries) return null;
|
|
43
|
+
|
|
44
|
+
// 3. Load existing file list from file_hashes (relative paths)
|
|
45
|
+
const dbFiles = (db.prepare('SELECT file FROM file_hashes').all() as Array<{ file: string }>).map(
|
|
46
|
+
(r) => r.file,
|
|
47
|
+
);
|
|
48
|
+
|
|
49
|
+
// 4. Apply journal deltas: remove deleted files, add new/changed files
|
|
50
|
+
const fileSet = new Set(dbFiles);
|
|
51
|
+
if (journal.removed) {
|
|
52
|
+
for (const removed of journal.removed) {
|
|
53
|
+
fileSet.delete(removed);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
if (journal.changed) {
|
|
57
|
+
for (const changed of journal.changed) {
|
|
58
|
+
fileSet.add(changed);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// 5. Convert to absolute paths and compute directories
|
|
63
|
+
const files: string[] = [];
|
|
64
|
+
const directories = new Set<string>();
|
|
65
|
+
for (const relPath of fileSet) {
|
|
66
|
+
const absPath = path.join(rootDir, relPath);
|
|
67
|
+
files.push(absPath);
|
|
68
|
+
directories.add(path.dirname(absPath));
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
debug(
|
|
72
|
+
`collectFiles fast path: ${dbFiles.length} from DB, journal: +${journal.changed?.length ?? 0}/-${journal.removed?.length ?? 0} → ${files.length} files`,
|
|
73
|
+
);
|
|
74
|
+
return { files, directories };
|
|
75
|
+
}
|
|
76
|
+
|
|
13
77
|
export async function collectFiles(ctx: PipelineContext): Promise<void> {
|
|
14
78
|
const { rootDir, config, opts } = ctx;
|
|
15
79
|
|
|
@@ -33,10 +97,23 @@ export async function collectFiles(ctx: PipelineContext): Promise<void> {
|
|
|
33
97
|
ctx.removed = missing;
|
|
34
98
|
ctx.isFullBuild = false;
|
|
35
99
|
info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`);
|
|
36
|
-
|
|
37
|
-
const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
|
|
38
|
-
ctx.allFiles = collected.files;
|
|
39
|
-
ctx.discoveredDirs = collected.directories;
|
|
40
|
-
info(`Found ${ctx.allFiles.length} files to parse`);
|
|
100
|
+
return;
|
|
41
101
|
}
|
|
102
|
+
|
|
103
|
+
// Incremental fast path: reconstruct file list from DB + journal deltas
|
|
104
|
+
// instead of full recursive filesystem scan (~8ms savings on 473 files).
|
|
105
|
+
if (ctx.incremental && !ctx.forceFullRebuild) {
|
|
106
|
+
const fast = tryFastCollect(ctx);
|
|
107
|
+
if (fast) {
|
|
108
|
+
ctx.allFiles = fast.files;
|
|
109
|
+
ctx.discoveredDirs = fast.directories;
|
|
110
|
+
info(`Found ${ctx.allFiles.length} files (cached)`);
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
|
|
116
|
+
ctx.allFiles = collected.files;
|
|
117
|
+
ctx.discoveredDirs = collected.directories;
|
|
118
|
+
info(`Found ${ctx.allFiles.length} files to parse`);
|
|
42
119
|
}
|
|
@@ -10,7 +10,7 @@ import path from 'node:path';
|
|
|
10
10
|
import { closeDb } from '../../../../db/index.js';
|
|
11
11
|
import { debug, info } from '../../../../infrastructure/logger.js';
|
|
12
12
|
import { normalizePath } from '../../../../shared/constants.js';
|
|
13
|
-
import type { BetterSqlite3Database, ExtractorOutput } from '../../../../types.js';
|
|
13
|
+
import type { BetterSqlite3Database, ExtractorOutput, NativeDatabase } from '../../../../types.js';
|
|
14
14
|
import { parseFilesAuto } from '../../../parser.js';
|
|
15
15
|
import { readJournal, writeJournalHeader } from '../../journal.js';
|
|
16
16
|
import type { PipelineContext } from '../context.js';
|
|
@@ -58,10 +58,16 @@ function getChangedFiles(
|
|
|
58
58
|
db: BetterSqlite3Database,
|
|
59
59
|
allFiles: string[],
|
|
60
60
|
rootDir: string,
|
|
61
|
+
nativeDb?: NativeDatabase,
|
|
61
62
|
): ChangeResult {
|
|
62
63
|
let hasTable = false;
|
|
63
64
|
try {
|
|
64
|
-
|
|
65
|
+
if (nativeDb) {
|
|
66
|
+
nativeDb.queryGet('SELECT 1 FROM file_hashes LIMIT 1', []);
|
|
67
|
+
} else {
|
|
68
|
+
db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
|
|
69
|
+
}
|
|
70
|
+
// Query succeeded → table exists (result may be undefined if table is empty)
|
|
65
71
|
hasTable = true;
|
|
66
72
|
} catch {
|
|
67
73
|
/* table doesn't exist */
|
|
@@ -75,11 +81,11 @@ function getChangedFiles(
|
|
|
75
81
|
};
|
|
76
82
|
}
|
|
77
83
|
|
|
78
|
-
const
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
)
|
|
82
|
-
);
|
|
84
|
+
const sql = 'SELECT file, hash, mtime, size FROM file_hashes';
|
|
85
|
+
const rows = nativeDb
|
|
86
|
+
? (nativeDb.queryAll(sql, []) as unknown as FileHashRow[])
|
|
87
|
+
: (db.prepare(sql).all() as FileHashRow[]);
|
|
88
|
+
const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
|
|
83
89
|
|
|
84
90
|
const removed = detectRemovedFiles(existing, allFiles, rootDir);
|
|
85
91
|
const journalResult = tryJournalTier(db, existing, rootDir, removed);
|
|
@@ -325,21 +331,33 @@ function purgeAndAddReverseDeps(
|
|
|
325
331
|
reverseDeps: Set<string>,
|
|
326
332
|
): void {
|
|
327
333
|
const { db, rootDir } = ctx;
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
)
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
334
|
+
const hasPurge = changePaths.length > 0 || ctx.removed.length > 0;
|
|
335
|
+
const hasReverseDeps = reverseDeps.size > 0;
|
|
336
|
+
const reverseDepList = hasReverseDeps ? [...reverseDeps] : [];
|
|
337
|
+
|
|
338
|
+
if (hasPurge || hasReverseDeps) {
|
|
339
|
+
const filesToPurge = hasPurge ? [...ctx.removed, ...changePaths] : [];
|
|
340
|
+
// Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670)
|
|
341
|
+
if (ctx.nativeDb?.purgeFilesData) {
|
|
342
|
+
ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined);
|
|
343
|
+
} else {
|
|
344
|
+
if (hasPurge) {
|
|
345
|
+
purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false });
|
|
346
|
+
}
|
|
347
|
+
if (hasReverseDeps) {
|
|
348
|
+
const deleteOutgoingEdgesForFile = db.prepare(
|
|
349
|
+
'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
350
|
+
);
|
|
351
|
+
for (const relPath of reverseDepList) {
|
|
352
|
+
deleteOutgoingEdgesForFile.run(relPath);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
341
355
|
}
|
|
342
356
|
}
|
|
357
|
+
for (const relPath of reverseDeps) {
|
|
358
|
+
const absPath = path.join(rootDir, relPath);
|
|
359
|
+
ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true });
|
|
360
|
+
}
|
|
343
361
|
}
|
|
344
362
|
|
|
345
363
|
function detectHasEmbeddings(db: BetterSqlite3Database): boolean {
|
|
@@ -415,7 +433,7 @@ export async function detectChanges(ctx: PipelineContext): Promise<void> {
|
|
|
415
433
|
}
|
|
416
434
|
const increResult =
|
|
417
435
|
incremental && !forceFullRebuild
|
|
418
|
-
? getChangedFiles(db, allFiles, rootDir)
|
|
436
|
+
? getChangedFiles(db, allFiles, rootDir, ctx.nativeDb)
|
|
419
437
|
: {
|
|
420
438
|
changed: allFiles.map((f): ChangedFile => ({ file: f })),
|
|
421
439
|
removed: [] as string[],
|
|
@@ -6,7 +6,12 @@
|
|
|
6
6
|
import { tmpdir } from 'node:os';
|
|
7
7
|
import path from 'node:path';
|
|
8
8
|
import { performance } from 'node:perf_hooks';
|
|
9
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
closeDbPair,
|
|
11
|
+
closeDbPairDeferred,
|
|
12
|
+
getBuildMeta,
|
|
13
|
+
setBuildMeta,
|
|
14
|
+
} from '../../../../db/index.js';
|
|
10
15
|
import { debug, info, warn } from '../../../../infrastructure/logger.js';
|
|
11
16
|
import { CODEGRAPH_VERSION } from '../../../../shared/version.js';
|
|
12
17
|
import { writeJournalHeader } from '../../journal.js';
|
|
@@ -43,8 +48,12 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
|
|
|
43
48
|
// Incremental drift detection — skip for small incremental changes where
|
|
44
49
|
// count fluctuation is expected (reverse-dep edge churn).
|
|
45
50
|
if (!isFullBuild && allSymbols.size > 3) {
|
|
46
|
-
const prevNodes =
|
|
47
|
-
|
|
51
|
+
const prevNodes = ctx.nativeDb
|
|
52
|
+
? ctx.nativeDb.getBuildMeta('node_count')
|
|
53
|
+
: getBuildMeta(db, 'node_count');
|
|
54
|
+
const prevEdges = ctx.nativeDb
|
|
55
|
+
? ctx.nativeDb.getBuildMeta('edge_count')
|
|
56
|
+
: getBuildMeta(db, 'edge_count');
|
|
48
57
|
if (prevNodes && prevEdges) {
|
|
49
58
|
const prevN = Number(prevNodes);
|
|
50
59
|
const prevE = Number(prevEdges);
|
|
@@ -67,17 +76,33 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
|
|
|
67
76
|
// built_at is only used by stale-embeddings check (skipped for incremental),
|
|
68
77
|
// and counts are only used by drift detection (skipped for ≤3 files).
|
|
69
78
|
// This avoids a transaction commit + WAL fsync (~15-30ms).
|
|
70
|
-
|
|
79
|
+
// Threshold aligned with drift detection gate (allSymbols.size > 3) so stored
|
|
80
|
+
// counts stay fresh whenever drift detection reads them.
|
|
81
|
+
if (isFullBuild || allSymbols.size > 3) {
|
|
71
82
|
try {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
83
|
+
if (ctx.nativeDb) {
|
|
84
|
+
ctx.nativeDb.setBuildMeta(
|
|
85
|
+
Object.entries({
|
|
86
|
+
engine: ctx.engineName,
|
|
87
|
+
engine_version: ctx.engineVersion || '',
|
|
88
|
+
codegraph_version: CODEGRAPH_VERSION,
|
|
89
|
+
schema_version: String(schemaVersion),
|
|
90
|
+
built_at: buildNow.toISOString(),
|
|
91
|
+
node_count: String(nodeCount),
|
|
92
|
+
edge_count: String(actualEdgeCount),
|
|
93
|
+
}).map(([key, value]) => ({ key, value: String(value) })),
|
|
94
|
+
);
|
|
95
|
+
} else {
|
|
96
|
+
setBuildMeta(db, {
|
|
97
|
+
engine: ctx.engineName,
|
|
98
|
+
engine_version: ctx.engineVersion || '',
|
|
99
|
+
codegraph_version: CODEGRAPH_VERSION,
|
|
100
|
+
schema_version: String(schemaVersion),
|
|
101
|
+
built_at: buildNow.toISOString(),
|
|
102
|
+
node_count: nodeCount,
|
|
103
|
+
edge_count: actualEdgeCount,
|
|
104
|
+
});
|
|
105
|
+
}
|
|
81
106
|
} catch (err) {
|
|
82
107
|
warn(`Failed to write build metadata: ${(err as Error).message}`);
|
|
83
108
|
}
|
|
@@ -157,17 +182,22 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
|
|
|
157
182
|
}
|
|
158
183
|
}
|
|
159
184
|
|
|
185
|
+
// Intentionally measured before closeDb / writeJournalHeader / auto-registration:
|
|
186
|
+
// for the deferred-close path the close is async (setImmediate), and for full
|
|
187
|
+
// builds the metric captures finalize logic only — DB close cost is tracked
|
|
188
|
+
// separately via timing.closeDbMs when available.
|
|
160
189
|
ctx.timing.finalizeMs = performance.now() - t0;
|
|
161
190
|
|
|
162
|
-
//
|
|
163
|
-
//
|
|
164
|
-
//
|
|
165
|
-
//
|
|
191
|
+
// Close NativeDatabase (fast, ~1ms) then better-sqlite3 (WAL checkpoint).
|
|
192
|
+
// For small incremental builds, defer the expensive WAL checkpoint to the
|
|
193
|
+
// next event loop tick. Skip for temp directories (tests) — they rmSync
|
|
194
|
+
// immediately after build.
|
|
195
|
+
const pair = { db, nativeDb: ctx.nativeDb };
|
|
166
196
|
const isTempDir = path.resolve(rootDir).startsWith(path.resolve(tmpdir()));
|
|
167
197
|
if (!isFullBuild && allSymbols.size <= 5 && !isTempDir) {
|
|
168
|
-
|
|
198
|
+
closeDbPairDeferred(pair);
|
|
169
199
|
} else {
|
|
170
|
-
|
|
200
|
+
closeDbPair(pair);
|
|
171
201
|
}
|
|
172
202
|
|
|
173
203
|
// Write journal header after successful build
|
|
@@ -177,7 +207,6 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
|
|
|
177
207
|
// registered during the initial full build. The dynamic import + file I/O
|
|
178
208
|
// costs ~100ms which dominates incremental finalize time.
|
|
179
209
|
if (!opts.skipRegistry && isFullBuild) {
|
|
180
|
-
const { tmpdir } = await import('node:os');
|
|
181
210
|
const tmpDir = path.resolve(tmpdir());
|
|
182
211
|
const resolvedRoot = path.resolve(rootDir);
|
|
183
212
|
if (resolvedRoot.startsWith(tmpDir)) {
|