@optave/codegraph 3.9.4 → 3.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/cli/commands/watch.d.ts.map +1 -1
- package/dist/cli/commands/watch.js +2 -0
- package/dist/cli/commands/watch.js.map +1 -1
- package/dist/cli.js +24 -1
- package/dist/cli.js.map +1 -1
- package/dist/domain/graph/builder/context.d.ts +2 -0
- package/dist/domain/graph/builder/context.d.ts.map +1 -1
- package/dist/domain/graph/builder/context.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +13 -2
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +30 -4
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +129 -3
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/collect-files.js +58 -26
- package/dist/domain/graph/builder/stages/collect-files.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +54 -45
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +17 -0
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/journal.d.ts +15 -0
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +283 -28
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts +17 -0
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +23 -7
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +13 -4
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +174 -80
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +28 -2
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/wasm-worker-entry.d.ts +24 -0
- package/dist/domain/wasm-worker-entry.d.ts.map +1 -0
- package/dist/domain/wasm-worker-entry.js +643 -0
- package/dist/domain/wasm-worker-entry.js.map +1 -0
- package/dist/domain/wasm-worker-pool.d.ts +59 -0
- package/dist/domain/wasm-worker-pool.d.ts.map +1 -0
- package/dist/domain/wasm-worker-pool.js +312 -0
- package/dist/domain/wasm-worker-pool.js.map +1 -0
- package/dist/domain/wasm-worker-protocol.d.ts +65 -0
- package/dist/domain/wasm-worker-protocol.d.ts.map +1 -0
- package/dist/domain/wasm-worker-protocol.js +13 -0
- package/dist/domain/wasm-worker-protocol.js.map +1 -0
- package/dist/extractors/javascript.js +146 -2
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/features/boundaries.d.ts +2 -2
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +2 -31
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/snapshot.d.ts.map +1 -1
- package/dist/features/snapshot.js +99 -13
- package/dist/features/snapshot.js.map +1 -1
- package/dist/graph/algorithms/louvain.d.ts.map +1 -1
- package/dist/graph/algorithms/louvain.js +2 -4
- package/dist/graph/algorithms/louvain.js.map +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +12 -2
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/shared/globs.d.ts +40 -0
- package/dist/shared/globs.d.ts.map +1 -0
- package/dist/shared/globs.js +126 -0
- package/dist/shared/globs.js.map +1 -0
- package/dist/types.d.ts +26 -1
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/package.json +7 -7
- package/src/cli/commands/watch.ts +2 -0
- package/src/cli.ts +31 -8
- package/src/domain/graph/builder/context.ts +2 -0
- package/src/domain/graph/builder/helpers.ts +53 -3
- package/src/domain/graph/builder/pipeline.ts +142 -3
- package/src/domain/graph/builder/stages/collect-files.ts +56 -26
- package/src/domain/graph/builder/stages/detect-changes.ts +57 -49
- package/src/domain/graph/builder/stages/finalize.ts +16 -0
- package/src/domain/graph/journal.ts +284 -27
- package/src/domain/graph/watcher.ts +29 -9
- package/src/domain/parser.ts +166 -73
- package/src/domain/search/generator.ts +34 -2
- package/src/domain/wasm-worker-entry.ts +788 -0
- package/src/domain/wasm-worker-pool.ts +330 -0
- package/src/domain/wasm-worker-protocol.ts +81 -0
- package/src/extractors/javascript.ts +149 -2
- package/src/features/boundaries.ts +2 -27
- package/src/features/snapshot.ts +93 -14
- package/src/graph/algorithms/louvain.ts +2 -4
- package/src/infrastructure/config.ts +12 -2
- package/src/shared/globs.ts +121 -0
- package/src/types.ts +26 -1
package/src/cli.ts
CHANGED
|
@@ -1,14 +1,37 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
import { run } from './cli/index.js';
|
|
4
|
+
import { disposeParsers } from './domain/parser.js';
|
|
4
5
|
import { CodegraphError, toErrorMessage } from './shared/errors.js';
|
|
5
6
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
/**
 * Release parser resources once the CLI command has settled.
 *
 * Awaits `disposeParsers()` so cached WASM parsers and the parse worker pool
 * (see `domain/wasm-worker-pool.ts`) are torn down before the process is
 * expected to exit. The rationale recorded with this change: a live parse
 * worker keeps the event loop alive until `worker.terminate()` runs, so a
 * short-lived command such as `codegraph build` would otherwise linger after
 * its work is done and show up in CI as `spawnSync ETIMEDOUT`.
 *
 * `disposeParsers` is documented as a clean no-op when no pool was ever
 * created (native engine, or commands that never parse).
 *
 * @returns a promise that always resolves; a teardown failure is swallowed
 *          on purpose so it cannot replace the command's real exit status.
 */
async function shutdown(): Promise<void> {
  try {
    await disposeParsers();
  } catch {
    // Intentionally ignored: a failed teardown must not mask the outcome
    // of the command that already ran.
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Last-resort handler for a rejected CLI run.
 *
 * Prints a one-line summary to stderr (plus the offending file for a
 * `CodegraphError` that carries one), tears down parser resources, and then
 * exits the process with status 1.
 */
const reportFatalAndExit = async (err: unknown): Promise<void> => {
  if (!(err instanceof CodegraphError)) {
    console.error(`codegraph: fatal error — ${toErrorMessage(err)}`);
  } else {
    console.error(`codegraph [${err.code}]: ${err.message}`);
    if (err.file) {
      console.error(` file: ${err.file}`);
    }
  }
  // Tear down before exiting so the failure path releases the same
  // resources as the success path.
  await shutdown();
  process.exit(1);
};

// Success path: run the command, then tear down. Any rejection from either
// step is routed to the fatal handler above.
run().then(shutdown).catch(reportFatalAndExit);
|
|
@@ -8,7 +8,8 @@ import fs from 'node:fs';
|
|
|
8
8
|
import path from 'node:path';
|
|
9
9
|
import { purgeFilesData } from '../../../db/index.js';
|
|
10
10
|
import { warn } from '../../../infrastructure/logger.js';
|
|
11
|
-
import { EXTENSIONS, IGNORE_DIRS } from '../../../shared/constants.js';
|
|
11
|
+
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js';
|
|
12
|
+
import { compileGlobs, matchesAny } from '../../../shared/globs.js';
|
|
12
13
|
import type {
|
|
13
14
|
BetterSqlite3Database,
|
|
14
15
|
CodegraphConfig,
|
|
@@ -58,9 +59,29 @@ function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set<string> | null): boo
|
|
|
58
59
|
return false;
|
|
59
60
|
}
|
|
60
61
|
|
|
62
|
+
/**
 * Decide whether a source file survives the configured include/exclude globs.
 *
 * `relPath` is expected to be relative to the project root and use forward
 * slashes (e.g. `src/foo/bar.ts`). An empty list disables that side of the
 * filter.
 *
 * @param relPath        root-relative, forward-slash path to test
 * @param includeRegexes compiled include patterns; when non-empty the path
 *                       must match at least one
 * @param excludeRegexes compiled exclude patterns; when non-empty the path
 *                       must match none
 * @returns `true` when the file should be kept
 */
export function passesIncludeExclude(
  relPath: string,
  includeRegexes: readonly RegExp[],
  excludeRegexes: readonly RegExp[],
): boolean {
  const included = includeRegexes.length === 0 || matchesAny(includeRegexes, relPath);
  if (!included) {
    return false;
  }
  const excluded = excludeRegexes.length > 0 && matchesAny(excludeRegexes, relPath);
  return !excluded;
}
|
|
78
|
+
|
|
61
79
|
/**
|
|
62
80
|
* Recursively collect all source files under `dir`.
|
|
63
81
|
* When `directories` is a Set, also tracks which directories contain files.
|
|
82
|
+
*
|
|
83
|
+
* The first invocation establishes `dir` as the project root against which
|
|
84
|
+
* `config.include` / `config.exclude` globs are matched.
|
|
64
85
|
*/
|
|
65
86
|
export function collectFiles(
|
|
66
87
|
dir: string,
|
|
@@ -68,6 +89,9 @@ export function collectFiles(
|
|
|
68
89
|
config: Partial<CodegraphConfig>,
|
|
69
90
|
directories: Set<string>,
|
|
70
91
|
_visited?: Set<string>,
|
|
92
|
+
_rootDir?: string,
|
|
93
|
+
_includeRegexes?: readonly RegExp[],
|
|
94
|
+
_excludeRegexes?: readonly RegExp[],
|
|
71
95
|
): { files: string[]; directories: Set<string> };
|
|
72
96
|
export function collectFiles(
|
|
73
97
|
dir: string,
|
|
@@ -75,6 +99,9 @@ export function collectFiles(
|
|
|
75
99
|
config?: Partial<CodegraphConfig>,
|
|
76
100
|
directories?: null,
|
|
77
101
|
_visited?: Set<string>,
|
|
102
|
+
_rootDir?: string,
|
|
103
|
+
_includeRegexes?: readonly RegExp[],
|
|
104
|
+
_excludeRegexes?: readonly RegExp[],
|
|
78
105
|
): string[];
|
|
79
106
|
export function collectFiles(
|
|
80
107
|
dir: string,
|
|
@@ -82,10 +109,20 @@ export function collectFiles(
|
|
|
82
109
|
config: Partial<CodegraphConfig> = {},
|
|
83
110
|
directories: Set<string> | null = null,
|
|
84
111
|
_visited: Set<string> = new Set(),
|
|
112
|
+
_rootDir?: string,
|
|
113
|
+
_includeRegexes?: readonly RegExp[],
|
|
114
|
+
_excludeRegexes?: readonly RegExp[],
|
|
85
115
|
): string[] | { files: string[]; directories: Set<string> } {
|
|
86
116
|
const trackDirs = directories instanceof Set;
|
|
87
117
|
let hasFiles = false;
|
|
88
118
|
|
|
119
|
+
// First call: compute root and compile include/exclude patterns once,
|
|
120
|
+
// then pass them down recursive calls so we don't recompile per directory.
|
|
121
|
+
const rootDir = _rootDir ?? dir;
|
|
122
|
+
const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
|
|
123
|
+
const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);
|
|
124
|
+
const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
|
|
125
|
+
|
|
89
126
|
// Merge config ignoreDirs with defaults
|
|
90
127
|
const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
|
|
91
128
|
|
|
@@ -116,11 +153,24 @@ export function collectFiles(
|
|
|
116
153
|
const full = path.join(dir, entry.name);
|
|
117
154
|
if (entry.isDirectory()) {
|
|
118
155
|
if (trackDirs) {
|
|
119
|
-
collectFiles(
|
|
156
|
+
collectFiles(
|
|
157
|
+
full,
|
|
158
|
+
files,
|
|
159
|
+
config,
|
|
160
|
+
directories as Set<string>,
|
|
161
|
+
_visited,
|
|
162
|
+
rootDir,
|
|
163
|
+
includeRegexes,
|
|
164
|
+
excludeRegexes,
|
|
165
|
+
);
|
|
120
166
|
} else {
|
|
121
|
-
collectFiles(full, files, config, null, _visited);
|
|
167
|
+
collectFiles(full, files, config, null, _visited, rootDir, includeRegexes, excludeRegexes);
|
|
122
168
|
}
|
|
123
169
|
} else if (EXTENSIONS.has(path.extname(entry.name))) {
|
|
170
|
+
if (hasGlobFilters) {
|
|
171
|
+
const rel = normalizePath(path.relative(rootDir, full));
|
|
172
|
+
if (!passesIncludeExclude(rel, includeRegexes, excludeRegexes)) continue;
|
|
173
|
+
}
|
|
124
174
|
files.push(full);
|
|
125
175
|
hasFiles = true;
|
|
126
176
|
}
|
|
@@ -21,6 +21,7 @@ import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js'
|
|
|
21
21
|
import { debug, info, warn } from '../../../infrastructure/logger.js';
|
|
22
22
|
import { loadNative } from '../../../infrastructure/native.js';
|
|
23
23
|
import { semverCompare } from '../../../infrastructure/update-check.js';
|
|
24
|
+
import { normalizePath } from '../../../shared/constants.js';
|
|
24
25
|
import { toErrorMessage } from '../../../shared/errors.js';
|
|
25
26
|
import { CODEGRAPH_VERSION } from '../../../shared/version.js';
|
|
26
27
|
import type {
|
|
@@ -29,11 +30,12 @@ import type {
|
|
|
29
30
|
BuildResult,
|
|
30
31
|
Definition,
|
|
31
32
|
ExtractorOutput,
|
|
33
|
+
SqliteStatement,
|
|
32
34
|
} from '../../../types.js';
|
|
33
|
-
import { getActiveEngine } from '../../parser.js';
|
|
35
|
+
import { getActiveEngine, getInstalledWasmExtensions, parseFilesAuto } from '../../parser.js';
|
|
34
36
|
import { setWorkspaces } from '../resolve.js';
|
|
35
37
|
import { PipelineContext } from './context.js';
|
|
36
|
-
import { loadPathAliases } from './helpers.js';
|
|
38
|
+
import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
|
|
37
39
|
import { NativeDbProxy } from './native-db-proxy.js';
|
|
38
40
|
import { buildEdges } from './stages/build-edges.js';
|
|
39
41
|
import { buildStructure } from './stages/build-structure.js';
|
|
@@ -104,6 +106,21 @@ function checkEngineSchemaMismatch(ctx: PipelineContext): void {
|
|
|
104
106
|
}
|
|
105
107
|
}
|
|
106
108
|
|
|
109
|
+
/**
 * Warn when the upcoming build is a full rebuild and the `embeddings` table
 * currently holds rows, telling the user to re-run `codegraph embed`.
 *
 * Does nothing for an incremental build that has not been promoted to a full
 * rebuild, when the `embeddings` table cannot be queried, or when it is empty.
 */
function warnOnEmbeddingsWipe(ctx: PipelineContext): void {
  // Incremental and not force-promoted: not a full rebuild, nothing to warn about.
  if (ctx.incremental && !ctx.forceFullRebuild) {
    return;
  }

  let embeddingCount: number;
  try {
    const row = ctx.db.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number };
    embeddingCount = row.c;
  } catch {
    // Query failed (e.g. the embeddings table is missing) — nothing to warn about.
    return;
  }

  if (embeddingCount === 0) {
    return;
  }

  const plural = embeddingCount === 1 ? '' : 's';
  warn(
    `Full rebuild will discard ${embeddingCount} embedding${plural}; re-run \`codegraph embed\` after the build.`,
  );
}
|
|
123
|
+
|
|
107
124
|
function loadAliases(ctx: PipelineContext): void {
|
|
108
125
|
ctx.aliases = loadPathAliases(ctx.rootDir);
|
|
109
126
|
if (ctx.config.aliases) {
|
|
@@ -149,6 +166,7 @@ function setupPipeline(ctx: PipelineContext): void {
|
|
|
149
166
|
|
|
150
167
|
initializeEngine(ctx);
|
|
151
168
|
checkEngineSchemaMismatch(ctx);
|
|
169
|
+
warnOnEmbeddingsWipe(ctx);
|
|
152
170
|
loadAliases(ctx);
|
|
153
171
|
|
|
154
172
|
// Workspace packages (monorepo)
|
|
@@ -166,6 +184,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
|
|
|
166
184
|
return {
|
|
167
185
|
phases: {
|
|
168
186
|
setupMs: +(t.setupMs ?? 0).toFixed(1),
|
|
187
|
+
collectMs: +(t.collectMs ?? 0).toFixed(1),
|
|
188
|
+
detectMs: +(t.detectMs ?? 0).toFixed(1),
|
|
169
189
|
parseMs: +(t.parseMs ?? 0).toFixed(1),
|
|
170
190
|
insertMs: +(t.insertMs ?? 0).toFixed(1),
|
|
171
191
|
resolveMs: +(t.resolveMs ?? 0).toFixed(1),
|
|
@@ -540,7 +560,9 @@ function formatNativeTimingResult(
|
|
|
540
560
|
): BuildResult {
|
|
541
561
|
return {
|
|
542
562
|
phases: {
|
|
543
|
-
setupMs: +(
|
|
563
|
+
setupMs: +(p.setupMs ?? 0).toFixed(1),
|
|
564
|
+
collectMs: +(p.collectMs ?? 0).toFixed(1),
|
|
565
|
+
detectMs: +(p.detectMs ?? 0).toFixed(1),
|
|
544
566
|
parseMs: +(p.parseMs ?? 0).toFixed(1),
|
|
545
567
|
insertMs: +(p.insertMs ?? 0).toFixed(1),
|
|
546
568
|
resolveMs: +(p.resolveMs ?? 0).toFixed(1),
|
|
@@ -696,10 +718,123 @@ async function tryNativeOrchestrator(
|
|
|
696
718
|
}
|
|
697
719
|
}
|
|
698
720
|
|
|
721
|
+
// Engine parity: the native orchestrator silently drops files whose
|
|
722
|
+
// Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
|
|
723
|
+
// stale native binaries). WASM handles those — backfill via WASM so both
|
|
724
|
+
// engines process the same file set (#967).
|
|
725
|
+
//
|
|
726
|
+
// Only runs on full builds: incremental builds only touch changed files,
|
|
727
|
+
// which are parsed through parseFilesAuto (which has its own per-file
|
|
728
|
+
// backfill), so a full filesystem scan here would be wasted work.
|
|
729
|
+
if (result.isFullBuild) {
|
|
730
|
+
await backfillNativeDroppedFiles(ctx);
|
|
731
|
+
}
|
|
732
|
+
|
|
699
733
|
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
|
|
700
734
|
return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
|
|
701
735
|
}
|
|
702
736
|
|
|
737
|
+
/**
 * Engine-parity backfill (#967): find source files that exist on disk but
 * have no `file` node in the graph, parse them with the WASM engine, and
 * insert their file, definition and export nodes.
 *
 * Steps:
 *  1. If the context is on the native-first proxy, close the native DB and
 *     reopen `ctx.dbPath` as a regular connection (needed for INSERT).
 *  2. Collect the on-disk file set and subtract files that already have a
 *     `kind = 'file'` node.
 *  3. Keep only files whose extension has an installed WASM grammar; the rest
 *     cannot be parsed here and would only inflate the warning count.
 *  4. Parse the remainder via WASM, batch-insert the rows, then flag exported
 *     symbols with `exported = 1` in chunks (#970).
 *
 * Returns early, without warning or writing, when nothing is missing.
 */
async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
  // Swap the native-first proxy for a real better-sqlite3 connection.
  if (ctx.nativeFirstProxy) {
    closeNativeDb(ctx, 'pre-parity-backfill');
    ctx.db = openDb(ctx.dbPath);
    ctx.nativeFirstProxy = false;
  }

  const { rootDir } = ctx;

  // Root-relative, forward-slash paths of every file the collector finds.
  const onDisk = collectFilesUtil(rootDir, [], ctx.config, new Set<string>());
  const expected = new Set(onDisk.files.map((abs) => normalizePath(path.relative(rootDir, abs))));

  const fileNodeRows = ctx.db
    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
    .all() as Array<{ file: string }>;
  const alreadyIndexed = new Set(fileNodeRows.map((row) => row.file));

  // Only files with an installed WASM grammar are candidates for backfill.
  const installedExts = getInstalledWasmExtensions();
  const missingAbs = Array.from(expected)
    .filter(
      (rel) => !alreadyIndexed.has(rel) && installedExts.has(path.extname(rel).toLowerCase()),
    )
    .map((rel) => path.join(rootDir, rel));
  if (missingAbs.length === 0) {
    return;
  }

  warn(
    `Native orchestrator dropped ${missingAbs.length} file(s); backfilling via WASM for engine parity`,
  );
  const wasmResults = await parseFilesAuto(missingAbs, rootDir, { engine: 'wasm' });

  // Positional rows handed to batchInsertNodes:
  // [name, kind, file, line, endLine, <null>, qualified_name, scope, visibility]
  const nodeRows: unknown[][] = [];
  const exportKeys: unknown[][] = [];
  for (const [relPath, symbols] of wasmResults) {
    // File row: qualified_name stays null.
    nodeRows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);

    for (const def of symbols.definitions ?? []) {
      // Scope is everything before the last '.', or null for an undotted name.
      const lastDot = def.name.lastIndexOf('.');
      const scope = lastDot === -1 ? null : def.name.slice(0, lastDot);
      nodeRows.push([
        def.name,
        def.kind,
        relPath,
        def.line,
        def.endLine ?? null,
        null,
        def.name,
        scope,
        def.visibility ?? null,
      ]);
    }

    // Each export gets a row of its own plus a key for the later
    // exported = 1 update pass.
    for (const exp of symbols.exports ?? []) {
      nodeRows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
      exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
    }
  }

  const db = ctx.db as unknown as BetterSqlite3Database;
  batchInsertNodes(db, nodeRows);

  if (exportKeys.length === 0) {
    return;
  }

  // Flag exported symbols in chunks. Prepared statements are cached by chunk
  // size, so the full-size statement is reused and only a shorter final chunk
  // needs a second one.
  const EXPORT_CHUNK = 500;
  const stmtBySize = new Map<number, SqliteStatement>();
  for (let offset = 0; offset < exportKeys.length; offset += EXPORT_CHUNK) {
    const chunk = exportKeys.slice(offset, offset + EXPORT_CHUNK);

    let stmt = stmtBySize.get(chunk.length);
    if (!stmt) {
      const where = new Array<string>(chunk.length)
        .fill('(name = ? AND kind = ? AND file = ? AND line = ?)')
        .join(' OR ');
      stmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${where}`);
      stmtBySize.set(chunk.length, stmt);
    }

    const params: unknown[] = [];
    for (const [name, kind, file, line] of chunk) {
      params.push(name, kind, file, line);
    }
    stmt.run(...params);
  }
}
|
|
837
|
+
|
|
703
838
|
// ── Pipeline stages execution ───────────────────────────────────────────
|
|
704
839
|
|
|
705
840
|
async function runPipelineStages(ctx: PipelineContext): Promise<void> {
|
|
@@ -845,6 +980,10 @@ export async function buildGraph(
|
|
|
845
980
|
`Codegraph version changed (${prevVersion} → ${CODEGRAPH_VERSION}), promoting to full rebuild.`,
|
|
846
981
|
);
|
|
847
982
|
ctx.forceFullRebuild = true;
|
|
983
|
+
// Re-check embeddings: the initial warnOnEmbeddingsWipe ran before
|
|
984
|
+
// forceFullRebuild was set here, so the silent-data-loss guard
|
|
985
|
+
// would otherwise miss this late-promotion path (#986 follow-up).
|
|
986
|
+
warnOnEmbeddingsWipe(ctx);
|
|
848
987
|
}
|
|
849
988
|
}
|
|
850
989
|
}
|
|
@@ -7,11 +7,13 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import fs from 'node:fs';
|
|
9
9
|
import path from 'node:path';
|
|
10
|
+
import { performance } from 'node:perf_hooks';
|
|
10
11
|
import { debug, info } from '../../../../infrastructure/logger.js';
|
|
11
12
|
import { normalizePath } from '../../../../shared/constants.js';
|
|
13
|
+
import { compileGlobs } from '../../../../shared/globs.js';
|
|
12
14
|
import { readJournal } from '../../journal.js';
|
|
13
15
|
import type { PipelineContext } from '../context.js';
|
|
14
|
-
import { collectFiles as collectFilesUtil } from '../helpers.js';
|
|
16
|
+
import { collectFiles as collectFilesUtil, passesIncludeExclude } from '../helpers.js';
|
|
15
17
|
|
|
16
18
|
/**
|
|
17
19
|
* Reconstruct allFiles from DB file_hashes + journal deltas.
|
|
@@ -20,7 +22,7 @@ import { collectFiles as collectFilesUtil } from '../helpers.js';
|
|
|
20
22
|
function tryFastCollect(
|
|
21
23
|
ctx: PipelineContext,
|
|
22
24
|
): { files: string[]; directories: Set<string> } | null {
|
|
23
|
-
const { db, rootDir } = ctx;
|
|
25
|
+
const { db, rootDir, config } = ctx;
|
|
24
26
|
const useNative = ctx.engineName === 'native' && !!ctx.nativeDb?.getCollectFilesData;
|
|
25
27
|
|
|
26
28
|
// 1. Check that file_hashes table exists and has entries
|
|
@@ -70,10 +72,20 @@ function tryFastCollect(
|
|
|
70
72
|
}
|
|
71
73
|
}
|
|
72
74
|
|
|
73
|
-
// 5. Convert to absolute paths and compute directories
|
|
75
|
+
// 5. Convert to absolute paths and compute directories, honoring
|
|
76
|
+
// config.include / config.exclude globs so incremental builds reflect
|
|
77
|
+
// config changes (paths from the DB were collected under older config).
|
|
78
|
+
const includeRegexes = compileGlobs(config?.include);
|
|
79
|
+
const excludeRegexes = compileGlobs(config?.exclude);
|
|
80
|
+
const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
|
|
81
|
+
|
|
74
82
|
const files: string[] = [];
|
|
75
83
|
const directories = new Set<string>();
|
|
76
84
|
for (const relPath of fileSet) {
|
|
85
|
+
if (hasGlobFilters) {
|
|
86
|
+
const normRel = normalizePath(relPath);
|
|
87
|
+
if (!passesIncludeExclude(normRel, includeRegexes, excludeRegexes)) continue;
|
|
88
|
+
}
|
|
77
89
|
const absPath = path.join(rootDir, relPath);
|
|
78
90
|
files.push(absPath);
|
|
79
91
|
directories.add(path.dirname(absPath));
|
|
@@ -89,42 +101,60 @@ export async function collectFiles(ctx: PipelineContext): Promise<void> {
|
|
|
89
101
|
const { rootDir, config, opts } = ctx;
|
|
90
102
|
|
|
91
103
|
if (opts.scope) {
|
|
92
|
-
// Scoped rebuild: rebuild only specified files
|
|
104
|
+
// Scoped rebuild: rebuild only specified files.
|
|
105
|
+
//
|
|
106
|
+
// Timer only wraps the filesystem-walk portion (existence checks + file
|
|
107
|
+
// list construction). Change-detection outputs (parseChanges, removed,
|
|
108
|
+
// isFullBuild) are attributed to detectMs for semantic consistency with
|
|
109
|
+
// the non-scoped path, even though this stage computes them.
|
|
110
|
+
const start = performance.now();
|
|
93
111
|
const scopedFiles = opts.scope.map((f: string) => normalizePath(f));
|
|
94
112
|
const existing: Array<{ file: string; relPath: string }> = [];
|
|
95
113
|
const missing: string[] = [];
|
|
96
|
-
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
114
|
+
try {
|
|
115
|
+
for (const rel of scopedFiles) {
|
|
116
|
+
const abs = path.join(rootDir, rel);
|
|
117
|
+
if (fs.existsSync(abs)) {
|
|
118
|
+
existing.push({ file: abs, relPath: rel });
|
|
119
|
+
} else {
|
|
120
|
+
missing.push(rel);
|
|
121
|
+
}
|
|
102
122
|
}
|
|
123
|
+
ctx.allFiles = existing.map((e) => e.file);
|
|
124
|
+
ctx.discoveredDirs = new Set(existing.map((e) => path.dirname(e.file)));
|
|
125
|
+
} finally {
|
|
126
|
+
ctx.timing.collectMs = performance.now() - start;
|
|
103
127
|
}
|
|
104
|
-
|
|
105
|
-
|
|
128
|
+
// Change-detection outputs — timed under detectMs for semantic parity.
|
|
129
|
+
const detectStart = performance.now();
|
|
106
130
|
ctx.parseChanges = existing;
|
|
107
131
|
ctx.metadataUpdates = [];
|
|
108
132
|
ctx.removed = missing;
|
|
109
133
|
ctx.isFullBuild = false;
|
|
134
|
+
ctx.timing.detectMs = (ctx.timing.detectMs ?? 0) + (performance.now() - detectStart);
|
|
110
135
|
info(`Scoped rebuild: ${existing.length} files to rebuild, ${missing.length} to purge`);
|
|
111
136
|
return;
|
|
112
137
|
}
|
|
113
138
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if (
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
139
|
+
const start = performance.now();
|
|
140
|
+
try {
|
|
141
|
+
// Incremental fast path: reconstruct file list from DB + journal deltas
|
|
142
|
+
// instead of full recursive filesystem scan (~8ms savings on 473 files).
|
|
143
|
+
if (ctx.incremental && !ctx.forceFullRebuild) {
|
|
144
|
+
const fast = tryFastCollect(ctx);
|
|
145
|
+
if (fast) {
|
|
146
|
+
ctx.allFiles = fast.files;
|
|
147
|
+
ctx.discoveredDirs = fast.directories;
|
|
148
|
+
info(`Found ${ctx.allFiles.length} files (cached)`);
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
123
151
|
}
|
|
124
|
-
}
|
|
125
152
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
153
|
+
const collected = collectFilesUtil(rootDir, [], config, new Set<string>());
|
|
154
|
+
ctx.allFiles = collected.files;
|
|
155
|
+
ctx.discoveredDirs = collected.directories;
|
|
156
|
+
info(`Found ${ctx.allFiles.length} files to parse`);
|
|
157
|
+
} finally {
|
|
158
|
+
ctx.timing.collectMs = performance.now() - start;
|
|
159
|
+
}
|
|
130
160
|
}
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import fs from 'node:fs';
|
|
9
9
|
import path from 'node:path';
|
|
10
|
+
import { performance } from 'node:perf_hooks';
|
|
10
11
|
import { closeDb } from '../../../../db/index.js';
|
|
11
12
|
import { debug, info } from '../../../../infrastructure/logger.js';
|
|
12
13
|
import { normalizePath } from '../../../../shared/constants.js';
|
|
@@ -512,59 +513,66 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
|
|
|
512
513
|
}
|
|
513
514
|
|
|
514
515
|
/**
 * Pipeline stage: work out which files need (re)parsing, which were removed,
 * and whether this run is a full build, then hand off to the matching handler.
 *
 * - Scoped builds (`opts.scope` set) delegate entirely to `handleScopedBuild`.
 * - Incremental builds that were not force-promoted ask `getChangedFiles`;
 *   otherwise every collected file is treated as changed (full build).
 * - When an incremental run finds nothing to parse or remove, pending
 *   analysis is attempted; if none ran, metadata is healed and an
 *   "up to date" message is logged. Either way the DB is closed, the journal
 *   header is rewritten, and `ctx.earlyExit` is set.
 *
 * Elapsed time is always added to `ctx.timing.detectMs`, on top of any value
 * already recorded there, including on early return or throw.
 */
export async function detectChanges(ctx: PipelineContext): Promise<void> {
  const startedAt = performance.now();
  try {
    const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx;

    if ((opts as Record<string, unknown>).scope) {
      handleScopedBuild(ctx);
      return;
    }

    // Stored stat shape: whole-millisecond mtime plus size.
    const toStoredStat = (stat: { mtimeMs: number; size: number }) => ({
      mtime: Math.floor(stat.mtimeMs),
      size: stat.size,
    });

    const detection =
      incremental && !forceFullRebuild
        ? getChangedFiles(db, allFiles, rootDir)
        : {
            changed: allFiles.map((f): ChangedFile => ({ file: f })),
            removed: [] as string[],
            isFullBuild: true,
          };

    ctx.removed = detection.removed;
    ctx.isFullBuild = detection.isFullBuild;

    // Files whose content must be parsed again.
    ctx.parseChanges = detection.changed
      .filter((entry) => !entry.metadataOnly)
      .map((entry) => ({
        file: entry.file,
        relPath: entry.relPath,
        content: entry.content,
        hash: entry.hash,
        stat: entry.stat ? toStoredStat(entry.stat) : undefined,
        _reverseDepOnly: entry._reverseDepOnly,
      }));

    // Entries flagged metadata-only; incomplete ones (missing relPath, hash
    // or stat) are dropped by the type guard.
    ctx.metadataUpdates = detection.changed
      .filter(
        (entry): entry is ChangedFile & { relPath: string; hash: string; stat: FileStat } =>
          !!entry.metadataOnly && !!entry.relPath && !!entry.hash && !!entry.stat,
      )
      .map((entry) => ({
        relPath: entry.relPath,
        hash: entry.hash,
        stat: toStoredStat(entry.stat),
      }));

    const nothingToDo =
      !ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0;
    if (nothingToDo) {
      const ranAnalysis = await runPendingAnalysis(ctx);
      if (!ranAnalysis) {
        healMetadata(ctx);
        info('No changes detected. Graph is up to date.');
      }
      // Shared early-exit tail for both outcomes above.
      closeDb(db);
      writeJournalHeader(rootDir, Date.now());
      ctx.earlyExit = true;
      return;
    }

    const continueBuild = ctx.isFullBuild ? handleFullBuild : handleIncrementalBuild;
    continueBuild(ctx);
  } finally {
    // Additive: adds to whatever ctx.timing.detectMs already holds (0 if unset).
    ctx.timing.detectMs = (ctx.timing.detectMs ?? 0) + (performance.now() - startedAt);
  }
}
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* WASM cleanup, stats logging, drift detection, build metadata, registry, journal.
|
|
5
5
|
*/
|
|
6
|
+
import fs from 'node:fs';
|
|
6
7
|
import { tmpdir } from 'node:os';
|
|
7
8
|
import path from 'node:path';
|
|
8
9
|
import { performance } from 'node:perf_hooks';
|
|
@@ -88,6 +89,19 @@ function persistBuildMetadata(
|
|
|
88
89
|
// subsequent build to be a full rebuild.
|
|
89
90
|
const codeVersionToWrite =
|
|
90
91
|
ctx.engineName === 'native' && ctx.engineVersion ? ctx.engineVersion : CODEGRAPH_VERSION;
|
|
92
|
+
// Persist the repo root so downstream commands (e.g. `codegraph embed`)
|
|
93
|
+
// can resolve relative file paths regardless of the invoking cwd.
|
|
94
|
+
// Use realpathSync (symlink-resolving) to match the Rust engine's
|
|
95
|
+
// std::fs::canonicalize — otherwise the JS write here would overwrite the
|
|
96
|
+
// canonical path Rust wrote for native full builds and could re-introduce
|
|
97
|
+
// a non-canonical path when the project root is behind a symlink.
|
|
98
|
+
const resolvedRootDir = path.resolve(ctx.rootDir);
|
|
99
|
+
let rootDirToWrite = resolvedRootDir;
|
|
100
|
+
try {
|
|
101
|
+
rootDirToWrite = fs.realpathSync(resolvedRootDir);
|
|
102
|
+
} catch {
|
|
103
|
+
/* realpath can fail (e.g. path no longer exists); fall back to resolve() */
|
|
104
|
+
}
|
|
91
105
|
try {
|
|
92
106
|
if (useNativeDb) {
|
|
93
107
|
ctx.nativeDb!.setBuildMeta(
|
|
@@ -99,6 +113,7 @@ function persistBuildMetadata(
|
|
|
99
113
|
built_at: buildNow.toISOString(),
|
|
100
114
|
node_count: String(nodeCount),
|
|
101
115
|
edge_count: String(actualEdgeCount),
|
|
116
|
+
root_dir: rootDirToWrite,
|
|
102
117
|
}).map(([key, value]) => ({ key, value: String(value) })),
|
|
103
118
|
);
|
|
104
119
|
} else {
|
|
@@ -110,6 +125,7 @@ function persistBuildMetadata(
|
|
|
110
125
|
built_at: buildNow.toISOString(),
|
|
111
126
|
node_count: nodeCount,
|
|
112
127
|
edge_count: actualEdgeCount,
|
|
128
|
+
root_dir: rootDirToWrite,
|
|
113
129
|
});
|
|
114
130
|
}
|
|
115
131
|
} catch (err) {
|