@optave/codegraph 3.10.0 → 3.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -33
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +91 -60
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +77 -0
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/visitor-utils.d.ts +3 -0
- package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
- package/dist/ast-analysis/visitor-utils.js +83 -49
- package/dist/ast-analysis/visitor-utils.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
- package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
- package/dist/cli/commands/audit.js +1 -1
- package/dist/cli/commands/audit.js.map +1 -1
- package/dist/cli/commands/build.d.ts.map +1 -1
- package/dist/cli/commands/build.js +2 -0
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/check.js +1 -1
- package/dist/cli/commands/check.js.map +1 -1
- package/dist/cli/commands/children.js +1 -1
- package/dist/cli/commands/children.js.map +1 -1
- package/dist/cli/commands/diff-impact.js +1 -1
- package/dist/cli/commands/diff-impact.js.map +1 -1
- package/dist/cli/commands/embed.d.ts.map +1 -1
- package/dist/cli/commands/embed.js +49 -4
- package/dist/cli/commands/embed.js.map +1 -1
- package/dist/cli/commands/roles.js +1 -1
- package/dist/cli/commands/roles.js.map +1 -1
- package/dist/cli/commands/structure.js +1 -1
- package/dist/cli/commands/structure.js.map +1 -1
- package/dist/cli/shared/options.js +1 -1
- package/dist/cli/shared/options.js.map +1 -1
- package/dist/db/connection.d.ts.map +1 -1
- package/dist/db/connection.js +8 -0
- package/dist/db/connection.js.map +1 -1
- package/dist/domain/analysis/dependencies.d.ts.map +1 -1
- package/dist/domain/analysis/dependencies.js +106 -80
- package/dist/domain/analysis/dependencies.js.map +1 -1
- package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
- package/dist/domain/analysis/fn-impact.js +77 -52
- package/dist/domain/analysis/fn-impact.js.map +1 -1
- package/dist/domain/analysis/module-map.d.ts.map +1 -1
- package/dist/domain/analysis/module-map.js +132 -121
- package/dist/domain/analysis/module-map.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +4 -4
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +47 -33
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/incremental.d.ts +6 -6
- package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
- package/dist/domain/graph/builder/incremental.js +148 -99
- package/dist/domain/graph/builder/incremental.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts +1 -0
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +23 -637
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.js +141 -98
- package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.js +82 -65
- package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +60 -51
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
- package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
- package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
- package/dist/domain/graph/cycles.d.ts +6 -4
- package/dist/domain/graph/cycles.d.ts.map +1 -1
- package/dist/domain/graph/cycles.js +50 -55
- package/dist/domain/graph/cycles.js.map +1 -1
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +89 -70
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +28 -20
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +12 -23
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +153 -80
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts +3 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +68 -45
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/search/models.d.ts +18 -0
- package/dist/domain/search/models.d.ts.map +1 -1
- package/dist/domain/search/models.js +72 -4
- package/dist/domain/search/models.js.map +1 -1
- package/dist/domain/search/search/hybrid.d.ts.map +1 -1
- package/dist/domain/search/search/hybrid.js +49 -40
- package/dist/domain/search/search/hybrid.js.map +1 -1
- package/dist/domain/search/search/semantic.d.ts.map +1 -1
- package/dist/domain/search/search/semantic.js +69 -49
- package/dist/domain/search/search/semantic.js.map +1 -1
- package/dist/domain/wasm-worker-entry.js +209 -137
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/extractors/c.js +25 -6
- package/dist/extractors/c.js.map +1 -1
- package/dist/extractors/cpp.js +47 -6
- package/dist/extractors/cpp.js.map +1 -1
- package/dist/extractors/cuda.js +90 -14
- package/dist/extractors/cuda.js.map +1 -1
- package/dist/extractors/elixir.js +108 -4
- package/dist/extractors/elixir.js.map +1 -1
- package/dist/extractors/erlang.js +56 -20
- package/dist/extractors/erlang.js.map +1 -1
- package/dist/extractors/fsharp.d.ts +7 -0
- package/dist/extractors/fsharp.d.ts.map +1 -1
- package/dist/extractors/fsharp.js +94 -0
- package/dist/extractors/fsharp.js.map +1 -1
- package/dist/extractors/gleam.d.ts.map +1 -1
- package/dist/extractors/gleam.js +29 -33
- package/dist/extractors/gleam.js.map +1 -1
- package/dist/extractors/groovy.js +41 -1
- package/dist/extractors/groovy.js.map +1 -1
- package/dist/extractors/haskell.js +48 -4
- package/dist/extractors/haskell.js.map +1 -1
- package/dist/extractors/helpers.d.ts +79 -1
- package/dist/extractors/helpers.d.ts.map +1 -1
- package/dist/extractors/helpers.js +137 -0
- package/dist/extractors/helpers.js.map +1 -1
- package/dist/extractors/java.d.ts.map +1 -1
- package/dist/extractors/java.js +37 -49
- package/dist/extractors/java.js.map +1 -1
- package/dist/extractors/javascript.d.ts.map +1 -1
- package/dist/extractors/javascript.js +44 -44
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/extractors/julia.js +198 -74
- package/dist/extractors/julia.js.map +1 -1
- package/dist/extractors/kotlin.js +4 -0
- package/dist/extractors/kotlin.js.map +1 -1
- package/dist/extractors/objc.js +184 -47
- package/dist/extractors/objc.js.map +1 -1
- package/dist/extractors/python.js +7 -4
- package/dist/extractors/python.js.map +1 -1
- package/dist/extractors/r.d.ts.map +1 -1
- package/dist/extractors/r.js +103 -87
- package/dist/extractors/r.js.map +1 -1
- package/dist/extractors/scala.d.ts.map +1 -1
- package/dist/extractors/scala.js +18 -32
- package/dist/extractors/scala.js.map +1 -1
- package/dist/extractors/solidity.d.ts.map +1 -1
- package/dist/extractors/solidity.js +55 -69
- package/dist/extractors/solidity.js.map +1 -1
- package/dist/extractors/verilog.js +80 -15
- package/dist/extractors/verilog.js.map +1 -1
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +49 -39
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/cfg.d.ts.map +1 -1
- package/dist/features/cfg.js +90 -63
- package/dist/features/cfg.js.map +1 -1
- package/dist/features/check.d.ts.map +1 -1
- package/dist/features/check.js +43 -34
- package/dist/features/check.js.map +1 -1
- package/dist/features/cochange.d.ts.map +1 -1
- package/dist/features/cochange.js +68 -56
- package/dist/features/cochange.js.map +1 -1
- package/dist/features/complexity.d.ts.map +1 -1
- package/dist/features/complexity.js +105 -75
- package/dist/features/complexity.js.map +1 -1
- package/dist/features/dataflow.d.ts.map +1 -1
- package/dist/features/dataflow.js +37 -29
- package/dist/features/dataflow.js.map +1 -1
- package/dist/features/flow.d.ts.map +1 -1
- package/dist/features/flow.js +31 -22
- package/dist/features/flow.js.map +1 -1
- package/dist/features/graph-enrichment.d.ts.map +1 -1
- package/dist/features/graph-enrichment.js +77 -70
- package/dist/features/graph-enrichment.js.map +1 -1
- package/dist/features/owners.d.ts +17 -26
- package/dist/features/owners.d.ts.map +1 -1
- package/dist/features/owners.js +120 -109
- package/dist/features/owners.js.map +1 -1
- package/dist/features/sequence.d.ts.map +1 -1
- package/dist/features/sequence.js +59 -54
- package/dist/features/sequence.js.map +1 -1
- package/dist/features/structure-query.d.ts.map +1 -1
- package/dist/features/structure-query.js +60 -60
- package/dist/features/structure-query.js.map +1 -1
- package/dist/features/structure.js +28 -36
- package/dist/features/structure.js.map +1 -1
- package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
- package/dist/graph/algorithms/leiden/optimiser.js +100 -69
- package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
- package/dist/graph/classifiers/roles.d.ts.map +1 -1
- package/dist/graph/classifiers/roles.js +63 -59
- package/dist/graph/classifiers/roles.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -1
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +4 -0
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/mcp/tools/semantic-search.d.ts +1 -0
- package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
- package/dist/mcp/tools/semantic-search.js +1 -0
- package/dist/mcp/tools/semantic-search.js.map +1 -1
- package/dist/presentation/cfg.d.ts.map +1 -1
- package/dist/presentation/cfg.js +44 -29
- package/dist/presentation/cfg.js.map +1 -1
- package/dist/presentation/flow.d.ts.map +1 -1
- package/dist/presentation/flow.js +58 -38
- package/dist/presentation/flow.js.map +1 -1
- package/dist/types.d.ts +16 -2
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/grammars/tree-sitter-fsharp.wasm +0 -0
- package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
- package/grammars/tree-sitter-gleam.wasm +0 -0
- package/package.json +10 -10
- package/src/ast-analysis/engine.ts +145 -61
- package/src/ast-analysis/rules/index.ts +87 -0
- package/src/ast-analysis/visitor-utils.ts +86 -46
- package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
- package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
- package/src/cli/commands/audit.ts +1 -1
- package/src/cli/commands/build.ts +2 -0
- package/src/cli/commands/check.ts +1 -1
- package/src/cli/commands/children.ts +1 -1
- package/src/cli/commands/diff-impact.ts +1 -1
- package/src/cli/commands/embed.ts +54 -4
- package/src/cli/commands/roles.ts +1 -1
- package/src/cli/commands/structure.ts +1 -1
- package/src/cli/shared/options.ts +1 -1
- package/src/db/connection.ts +8 -0
- package/src/domain/analysis/dependencies.ts +166 -85
- package/src/domain/analysis/fn-impact.ts +120 -50
- package/src/domain/analysis/module-map.ts +175 -140
- package/src/domain/graph/builder/helpers.ts +85 -76
- package/src/domain/graph/builder/incremental.ts +223 -131
- package/src/domain/graph/builder/pipeline.ts +32 -785
- package/src/domain/graph/builder/stages/build-edges.ts +207 -142
- package/src/domain/graph/builder/stages/build-structure.ts +115 -82
- package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
- package/src/domain/graph/builder/stages/finalize.ts +72 -70
- package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
- package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
- package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
- package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
- package/src/domain/graph/cycles.ts +51 -49
- package/src/domain/graph/journal.ts +84 -69
- package/src/domain/graph/watcher.ts +29 -25
- package/src/domain/parser.ts +170 -67
- package/src/domain/search/generator.ts +132 -74
- package/src/domain/search/models.ts +75 -4
- package/src/domain/search/search/hybrid.ts +53 -42
- package/src/domain/search/search/semantic.ts +105 -65
- package/src/domain/wasm-worker-entry.ts +243 -153
- package/src/extractors/c.ts +27 -8
- package/src/extractors/cpp.ts +50 -8
- package/src/extractors/cuda.ts +90 -16
- package/src/extractors/elixir.ts +103 -4
- package/src/extractors/erlang.ts +63 -20
- package/src/extractors/fsharp.ts +104 -0
- package/src/extractors/gleam.ts +40 -39
- package/src/extractors/groovy.ts +45 -1
- package/src/extractors/haskell.ts +45 -4
- package/src/extractors/helpers.ts +205 -1
- package/src/extractors/java.ts +42 -45
- package/src/extractors/javascript.ts +44 -43
- package/src/extractors/julia.ts +191 -77
- package/src/extractors/kotlin.ts +4 -0
- package/src/extractors/objc.ts +171 -47
- package/src/extractors/python.ts +5 -3
- package/src/extractors/r.ts +104 -82
- package/src/extractors/scala.ts +24 -36
- package/src/extractors/solidity.ts +59 -78
- package/src/extractors/verilog.ts +83 -15
- package/src/features/boundaries.ts +64 -46
- package/src/features/cfg.ts +145 -74
- package/src/features/check.ts +60 -43
- package/src/features/cochange.ts +95 -72
- package/src/features/complexity.ts +134 -79
- package/src/features/dataflow.ts +57 -34
- package/src/features/flow.ts +48 -24
- package/src/features/graph-enrichment.ts +105 -70
- package/src/features/owners.ts +186 -146
- package/src/features/sequence.ts +99 -69
- package/src/features/structure-query.ts +94 -79
- package/src/features/structure.ts +56 -56
- package/src/graph/algorithms/leiden/optimiser.ts +142 -87
- package/src/graph/classifiers/roles.ts +64 -54
- package/src/infrastructure/config.ts +1 -1
- package/src/mcp/tool-registry.ts +5 -0
- package/src/mcp/tools/semantic-search.ts +2 -0
- package/src/presentation/cfg.ts +48 -32
- package/src/presentation/flow.ts +100 -52
- package/src/types.ts +16 -1
package/src/domain/parser.ts
CHANGED
|
@@ -322,12 +322,15 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
|
|
|
322
322
|
* without _tree", which was the source of #1036 — a single file missing one
|
|
323
323
|
* analysis triggered a full-build re-parse of every WASM-parseable file.
|
|
324
324
|
*/
|
|
325
|
-
|
|
325
|
+
/**
|
|
326
|
+
* Select files from `fileSymbols` that still need analysis data and are
|
|
327
|
+
* parseable by an installed WASM grammar. Pure (no I/O) — safe to unit-test.
|
|
328
|
+
*/
|
|
329
|
+
function collectBackfillPending(
|
|
326
330
|
fileSymbols: Map<string, any>,
|
|
327
331
|
rootDir: string,
|
|
328
332
|
needsFn?: (relPath: string, symbols: any) => boolean,
|
|
329
|
-
):
|
|
330
|
-
// Collect files that still need analysis data and are parseable by WASM.
|
|
333
|
+
): Array<{ relPath: string; absPath: string; symbols: any }> {
|
|
331
334
|
const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
|
|
332
335
|
for (const [relPath, symbols] of fileSymbols) {
|
|
333
336
|
if (symbols._tree) continue; // legacy path — leave existing trees alone
|
|
@@ -335,6 +338,15 @@ export async function ensureWasmTrees(
|
|
|
335
338
|
if (needsFn && !needsFn(relPath, symbols)) continue;
|
|
336
339
|
pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
|
|
337
340
|
}
|
|
341
|
+
return pending;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
export async function ensureWasmTrees(
|
|
345
|
+
fileSymbols: Map<string, any>,
|
|
346
|
+
rootDir: string,
|
|
347
|
+
needsFn?: (relPath: string, symbols: any) => boolean,
|
|
348
|
+
): Promise<void> {
|
|
349
|
+
const pending = collectBackfillPending(fileSymbols, rootDir, needsFn);
|
|
338
350
|
if (pending.length === 0) return;
|
|
339
351
|
|
|
340
352
|
const pool = getWasmWorkerPool();
|
|
@@ -352,30 +364,37 @@ export async function ensureWasmTrees(
|
|
|
352
364
|
}
|
|
353
365
|
}
|
|
354
366
|
|
|
355
|
-
/**
|
|
356
|
-
|
|
357
|
-
* Only fills gaps — never overwrites fields the caller already populated.
|
|
358
|
-
* Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
|
|
359
|
-
* per-definition complexity and cfg.
|
|
360
|
-
*/
|
|
361
|
-
function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
|
|
367
|
+
/** Fill gap-only scalar metadata (`_langId`, `_lineCount`) from the worker output. */
|
|
368
|
+
function mergeScalarMetadata(symbols: any, worker: ExtractorOutput): void {
|
|
362
369
|
if (!symbols._langId && worker._langId) symbols._langId = worker._langId;
|
|
363
370
|
if (!symbols._lineCount && worker._lineCount) symbols._lineCount = worker._lineCount;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
/** Fill gap-only analysis arrays (`astNodes`, `dataflow`) from the worker output. */
|
|
374
|
+
function mergeAnalysisArrays(symbols: any, worker: ExtractorOutput): void {
|
|
364
375
|
if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
|
|
365
376
|
symbols.astNodes = worker.astNodes;
|
|
366
377
|
}
|
|
367
378
|
if (!symbols.dataflow && worker.dataflow) symbols.dataflow = worker.dataflow;
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/** Merge worker typeMap into existing symbols.typeMap with first-wins semantics. */
|
|
382
|
+
function mergeTypeMap(symbols: any, worker: ExtractorOutput): void {
|
|
383
|
+
if (!worker.typeMap || worker.typeMap.size === 0) return;
|
|
384
|
+
if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
|
|
385
|
+
symbols.typeMap = new Map(worker.typeMap);
|
|
386
|
+
return;
|
|
387
|
+
}
|
|
388
|
+
for (const [k, v] of worker.typeMap) {
|
|
389
|
+
if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
|
|
376
390
|
}
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
/** Patch existing definitions with worker complexity/cfg when absent. */
|
|
394
|
+
function mergeDefinitionAnalysis(symbols: any, worker: ExtractorOutput): void {
|
|
377
395
|
const existingDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
|
|
378
396
|
const workerDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];
|
|
397
|
+
if (existingDefs.length === 0 || workerDefs.length === 0) return;
|
|
379
398
|
// Index existing defs by (kind, name, line) — mirrors engine.ts matching key.
|
|
380
399
|
const byKey = new Map<string, any>();
|
|
381
400
|
for (const d of existingDefs) byKey.set(`${d.kind}|${d.name}|${d.line}`, d);
|
|
@@ -389,6 +408,19 @@ function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
|
|
|
389
408
|
}
|
|
390
409
|
}
|
|
391
410
|
|
|
411
|
+
/**
|
|
412
|
+
* Merge pre-computed analysis data from a worker result onto existing symbols.
|
|
413
|
+
* Only fills gaps — never overwrites fields the caller already populated.
|
|
414
|
+
* Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
|
|
415
|
+
* per-definition complexity and cfg.
|
|
416
|
+
*/
|
|
417
|
+
function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
|
|
418
|
+
mergeScalarMetadata(symbols, worker);
|
|
419
|
+
mergeAnalysisArrays(symbols, worker);
|
|
420
|
+
mergeTypeMap(symbols, worker);
|
|
421
|
+
mergeDefinitionAnalysis(symbols, worker);
|
|
422
|
+
}
|
|
423
|
+
|
|
392
424
|
/**
|
|
393
425
|
* Check whether the required WASM grammar files exist on disk.
|
|
394
426
|
*/
|
|
@@ -457,6 +489,8 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
|
|
|
457
489
|
'.cc',
|
|
458
490
|
'.cxx',
|
|
459
491
|
'.hpp',
|
|
492
|
+
'.cu',
|
|
493
|
+
'.cuh',
|
|
460
494
|
'.kt',
|
|
461
495
|
'.kts',
|
|
462
496
|
'.swift',
|
|
@@ -471,6 +505,23 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([
|
|
|
471
505
|
'.hs',
|
|
472
506
|
'.ml',
|
|
473
507
|
'.mli',
|
|
508
|
+
'.fs',
|
|
509
|
+
'.fsx',
|
|
510
|
+
'.fsi',
|
|
511
|
+
'.m',
|
|
512
|
+
'.gleam',
|
|
513
|
+
'.jl',
|
|
514
|
+
'.clj',
|
|
515
|
+
'.cljs',
|
|
516
|
+
'.cljc',
|
|
517
|
+
'.erl',
|
|
518
|
+
'.hrl',
|
|
519
|
+
'.groovy',
|
|
520
|
+
'.gvy',
|
|
521
|
+
'.r',
|
|
522
|
+
'.sol',
|
|
523
|
+
'.v',
|
|
524
|
+
'.sv',
|
|
474
525
|
]);
|
|
475
526
|
|
|
476
527
|
/**
|
|
@@ -520,25 +571,36 @@ export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClass
|
|
|
520
571
|
}
|
|
521
572
|
|
|
522
573
|
/**
|
|
523
|
-
* Render `{ ext → paths[] }` as
|
|
524
|
-
*
|
|
525
|
-
*
|
|
526
|
-
*
|
|
527
|
-
*
|
|
574
|
+
* Render `{ ext → paths[] }` as a multi-line tabular breakdown for log lines.
|
|
575
|
+
* Each extension occupies its own line so a long warning scans like a table
|
|
576
|
+
* instead of a wall of semicolon-separated slices. Caps at 3 sample paths per
|
|
577
|
+
* extension and 6 extensions total to keep output bounded when many languages
|
|
578
|
+
* are dropped at once. Extensions are sorted by descending file count so the
|
|
579
|
+
* loudest offender shows up first; ties keep insertion order.
|
|
580
|
+
*
|
|
581
|
+
* Returns the empty string for empty input, and otherwise a string that
|
|
582
|
+
* begins with `\n` so callers can append it directly after the header line
|
|
583
|
+
* (`"Backfilling via WASM:" + formatDropExtensionSummary(...)`).
|
|
584
|
+
*
|
|
585
|
+
* Pure function — safe to unit-test independently.
|
|
528
586
|
*/
|
|
529
587
|
export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
|
|
530
588
|
const MAX_EXTS = 6;
|
|
531
589
|
const MAX_SAMPLES = 3;
|
|
532
590
|
const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
|
|
533
|
-
|
|
591
|
+
if (entries.length === 0) return '';
|
|
592
|
+
const shown = entries.slice(0, MAX_EXTS);
|
|
593
|
+
const extWidth = Math.max(...shown.map(([ext]) => ext.length));
|
|
594
|
+
const countWidth = Math.max(...shown.map(([, paths]) => String(paths.length).length));
|
|
595
|
+
const lines = shown.map(([ext, paths]) => {
|
|
534
596
|
const sample = paths.slice(0, MAX_SAMPLES).join(', ');
|
|
535
|
-
const more = paths.length > MAX_SAMPLES ?
|
|
536
|
-
return
|
|
597
|
+
const more = paths.length > MAX_SAMPLES ? ` (+${paths.length - MAX_SAMPLES} more)` : '';
|
|
598
|
+
return ` ${ext.padEnd(extWidth)} ${String(paths.length).padStart(countWidth)} ${sample}${more}`;
|
|
537
599
|
});
|
|
538
600
|
if (entries.length > MAX_EXTS) {
|
|
539
|
-
|
|
601
|
+
lines.push(` (+${entries.length - MAX_EXTS} more extension(s))`);
|
|
540
602
|
}
|
|
541
|
-
return
|
|
603
|
+
return `\n${lines.join('\n')}`;
|
|
542
604
|
}
|
|
543
605
|
|
|
544
606
|
// ── Unified API ──────────────────────────────────────────────────────────────
|
|
@@ -573,24 +635,36 @@ function patchDefinitions(definitions: any[]): void {
|
|
|
573
635
|
}
|
|
574
636
|
}
|
|
575
637
|
|
|
638
|
+
/**
|
|
639
|
+
* Field renames applied to each import record to bridge older native binaries
|
|
640
|
+
* that emit snake_case names. Each `[camel, snake]` pair becomes:
|
|
641
|
+
* `if (imp[camel] === undefined) imp[camel] = imp[snake];`
|
|
642
|
+
* Defined as data so the loop body stays trivially linear in cognitive complexity.
|
|
643
|
+
*/
|
|
644
|
+
const IMPORT_FIELD_RENAMES: ReadonlyArray<readonly [string, string]> = [
|
|
645
|
+
['typeOnly', 'type_only'],
|
|
646
|
+
['wildcardReexport', 'wildcard_reexport'],
|
|
647
|
+
['pythonImport', 'python_import'],
|
|
648
|
+
['goImport', 'go_import'],
|
|
649
|
+
['rustUse', 'rust_use'],
|
|
650
|
+
['javaImport', 'java_import'],
|
|
651
|
+
['csharpUsing', 'csharp_using'],
|
|
652
|
+
['rubyRequire', 'ruby_require'],
|
|
653
|
+
['phpUse', 'php_use'],
|
|
654
|
+
['cInclude', 'c_include'],
|
|
655
|
+
['kotlinImport', 'kotlin_import'],
|
|
656
|
+
['swiftImport', 'swift_import'],
|
|
657
|
+
['scalaImport', 'scala_import'],
|
|
658
|
+
['bashSource', 'bash_source'],
|
|
659
|
+
['dynamicImport', 'dynamic_import'],
|
|
660
|
+
];
|
|
661
|
+
|
|
576
662
|
/** Patch import fields for backward compat with older native binaries. */
|
|
577
663
|
function patchImports(imports: any[]): void {
|
|
578
664
|
for (const i of imports) {
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
if (i.goImport === undefined) i.goImport = i.go_import;
|
|
583
|
-
if (i.rustUse === undefined) i.rustUse = i.rust_use;
|
|
584
|
-
if (i.javaImport === undefined) i.javaImport = i.java_import;
|
|
585
|
-
if (i.csharpUsing === undefined) i.csharpUsing = i.csharp_using;
|
|
586
|
-
if (i.rubyRequire === undefined) i.rubyRequire = i.ruby_require;
|
|
587
|
-
if (i.phpUse === undefined) i.phpUse = i.php_use;
|
|
588
|
-
if (i.cInclude === undefined) i.cInclude = i.c_include;
|
|
589
|
-
if (i.kotlinImport === undefined) i.kotlinImport = i.kotlin_import;
|
|
590
|
-
if (i.swiftImport === undefined) i.swiftImport = i.swift_import;
|
|
591
|
-
if (i.scalaImport === undefined) i.scalaImport = i.scala_import;
|
|
592
|
-
if (i.bashSource === undefined) i.bashSource = i.bash_source;
|
|
593
|
-
if (i.dynamicImport === undefined) i.dynamicImport = i.dynamic_import;
|
|
665
|
+
for (const [camel, snake] of IMPORT_FIELD_RENAMES) {
|
|
666
|
+
if (i[camel] === undefined) i[camel] = i[snake];
|
|
667
|
+
}
|
|
594
668
|
}
|
|
595
669
|
}
|
|
596
670
|
|
|
@@ -812,11 +886,18 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [
|
|
|
812
886
|
},
|
|
813
887
|
{
|
|
814
888
|
id: 'fsharp',
|
|
815
|
-
extensions: ['.fs', '.fsx'
|
|
889
|
+
extensions: ['.fs', '.fsx'],
|
|
816
890
|
grammarFile: 'tree-sitter-fsharp.wasm',
|
|
817
891
|
extractor: extractFSharpSymbols,
|
|
818
892
|
required: false,
|
|
819
893
|
},
|
|
894
|
+
{
|
|
895
|
+
id: 'fsharp-signature',
|
|
896
|
+
extensions: ['.fsi'],
|
|
897
|
+
grammarFile: 'tree-sitter-fsharp_signature.wasm',
|
|
898
|
+
extractor: extractFSharpSymbols,
|
|
899
|
+
required: false,
|
|
900
|
+
},
|
|
820
901
|
{
|
|
821
902
|
id: 'gleam',
|
|
822
903
|
extensions: ['.gleam'],
|
|
@@ -1133,18 +1214,16 @@ export async function parseFilesWasmForBackfill(
|
|
|
1133
1214
|
}
|
|
1134
1215
|
|
|
1135
1216
|
/**
|
|
1136
|
-
*
|
|
1217
|
+
* Run the native engine over `filePaths` and ingest the results into `result`.
|
|
1218
|
+
* Returns the set of file paths the native engine successfully parsed and the
|
|
1219
|
+
* TS/TSX files that need a typeMap backfill pass.
|
|
1137
1220
|
*/
|
|
1138
|
-
|
|
1221
|
+
function ingestNativeResults(
|
|
1222
|
+
native: any,
|
|
1139
1223
|
filePaths: string[],
|
|
1140
1224
|
rootDir: string,
|
|
1141
|
-
|
|
1142
|
-
):
|
|
1143
|
-
const { native } = resolveEngine(opts);
|
|
1144
|
-
|
|
1145
|
-
if (!native) return parseFilesWasm(filePaths, rootDir);
|
|
1146
|
-
|
|
1147
|
-
const result = new Map<string, ExtractorOutput>();
|
|
1225
|
+
result: Map<string, ExtractorOutput>,
|
|
1226
|
+
): { nativeParsed: Set<string>; needsTypeMap: { filePath: string; relPath: string }[] } {
|
|
1148
1227
|
// Always extract all analysis data (dataflow + AST nodes) during native parse.
|
|
1149
1228
|
// This eliminates the need for any downstream WASM re-parse or native standalone calls.
|
|
1150
1229
|
const nativeResults = native.parseFilesFull
|
|
@@ -1167,27 +1246,51 @@ export async function parseFilesAuto(
|
|
|
1167
1246
|
needsTypeMap.push({ filePath: r.file, relPath });
|
|
1168
1247
|
}
|
|
1169
1248
|
}
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1249
|
+
return { nativeParsed, needsTypeMap };
|
|
1250
|
+
}
|
|
1173
1251
|
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1252
|
+
/**
|
|
1253
|
+
* Engine parity: native may silently drop files whose extensions are in
|
|
1254
|
+
* SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
|
|
1255
|
+
* extractor/grammar is missing or fails. WASM handles these — fall back so
|
|
1256
|
+
* both engines process the same file set (#967). Restrict to installed WASM
|
|
1257
|
+
* grammars so we don't warn about files that neither engine can parse.
|
|
1258
|
+
*/
|
|
1259
|
+
async function backfillNativeDrops(
|
|
1260
|
+
filePaths: string[],
|
|
1261
|
+
nativeParsed: Set<string>,
|
|
1262
|
+
rootDir: string,
|
|
1263
|
+
result: Map<string, ExtractorOutput>,
|
|
1264
|
+
): Promise<void> {
|
|
1179
1265
|
const installedExts = getInstalledWasmExtensions();
|
|
1180
1266
|
const dropped = filePaths.filter(
|
|
1181
1267
|
(f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
|
|
1182
1268
|
);
|
|
1183
|
-
if (dropped.length
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
}
|
|
1269
|
+
if (dropped.length === 0) return;
|
|
1270
|
+
warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
|
|
1271
|
+
const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
|
|
1272
|
+
for (const [relPath, symbols] of wasmResults) {
|
|
1273
|
+
result.set(relPath, symbols);
|
|
1189
1274
|
}
|
|
1275
|
+
}
|
|
1190
1276
|
|
|
1277
|
+
/**
|
|
1278
|
+
* Parse multiple files in bulk and return a Map<relPath, symbols>.
|
|
1279
|
+
*/
|
|
1280
|
+
export async function parseFilesAuto(
|
|
1281
|
+
filePaths: string[],
|
|
1282
|
+
rootDir: string,
|
|
1283
|
+
opts: ParseEngineOpts = {},
|
|
1284
|
+
): Promise<Map<string, ExtractorOutput>> {
|
|
1285
|
+
const { native } = resolveEngine(opts);
|
|
1286
|
+
if (!native) return parseFilesWasm(filePaths, rootDir);
|
|
1287
|
+
|
|
1288
|
+
const result = new Map<string, ExtractorOutput>();
|
|
1289
|
+
const { nativeParsed, needsTypeMap } = ingestNativeResults(native, filePaths, rootDir, result);
|
|
1290
|
+
if (needsTypeMap.length > 0) {
|
|
1291
|
+
await backfillTypeMapBatch(needsTypeMap, result);
|
|
1292
|
+
}
|
|
1293
|
+
await backfillNativeDrops(filePaths, nativeParsed, rootDir, result);
|
|
1191
1294
|
return result;
|
|
1192
1295
|
}
|
|
1193
1296
|
|
|
@@ -8,6 +8,19 @@ import { embed, getModelConfig } from './models.js';
|
|
|
8
8
|
import { buildSourceText } from './strategies/source.js';
|
|
9
9
|
import { buildStructuredText } from './strategies/structured.js';
|
|
10
10
|
|
|
11
|
+
/** A node row that is known to carry a numeric `id`. */
type EmbeddingNode = NodeRow & { id: number };

/** Selects which text builder is used to produce the text for each symbol. */
type EmbeddingStrategy = 'structured' | 'source';

/**
 * Result of preparing symbols for embedding: four parallel arrays (index `i`
 * in each refers to the same symbol) plus bookkeeping counters.
 */
interface PreparedEmbeddings {
  // Parallel per-symbol arrays.
  nodeIds: number[];
  nodeNames: string[];
  texts: string[];
  previews: string[];

  // Counters gathered while preparing the texts.
  /** Number of symbols whose text was truncated to fit the context window. */
  overflowCount: number;
  /** Source files read vs. skipped — presumably skipped means unreadable; confirm in prepareEmbeddingTexts. */
  filesRead: number;
  filesSkipped: number;
}
|
|
23
|
+
|
|
11
24
|
/**
|
|
12
25
|
* Rough token estimate (~4 chars per token for code/English).
|
|
13
26
|
* Conservative — avoids adding a tokenizer dependency.
|
|
@@ -47,47 +60,22 @@ function initEmbeddingsSchema(db: BetterSqlite3Database): void {
|
|
|
47
60
|
`);
|
|
48
61
|
}
|
|
49
62
|
|
|
50
|
-
export interface BuildEmbeddingsOptions {
|
|
51
|
-
strategy?: 'structured' | 'source';
|
|
52
|
-
}
|
|
53
|
-
|
|
54
63
|
/**
|
|
55
|
-
*
|
|
64
|
+
* Resolve the repo root for embedding. Prefer the root recorded at build time;
|
|
65
|
+
* fall back to `<dbParent>` only when the DB lives at the conventional
|
|
66
|
+
* `<root>/.codegraph/graph.db` layout — otherwise trust the caller's rootDir.
|
|
56
67
|
*/
|
|
57
|
-
|
|
58
|
-
rootDir: string,
|
|
59
|
-
modelKey: string,
|
|
60
|
-
customDbPath?: string,
|
|
61
|
-
options: BuildEmbeddingsOptions = {},
|
|
62
|
-
): Promise<void> {
|
|
63
|
-
const strategy = options.strategy || 'structured';
|
|
64
|
-
const dbPath = customDbPath || findDbPath(undefined);
|
|
65
|
-
|
|
66
|
-
if (!fs.existsSync(dbPath)) {
|
|
67
|
-
throw new DbError(
|
|
68
|
-
`No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
|
|
69
|
-
{ file: dbPath },
|
|
70
|
-
);
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
const db = openDb(dbPath) as BetterSqlite3Database;
|
|
74
|
-
initEmbeddingsSchema(db);
|
|
75
|
-
|
|
76
|
-
// Prefer the repo root recorded at build time — embed may be invoked from a
|
|
77
|
-
// different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
|
|
78
|
-
// positional rootDir will be wrong in that case. For legacy DBs without
|
|
79
|
-
// root_dir metadata, fall back to `<dbParent>` only when the DB lives at
|
|
80
|
-
// the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
|
|
81
|
-
// the caller-provided rootDir (which may be an explicit positional arg).
|
|
82
|
-
// `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
|
|
83
|
-
// conventional-layout check is required to keep the rootDir path reachable.
|
|
68
|
+
function resolveRoot(db: BetterSqlite3Database, dbPath: string, rootDir: string): string {
  // Candidate 1: the root recorded in the DB's build metadata.
  const recordedRoot = getBuildMeta(db, 'root_dir');

  // Candidate 2: the grandparent of the DB file, but only when the DB sits
  // directly inside a directory named `.codegraph`.
  const dbDir = path.dirname(path.resolve(dbPath));
  let conventionalRoot: string | undefined;
  if (path.basename(dbDir) === '.codegraph') {
    conventionalRoot = path.dirname(dbDir);
  }

  // First truthy candidate wins; the caller-supplied rootDir is the last resort.
  if (recordedRoot) return recordedRoot;
  if (conventionalRoot) return conventionalRoot;
  return rootDir;
}
|
|
90
76
|
|
|
77
|
+
/** Reset embedding tables and load eligible symbols grouped by file. */
|
|
78
|
+
function loadNodesByFile(db: BetterSqlite3Database): Map<string, EmbeddingNode[]> {
|
|
91
79
|
db.exec('DELETE FROM embeddings');
|
|
92
80
|
db.exec('DELETE FROM embedding_meta');
|
|
93
81
|
db.exec('DELETE FROM fts_index');
|
|
@@ -96,22 +84,52 @@ export async function buildEmbeddings(
|
|
|
96
84
|
.prepare(
|
|
97
85
|
`SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
|
|
98
86
|
)
|
|
99
|
-
.all() as
|
|
87
|
+
.all() as EmbeddingNode[];
|
|
100
88
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
const byFile = new Map<string, typeof nodes>();
|
|
89
|
+
const byFile = new Map<string, EmbeddingNode[]>();
|
|
104
90
|
for (const node of nodes) {
|
|
105
91
|
if (!byFile.has(node.file)) byFile.set(node.file, []);
|
|
106
92
|
byFile.get(node.file)?.push(node);
|
|
107
93
|
}
|
|
94
|
+
return byFile;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
 * Produce the embedding text for one node using the chosen strategy.
 *
 * If the estimated token count exceeds `contextWindow`, the text is cut to
 * `contextWindow * 4` characters and `overflowed` is reported as true.
 *
 * @returns The (possibly truncated) text and whether truncation happened.
 */
function buildNodeText(
  node: EmbeddingNode,
  file: string,
  lines: string[],
  db: BetterSqlite3Database,
  strategy: EmbeddingStrategy,
  contextWindow: number,
): { text: string; overflowed: boolean } {
  let fullText: string;
  if (strategy === 'structured') {
    fullText = buildStructuredText(node, file, lines, db);
  } else {
    fullText = buildSourceText(node, file, lines);
  }

  const overflowed = estimateTokens(fullText) > contextWindow;
  // The character budget uses the same ~4 chars-per-token ratio as the estimate.
  const text = overflowed ? fullText.slice(0, contextWindow * 4) : fullText;
  return { text, overflowed };
}
|
|
108
117
|
|
|
118
|
+
/**
|
|
119
|
+
* Walk files in the graph, read source, and produce parallel arrays of
|
|
120
|
+
* texts / nodeIds / nodeNames / previews ready for embedding.
|
|
121
|
+
*/
|
|
122
|
+
function prepareEmbeddingTexts(
|
|
123
|
+
byFile: Map<string, EmbeddingNode[]>,
|
|
124
|
+
db: BetterSqlite3Database,
|
|
125
|
+
resolvedRoot: string,
|
|
126
|
+
strategy: EmbeddingStrategy,
|
|
127
|
+
contextWindow: number,
|
|
128
|
+
): PreparedEmbeddings {
|
|
109
129
|
const texts: string[] = [];
|
|
110
130
|
const nodeIds: number[] = [];
|
|
111
131
|
const nodeNames: string[] = [];
|
|
112
132
|
const previews: string[] = [];
|
|
113
|
-
const config = getModelConfig(modelKey);
|
|
114
|
-
const contextWindow = config.contextWindow;
|
|
115
133
|
let overflowCount = 0;
|
|
116
134
|
let filesRead = 0;
|
|
117
135
|
let filesSkipped = 0;
|
|
@@ -129,19 +147,8 @@ export async function buildEmbeddings(
|
|
|
129
147
|
}
|
|
130
148
|
|
|
131
149
|
for (const node of fileNodes) {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
? buildStructuredText(node, file, lines, db)
|
|
135
|
-
: buildSourceText(node, file, lines);
|
|
136
|
-
|
|
137
|
-
// Detect and handle context window overflow
|
|
138
|
-
const tokens = estimateTokens(text);
|
|
139
|
-
if (tokens > contextWindow) {
|
|
140
|
-
overflowCount++;
|
|
141
|
-
const maxChars = contextWindow * 4;
|
|
142
|
-
text = text.slice(0, maxChars);
|
|
143
|
-
}
|
|
144
|
-
|
|
150
|
+
const { text, overflowed } = buildNodeText(node, file, lines, db, strategy, contextWindow);
|
|
151
|
+
if (overflowed) overflowCount++;
|
|
145
152
|
texts.push(text);
|
|
146
153
|
nodeIds.push(node.id);
|
|
147
154
|
nodeNames.push(node.name);
|
|
@@ -149,28 +156,19 @@ export async function buildEmbeddings(
|
|
|
149
156
|
}
|
|
150
157
|
}
|
|
151
158
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
`${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
|
|
155
|
-
);
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// If there were symbols to embed but every file failed to read, the DB was
|
|
159
|
-
// almost certainly built from a different location than the current cwd.
|
|
160
|
-
// Surface this clearly instead of emitting a silent "Stored 0 embeddings".
|
|
161
|
-
if (byFile.size > 0 && filesRead === 0) {
|
|
162
|
-
closeDb(db);
|
|
163
|
-
throw new DbError(
|
|
164
|
-
`embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
|
|
165
|
-
`Tried resolving against: ${resolvedRoot}\n` +
|
|
166
|
-
'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
|
|
167
|
-
{ file: dbPath },
|
|
168
|
-
);
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
console.log(`Embedding ${texts.length} symbols...`);
|
|
172
|
-
const { vectors, dim } = await embed(texts, modelKey);
|
|
159
|
+
return { texts, nodeIds, nodeNames, previews, overflowCount, filesRead, filesSkipped };
|
|
160
|
+
}
|
|
173
161
|
|
|
162
|
+
/** Persist vectors, FTS rows, and embedding metadata in a single transaction. */
|
|
163
|
+
function persistEmbeddings(
|
|
164
|
+
db: BetterSqlite3Database,
|
|
165
|
+
prepared: PreparedEmbeddings,
|
|
166
|
+
vectors: Float32Array[],
|
|
167
|
+
dim: number,
|
|
168
|
+
modelName: string,
|
|
169
|
+
strategy: EmbeddingStrategy,
|
|
170
|
+
): void {
|
|
171
|
+
const { nodeIds, nodeNames, previews, texts, overflowCount } = prepared;
|
|
174
172
|
const insert = db.prepare(
|
|
175
173
|
'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
|
|
176
174
|
);
|
|
@@ -182,7 +180,7 @@ export async function buildEmbeddings(
|
|
|
182
180
|
insert.run(nodeIds[i], Buffer.from(vec.buffer), previews[i], texts[i]);
|
|
183
181
|
insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
|
|
184
182
|
}
|
|
185
|
-
insertMeta.run('model',
|
|
183
|
+
insertMeta.run('model', modelName);
|
|
186
184
|
insertMeta.run('dim', String(dim));
|
|
187
185
|
insertMeta.run('count', String(vectors.length));
|
|
188
186
|
insertMeta.run('fts_count', String(vectors.length));
|
|
@@ -193,6 +191,66 @@ export async function buildEmbeddings(
|
|
|
193
191
|
}
|
|
194
192
|
});
|
|
195
193
|
insertAll();
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
export interface BuildEmbeddingsOptions {
|
|
197
|
+
strategy?: EmbeddingStrategy;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/**
|
|
201
|
+
* Build embeddings for all functions/methods/classes in the graph.
|
|
202
|
+
*/
|
|
203
|
+
export async function buildEmbeddings(
|
|
204
|
+
rootDir: string,
|
|
205
|
+
modelKey: string,
|
|
206
|
+
customDbPath?: string,
|
|
207
|
+
options: BuildEmbeddingsOptions = {},
|
|
208
|
+
): Promise<void> {
|
|
209
|
+
const strategy = options.strategy || 'structured';
|
|
210
|
+
const dbPath = customDbPath || findDbPath(undefined);
|
|
211
|
+
|
|
212
|
+
if (!fs.existsSync(dbPath)) {
|
|
213
|
+
throw new DbError(
|
|
214
|
+
`No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
|
|
215
|
+
{ file: dbPath },
|
|
216
|
+
);
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
const db = openDb(dbPath) as BetterSqlite3Database;
|
|
220
|
+
initEmbeddingsSchema(db);
|
|
221
|
+
|
|
222
|
+
const resolvedRoot = resolveRoot(db, dbPath, rootDir);
|
|
223
|
+
const byFile = loadNodesByFile(db);
|
|
224
|
+
|
|
225
|
+
const nodeCount = [...byFile.values()].reduce((acc, list) => acc + list.length, 0);
|
|
226
|
+
console.log(`Building embeddings for ${nodeCount} symbols (strategy: ${strategy})...`);
|
|
227
|
+
|
|
228
|
+
const config = getModelConfig(modelKey);
|
|
229
|
+
const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, config.contextWindow);
|
|
230
|
+
|
|
231
|
+
if (prepared.overflowCount > 0) {
|
|
232
|
+
warn(
|
|
233
|
+
`${prepared.overflowCount} symbol(s) exceeded model context window (${config.contextWindow} tokens) and were truncated`,
|
|
234
|
+
);
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// If there were symbols to embed but every file failed to read, the DB was
|
|
238
|
+
// almost certainly built from a different location than the current cwd.
|
|
239
|
+
// Surface this clearly instead of emitting a silent "Stored 0 embeddings".
|
|
240
|
+
if (byFile.size > 0 && prepared.filesRead === 0) {
|
|
241
|
+
closeDb(db);
|
|
242
|
+
throw new DbError(
|
|
243
|
+
`embed: could not read any of the ${prepared.filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
|
|
244
|
+
`Tried resolving against: ${resolvedRoot}\n` +
|
|
245
|
+
'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
|
|
246
|
+
{ file: dbPath },
|
|
247
|
+
);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
console.log(`Embedding ${prepared.texts.length} symbols...`);
|
|
251
|
+
const { vectors, dim } = await embed(prepared.texts, modelKey);
|
|
252
|
+
|
|
253
|
+
persistEmbeddings(db, prepared, vectors as Float32Array[], dim, config.name, strategy);
|
|
196
254
|
|
|
197
255
|
console.log(
|
|
198
256
|
`\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
|