@optave/codegraph 3.10.0 → 3.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -33
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +91 -60
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/rules/index.d.ts.map +1 -1
- package/dist/ast-analysis/rules/index.js +77 -0
- package/dist/ast-analysis/rules/index.js.map +1 -1
- package/dist/ast-analysis/visitor-utils.d.ts +3 -0
- package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
- package/dist/ast-analysis/visitor-utils.js +83 -49
- package/dist/ast-analysis/visitor-utils.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
- package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
- package/dist/cli/commands/audit.js +1 -1
- package/dist/cli/commands/audit.js.map +1 -1
- package/dist/cli/commands/build.d.ts.map +1 -1
- package/dist/cli/commands/build.js +2 -0
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/check.js +1 -1
- package/dist/cli/commands/check.js.map +1 -1
- package/dist/cli/commands/children.js +1 -1
- package/dist/cli/commands/children.js.map +1 -1
- package/dist/cli/commands/diff-impact.js +1 -1
- package/dist/cli/commands/diff-impact.js.map +1 -1
- package/dist/cli/commands/embed.d.ts.map +1 -1
- package/dist/cli/commands/embed.js +49 -4
- package/dist/cli/commands/embed.js.map +1 -1
- package/dist/cli/commands/roles.js +1 -1
- package/dist/cli/commands/roles.js.map +1 -1
- package/dist/cli/commands/structure.js +1 -1
- package/dist/cli/commands/structure.js.map +1 -1
- package/dist/cli/shared/options.js +1 -1
- package/dist/cli/shared/options.js.map +1 -1
- package/dist/db/connection.d.ts.map +1 -1
- package/dist/db/connection.js +8 -0
- package/dist/db/connection.js.map +1 -1
- package/dist/domain/analysis/dependencies.d.ts.map +1 -1
- package/dist/domain/analysis/dependencies.js +106 -80
- package/dist/domain/analysis/dependencies.js.map +1 -1
- package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
- package/dist/domain/analysis/fn-impact.js +77 -52
- package/dist/domain/analysis/fn-impact.js.map +1 -1
- package/dist/domain/analysis/module-map.d.ts.map +1 -1
- package/dist/domain/analysis/module-map.js +132 -121
- package/dist/domain/analysis/module-map.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +4 -4
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +47 -33
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/incremental.d.ts +6 -6
- package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
- package/dist/domain/graph/builder/incremental.js +148 -99
- package/dist/domain/graph/builder/incremental.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts +1 -0
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +23 -637
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.js +141 -98
- package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.js +82 -65
- package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +60 -51
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
- package/dist/domain/graph/builder/stages/resolve-imports.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/resolve-imports.js +73 -22
- package/dist/domain/graph/builder/stages/resolve-imports.js.map +1 -1
- package/dist/domain/graph/cycles.d.ts +6 -4
- package/dist/domain/graph/cycles.d.ts.map +1 -1
- package/dist/domain/graph/cycles.js +50 -55
- package/dist/domain/graph/cycles.js.map +1 -1
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +89 -70
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +28 -20
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +12 -23
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +153 -80
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts +3 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +68 -45
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/search/models.d.ts +18 -0
- package/dist/domain/search/models.d.ts.map +1 -1
- package/dist/domain/search/models.js +72 -4
- package/dist/domain/search/models.js.map +1 -1
- package/dist/domain/search/search/hybrid.d.ts.map +1 -1
- package/dist/domain/search/search/hybrid.js +49 -40
- package/dist/domain/search/search/hybrid.js.map +1 -1
- package/dist/domain/search/search/semantic.d.ts.map +1 -1
- package/dist/domain/search/search/semantic.js +69 -49
- package/dist/domain/search/search/semantic.js.map +1 -1
- package/dist/domain/wasm-worker-entry.js +209 -137
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/extractors/c.js +25 -6
- package/dist/extractors/c.js.map +1 -1
- package/dist/extractors/cpp.js +47 -6
- package/dist/extractors/cpp.js.map +1 -1
- package/dist/extractors/cuda.js +90 -14
- package/dist/extractors/cuda.js.map +1 -1
- package/dist/extractors/elixir.js +108 -4
- package/dist/extractors/elixir.js.map +1 -1
- package/dist/extractors/erlang.js +56 -20
- package/dist/extractors/erlang.js.map +1 -1
- package/dist/extractors/fsharp.d.ts +7 -0
- package/dist/extractors/fsharp.d.ts.map +1 -1
- package/dist/extractors/fsharp.js +94 -0
- package/dist/extractors/fsharp.js.map +1 -1
- package/dist/extractors/gleam.d.ts.map +1 -1
- package/dist/extractors/gleam.js +29 -33
- package/dist/extractors/gleam.js.map +1 -1
- package/dist/extractors/groovy.js +41 -1
- package/dist/extractors/groovy.js.map +1 -1
- package/dist/extractors/haskell.js +48 -4
- package/dist/extractors/haskell.js.map +1 -1
- package/dist/extractors/helpers.d.ts +79 -1
- package/dist/extractors/helpers.d.ts.map +1 -1
- package/dist/extractors/helpers.js +137 -0
- package/dist/extractors/helpers.js.map +1 -1
- package/dist/extractors/java.d.ts.map +1 -1
- package/dist/extractors/java.js +37 -49
- package/dist/extractors/java.js.map +1 -1
- package/dist/extractors/javascript.d.ts.map +1 -1
- package/dist/extractors/javascript.js +44 -44
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/extractors/julia.js +198 -74
- package/dist/extractors/julia.js.map +1 -1
- package/dist/extractors/kotlin.js +4 -0
- package/dist/extractors/kotlin.js.map +1 -1
- package/dist/extractors/objc.js +184 -47
- package/dist/extractors/objc.js.map +1 -1
- package/dist/extractors/python.js +7 -4
- package/dist/extractors/python.js.map +1 -1
- package/dist/extractors/r.d.ts.map +1 -1
- package/dist/extractors/r.js +103 -87
- package/dist/extractors/r.js.map +1 -1
- package/dist/extractors/scala.d.ts.map +1 -1
- package/dist/extractors/scala.js +18 -32
- package/dist/extractors/scala.js.map +1 -1
- package/dist/extractors/solidity.d.ts.map +1 -1
- package/dist/extractors/solidity.js +55 -69
- package/dist/extractors/solidity.js.map +1 -1
- package/dist/extractors/verilog.js +80 -15
- package/dist/extractors/verilog.js.map +1 -1
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +49 -39
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/cfg.d.ts.map +1 -1
- package/dist/features/cfg.js +90 -63
- package/dist/features/cfg.js.map +1 -1
- package/dist/features/check.d.ts.map +1 -1
- package/dist/features/check.js +43 -34
- package/dist/features/check.js.map +1 -1
- package/dist/features/cochange.d.ts.map +1 -1
- package/dist/features/cochange.js +68 -56
- package/dist/features/cochange.js.map +1 -1
- package/dist/features/complexity.d.ts.map +1 -1
- package/dist/features/complexity.js +105 -75
- package/dist/features/complexity.js.map +1 -1
- package/dist/features/dataflow.d.ts.map +1 -1
- package/dist/features/dataflow.js +37 -29
- package/dist/features/dataflow.js.map +1 -1
- package/dist/features/flow.d.ts.map +1 -1
- package/dist/features/flow.js +31 -22
- package/dist/features/flow.js.map +1 -1
- package/dist/features/graph-enrichment.d.ts.map +1 -1
- package/dist/features/graph-enrichment.js +77 -70
- package/dist/features/graph-enrichment.js.map +1 -1
- package/dist/features/owners.d.ts +17 -26
- package/dist/features/owners.d.ts.map +1 -1
- package/dist/features/owners.js +120 -109
- package/dist/features/owners.js.map +1 -1
- package/dist/features/sequence.d.ts.map +1 -1
- package/dist/features/sequence.js +59 -54
- package/dist/features/sequence.js.map +1 -1
- package/dist/features/structure-query.d.ts.map +1 -1
- package/dist/features/structure-query.js +60 -60
- package/dist/features/structure-query.js.map +1 -1
- package/dist/features/structure.js +28 -36
- package/dist/features/structure.js.map +1 -1
- package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
- package/dist/graph/algorithms/leiden/optimiser.js +100 -69
- package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
- package/dist/graph/classifiers/roles.d.ts.map +1 -1
- package/dist/graph/classifiers/roles.js +63 -59
- package/dist/graph/classifiers/roles.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -1
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/mcp/tool-registry.d.ts.map +1 -1
- package/dist/mcp/tool-registry.js +4 -0
- package/dist/mcp/tool-registry.js.map +1 -1
- package/dist/mcp/tools/semantic-search.d.ts +1 -0
- package/dist/mcp/tools/semantic-search.d.ts.map +1 -1
- package/dist/mcp/tools/semantic-search.js +1 -0
- package/dist/mcp/tools/semantic-search.js.map +1 -1
- package/dist/presentation/cfg.d.ts.map +1 -1
- package/dist/presentation/cfg.js +44 -29
- package/dist/presentation/cfg.js.map +1 -1
- package/dist/presentation/flow.d.ts.map +1 -1
- package/dist/presentation/flow.js +58 -38
- package/dist/presentation/flow.js.map +1 -1
- package/dist/types.d.ts +16 -2
- package/dist/types.d.ts.map +1 -1
- package/grammars/tree-sitter-erlang.wasm +0 -0
- package/grammars/tree-sitter-fsharp.wasm +0 -0
- package/grammars/tree-sitter-fsharp_signature.wasm +0 -0
- package/grammars/tree-sitter-gleam.wasm +0 -0
- package/package.json +10 -10
- package/src/ast-analysis/engine.ts +145 -61
- package/src/ast-analysis/rules/index.ts +87 -0
- package/src/ast-analysis/visitor-utils.ts +86 -46
- package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
- package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
- package/src/cli/commands/audit.ts +1 -1
- package/src/cli/commands/build.ts +2 -0
- package/src/cli/commands/check.ts +1 -1
- package/src/cli/commands/children.ts +1 -1
- package/src/cli/commands/diff-impact.ts +1 -1
- package/src/cli/commands/embed.ts +54 -4
- package/src/cli/commands/roles.ts +1 -1
- package/src/cli/commands/structure.ts +1 -1
- package/src/cli/shared/options.ts +1 -1
- package/src/db/connection.ts +8 -0
- package/src/domain/analysis/dependencies.ts +166 -85
- package/src/domain/analysis/fn-impact.ts +120 -50
- package/src/domain/analysis/module-map.ts +175 -140
- package/src/domain/graph/builder/helpers.ts +85 -76
- package/src/domain/graph/builder/incremental.ts +223 -131
- package/src/domain/graph/builder/pipeline.ts +32 -785
- package/src/domain/graph/builder/stages/build-edges.ts +207 -142
- package/src/domain/graph/builder/stages/build-structure.ts +115 -82
- package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
- package/src/domain/graph/builder/stages/finalize.ts +72 -70
- package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
- package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
- package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
- package/src/domain/graph/builder/stages/resolve-imports.ts +79 -25
- package/src/domain/graph/cycles.ts +51 -49
- package/src/domain/graph/journal.ts +84 -69
- package/src/domain/graph/watcher.ts +29 -25
- package/src/domain/parser.ts +170 -67
- package/src/domain/search/generator.ts +132 -74
- package/src/domain/search/models.ts +75 -4
- package/src/domain/search/search/hybrid.ts +53 -42
- package/src/domain/search/search/semantic.ts +105 -65
- package/src/domain/wasm-worker-entry.ts +243 -153
- package/src/extractors/c.ts +27 -8
- package/src/extractors/cpp.ts +50 -8
- package/src/extractors/cuda.ts +90 -16
- package/src/extractors/elixir.ts +103 -4
- package/src/extractors/erlang.ts +63 -20
- package/src/extractors/fsharp.ts +104 -0
- package/src/extractors/gleam.ts +40 -39
- package/src/extractors/groovy.ts +45 -1
- package/src/extractors/haskell.ts +45 -4
- package/src/extractors/helpers.ts +205 -1
- package/src/extractors/java.ts +42 -45
- package/src/extractors/javascript.ts +44 -43
- package/src/extractors/julia.ts +191 -77
- package/src/extractors/kotlin.ts +4 -0
- package/src/extractors/objc.ts +171 -47
- package/src/extractors/python.ts +5 -3
- package/src/extractors/r.ts +104 -82
- package/src/extractors/scala.ts +24 -36
- package/src/extractors/solidity.ts +59 -78
- package/src/extractors/verilog.ts +83 -15
- package/src/features/boundaries.ts +64 -46
- package/src/features/cfg.ts +145 -74
- package/src/features/check.ts +60 -43
- package/src/features/cochange.ts +95 -72
- package/src/features/complexity.ts +134 -79
- package/src/features/dataflow.ts +57 -34
- package/src/features/flow.ts +48 -24
- package/src/features/graph-enrichment.ts +105 -70
- package/src/features/owners.ts +186 -146
- package/src/features/sequence.ts +99 -69
- package/src/features/structure-query.ts +94 -79
- package/src/features/structure.ts +56 -56
- package/src/graph/algorithms/leiden/optimiser.ts +142 -87
- package/src/graph/classifiers/roles.ts +64 -54
- package/src/infrastructure/config.ts +1 -1
- package/src/mcp/tool-registry.ts +5 -0
- package/src/mcp/tools/semantic-search.ts +2 -0
- package/src/presentation/cfg.ts +48 -32
- package/src/presentation/flow.ts +100 -52
- package/src/types.ts +16 -1
|
@@ -1,14 +1,48 @@
|
|
|
1
1
|
import { execFileSync } from 'node:child_process';
|
|
2
|
+
import { createRequire } from 'node:module';
|
|
3
|
+
import path from 'node:path';
|
|
2
4
|
import { createInterface } from 'node:readline';
|
|
3
5
|
import { info } from '../../infrastructure/logger.js';
|
|
4
6
|
import { ConfigError, EngineError } from '../../shared/errors.js';
|
|
5
7
|
|
|
8
|
+
const _require = createRequire(import.meta.url);
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Resolve the directory where `npm install` should run so the installed
|
|
12
|
+
* package ends up reachable by `await import(pkg)` from inside this module.
|
|
13
|
+
*
|
|
14
|
+
* Without a `cwd`, `execFileSync('npm', ['install', ...])` operates on
|
|
15
|
+
* `process.cwd()` — when the user runs codegraph against a repo that is *not*
|
|
16
|
+
* the directory where codegraph itself is installed, npm installs into the
|
|
17
|
+
* wrong `node_modules`, the dynamic import still fails, and the user gets
|
|
18
|
+
* `ENGINE_UNAVAILABLE: ... installed but failed to load`.
|
|
19
|
+
*
|
|
20
|
+
* Pin cwd to the directory that contains @optave/codegraph's `node_modules`
|
|
21
|
+
* so the install lands where Node's resolution algorithm will find it.
|
|
22
|
+
*
|
|
23
|
+
* @internal Exported for unit tests; not part of the public barrel.
|
|
24
|
+
*/
|
|
25
|
+
export function resolveNpmInstallCwd(): string | undefined {
|
|
26
|
+
try {
|
|
27
|
+
const pkgJsonPath = _require.resolve('@optave/codegraph/package.json');
|
|
28
|
+
// pkgJsonPath = <host>/node_modules/@optave/codegraph/package.json
|
|
29
|
+
// dirname x4: package.json → codegraph → @optave → node_modules → <host>
|
|
30
|
+
return path.dirname(path.dirname(path.dirname(path.dirname(pkgJsonPath))));
|
|
31
|
+
} catch {
|
|
32
|
+
// Source-of-truth checkout (no @optave/codegraph in node_modules) — fall back
|
|
33
|
+
// to process.cwd() so legacy behavior survives in tests.
|
|
34
|
+
return undefined;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
6
38
|
export interface ModelConfig {
|
|
7
39
|
name: string;
|
|
8
40
|
dim: number;
|
|
9
41
|
contextWindow: number;
|
|
10
42
|
desc: string;
|
|
11
43
|
quantized: boolean;
|
|
44
|
+
/** Pooling strategy passed to the transformers pipeline. Defaults to 'mean'. */
|
|
45
|
+
pooling?: 'mean' | 'cls';
|
|
12
46
|
}
|
|
13
47
|
|
|
14
48
|
// Lazy-load transformers (heavy, optional module)
|
|
@@ -59,7 +93,7 @@ export const MODELS: Record<string, ModelConfig> = {
|
|
|
59
93
|
name: 'nomic-ai/nomic-embed-text-v1.5',
|
|
60
94
|
dim: 768,
|
|
61
95
|
contextWindow: 8192,
|
|
62
|
-
desc: '
|
|
96
|
+
desc: 'Matryoshka MRL trained (~137MB). 8192 context. Codegraph stores full 768d (no truncation); v1 scores higher on our benchmark.',
|
|
63
97
|
quantized: false,
|
|
64
98
|
},
|
|
65
99
|
'bge-large': {
|
|
@@ -69,11 +103,41 @@ export const MODELS: Record<string, ModelConfig> = {
|
|
|
69
103
|
desc: 'Best general retrieval (~335MB). Top MTEB scores.',
|
|
70
104
|
quantized: false,
|
|
71
105
|
},
|
|
106
|
+
'mxbai-xsmall': {
|
|
107
|
+
name: 'mixedbread-ai/mxbai-embed-xsmall-v1',
|
|
108
|
+
dim: 384,
|
|
109
|
+
contextWindow: 4096,
|
|
110
|
+
desc: 'Tiny model with long context (~50MB). 4096 ctx.',
|
|
111
|
+
quantized: false,
|
|
112
|
+
pooling: 'cls',
|
|
113
|
+
},
|
|
114
|
+
'mxbai-large': {
|
|
115
|
+
name: 'mixedbread-ai/mxbai-embed-large-v1',
|
|
116
|
+
dim: 1024,
|
|
117
|
+
contextWindow: 512,
|
|
118
|
+
desc: 'Top MTEB BERT-large, Matryoshka dimensions (~400MB). 512 ctx.',
|
|
119
|
+
quantized: false,
|
|
120
|
+
pooling: 'cls',
|
|
121
|
+
},
|
|
122
|
+
'bge-m3': {
|
|
123
|
+
name: 'Xenova/bge-m3',
|
|
124
|
+
dim: 1024,
|
|
125
|
+
contextWindow: 8192,
|
|
126
|
+
desc: 'Multilingual, multi-task (~600MB). 100+ languages, 8192 context.',
|
|
127
|
+
quantized: false,
|
|
128
|
+
},
|
|
129
|
+
modernbert: {
|
|
130
|
+
name: 'nomic-ai/modernbert-embed-base',
|
|
131
|
+
dim: 768,
|
|
132
|
+
contextWindow: 8192,
|
|
133
|
+
desc: 'ModernBERT base (~150MB). Newer architecture, 8192 ctx, English.',
|
|
134
|
+
quantized: false,
|
|
135
|
+
},
|
|
72
136
|
};
|
|
73
137
|
|
|
74
138
|
export const EMBEDDING_STRATEGIES: readonly string[] = ['structured', 'source'];
|
|
75
139
|
|
|
76
|
-
export const DEFAULT_MODEL: string = 'nomic
|
|
140
|
+
export const DEFAULT_MODEL: string = 'nomic';
|
|
77
141
|
const NPM_BIN = process.platform === 'win32' ? 'npm.cmd' : 'npm';
|
|
78
142
|
const BATCH_SIZE_MAP: Record<string, number> = {
|
|
79
143
|
minilm: 32,
|
|
@@ -83,6 +147,10 @@ const BATCH_SIZE_MAP: Record<string, number> = {
|
|
|
83
147
|
nomic: 8,
|
|
84
148
|
'nomic-v1.5': 8,
|
|
85
149
|
'bge-large': 4,
|
|
150
|
+
'mxbai-xsmall': 32,
|
|
151
|
+
'mxbai-large': 4,
|
|
152
|
+
'bge-m3': 4,
|
|
153
|
+
modernbert: 8,
|
|
86
154
|
};
|
|
87
155
|
const DEFAULT_BATCH_SIZE = 32;
|
|
88
156
|
|
|
@@ -104,12 +172,14 @@ export function getModelConfig(modelKey?: string): ModelConfig {
|
|
|
104
172
|
* @internal Not part of the public barrel.
|
|
105
173
|
*/
|
|
106
174
|
export function promptInstall(packageName: string): Promise<boolean> {
|
|
175
|
+
const installCwd = resolveNpmInstallCwd();
|
|
107
176
|
if (!process.stdin.isTTY) {
|
|
108
177
|
info(`Installing ${packageName} (optional dependency for semantic search)…`);
|
|
109
178
|
try {
|
|
110
179
|
execFileSync(NPM_BIN, ['install', '--no-save', packageName], {
|
|
111
180
|
stdio: 'inherit',
|
|
112
181
|
timeout: 300_000,
|
|
182
|
+
cwd: installCwd,
|
|
113
183
|
});
|
|
114
184
|
return Promise.resolve(true);
|
|
115
185
|
} catch (err) {
|
|
@@ -128,9 +198,10 @@ export function promptInstall(packageName: string): Promise<boolean> {
|
|
|
128
198
|
rl.close();
|
|
129
199
|
if (answer.trim().toLowerCase() !== 'y') return resolve(false);
|
|
130
200
|
try {
|
|
131
|
-
execFileSync(NPM_BIN, ['install', packageName], {
|
|
201
|
+
execFileSync(NPM_BIN, ['install', '--no-save', packageName], {
|
|
132
202
|
stdio: 'inherit',
|
|
133
203
|
timeout: 300_000,
|
|
204
|
+
cwd: installCwd,
|
|
134
205
|
});
|
|
135
206
|
resolve(true);
|
|
136
207
|
} catch (err) {
|
|
@@ -239,7 +310,7 @@ export async function embed(
|
|
|
239
310
|
const batch = texts.slice(i, i + batchSize);
|
|
240
311
|
const output =
|
|
241
312
|
(await // biome-ignore lint/complexity/noBannedTypes: dynamically loaded extractor is untyped
|
|
242
|
-
(ext as Function)(batch, { pooling: 'mean', normalize: true })) as {
|
|
313
|
+
(ext as Function)(batch, { pooling: config.pooling ?? 'mean', normalize: true })) as {
|
|
243
314
|
data: number[];
|
|
244
315
|
};
|
|
245
316
|
|
|
@@ -105,61 +105,72 @@ async function collectRankedLists(
|
|
|
105
105
|
return rankedLists;
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
+
/** Initialise a fusion entry seeded from the first ranked item we see for a key. */
|
|
109
|
+
function createFusionEntry(item: RankedItem): FusionEntry {
|
|
110
|
+
return {
|
|
111
|
+
name: item.name,
|
|
112
|
+
kind: item.kind,
|
|
113
|
+
file: item.file,
|
|
114
|
+
line: item.line,
|
|
115
|
+
endLine: (item.endLine as number | null) ?? null,
|
|
116
|
+
role: (item.role as string | null) ?? null,
|
|
117
|
+
fileHash: (item.fileHash as string | null) ?? null,
|
|
118
|
+
rrfScore: 0,
|
|
119
|
+
bm25Score: null,
|
|
120
|
+
bm25Rank: null,
|
|
121
|
+
similarity: null,
|
|
122
|
+
semanticRank: null,
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/** Merge a single ranked item into its fusion entry: update RRF and best per-source rank. */
|
|
127
|
+
function mergeRankedItem(entry: FusionEntry, item: RankedItem, k: number): void {
|
|
128
|
+
entry.rrfScore += 1 / (k + item.rank);
|
|
129
|
+
if (item.source === 'bm25') {
|
|
130
|
+
if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
|
|
131
|
+
entry.bm25Score = item.bm25Score ?? null;
|
|
132
|
+
entry.bm25Rank = item.rank;
|
|
133
|
+
}
|
|
134
|
+
} else if (entry.semanticRank === null || item.rank < entry.semanticRank) {
|
|
135
|
+
entry.similarity = item.similarity ?? null;
|
|
136
|
+
entry.semanticRank = item.rank;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
/** Flatten a fusion entry into the public-facing hybrid result shape. */
|
|
141
|
+
function toHybridResult(e: FusionEntry): HybridResult {
|
|
142
|
+
return {
|
|
143
|
+
name: e.name,
|
|
144
|
+
kind: e.kind,
|
|
145
|
+
file: e.file,
|
|
146
|
+
line: e.line,
|
|
147
|
+
endLine: e.endLine,
|
|
148
|
+
role: e.role,
|
|
149
|
+
fileHash: e.fileHash,
|
|
150
|
+
rrf: e.rrfScore,
|
|
151
|
+
bm25Score: e.bm25Score,
|
|
152
|
+
bm25Rank: e.bm25Rank,
|
|
153
|
+
similarity: e.similarity,
|
|
154
|
+
semanticRank: e.semanticRank,
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
|
|
108
158
|
/** Reciprocal Rank Fusion: merge ranked lists into a single scored result set. */
|
|
109
159
|
function fuseResults(rankedLists: RankedItem[][], k: number, limit: number): HybridResult[] {
|
|
110
160
|
const fusionMap = new Map<string, FusionEntry>();
|
|
111
|
-
|
|
112
161
|
for (const list of rankedLists) {
|
|
113
162
|
for (const item of list) {
|
|
114
163
|
if (!fusionMap.has(item.key)) {
|
|
115
|
-
fusionMap.set(item.key,
|
|
116
|
-
name: item.name,
|
|
117
|
-
kind: item.kind,
|
|
118
|
-
file: item.file,
|
|
119
|
-
line: item.line,
|
|
120
|
-
endLine: (item.endLine as number | null) ?? null,
|
|
121
|
-
role: (item.role as string | null) ?? null,
|
|
122
|
-
fileHash: (item.fileHash as string | null) ?? null,
|
|
123
|
-
rrfScore: 0,
|
|
124
|
-
bm25Score: null,
|
|
125
|
-
bm25Rank: null,
|
|
126
|
-
similarity: null,
|
|
127
|
-
semanticRank: null,
|
|
128
|
-
});
|
|
129
|
-
}
|
|
130
|
-
const entry = fusionMap.get(item.key)!;
|
|
131
|
-
entry.rrfScore += 1 / (k + item.rank);
|
|
132
|
-
if (item.source === 'bm25') {
|
|
133
|
-
if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
|
|
134
|
-
entry.bm25Score = (item as RankedItem & { bm25Score?: number }).bm25Score ?? null;
|
|
135
|
-
entry.bm25Rank = item.rank;
|
|
136
|
-
}
|
|
137
|
-
} else {
|
|
138
|
-
if (entry.semanticRank === null || item.rank < entry.semanticRank) {
|
|
139
|
-
entry.similarity = (item as RankedItem & { similarity?: number }).similarity ?? null;
|
|
140
|
-
entry.semanticRank = item.rank;
|
|
141
|
-
}
|
|
164
|
+
fusionMap.set(item.key, createFusionEntry(item));
|
|
142
165
|
}
|
|
166
|
+
mergeRankedItem(fusionMap.get(item.key)!, item, k);
|
|
143
167
|
}
|
|
144
168
|
}
|
|
145
169
|
|
|
146
170
|
return [...fusionMap.values()]
|
|
147
171
|
.sort((a, b) => b.rrfScore - a.rrfScore)
|
|
148
172
|
.slice(0, limit)
|
|
149
|
-
.map(
|
|
150
|
-
name: e.name,
|
|
151
|
-
kind: e.kind,
|
|
152
|
-
file: e.file,
|
|
153
|
-
line: e.line,
|
|
154
|
-
endLine: e.endLine,
|
|
155
|
-
role: e.role,
|
|
156
|
-
fileHash: e.fileHash,
|
|
157
|
-
rrf: e.rrfScore,
|
|
158
|
-
bm25Score: e.bm25Score,
|
|
159
|
-
bm25Rank: e.bm25Rank,
|
|
160
|
-
similarity: e.similarity,
|
|
161
|
-
semanticRank: e.semanticRank,
|
|
162
|
-
}));
|
|
173
|
+
.map(toHybridResult);
|
|
163
174
|
}
|
|
164
175
|
|
|
165
176
|
export async function hybridSearchData(
|
|
@@ -4,7 +4,7 @@ import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js';
|
|
|
4
4
|
import { normalizeSymbol } from '../../queries.js';
|
|
5
5
|
import { embed } from '../models.js';
|
|
6
6
|
import { cosineSim } from '../stores/sqlite-blob.js';
|
|
7
|
-
import { prepareSearch } from './prepare.js';
|
|
7
|
+
import { type PreparedSearch, prepareSearch } from './prepare.js';
|
|
8
8
|
|
|
9
9
|
export interface SemanticSearchOpts {
|
|
10
10
|
config?: CodegraphConfig;
|
|
@@ -30,6 +30,25 @@ export interface SearchDataResult {
|
|
|
30
30
|
results: SemanticResult[];
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
+
type StoredRow = PreparedSearch['rows'][number];
|
|
34
|
+
|
|
35
|
+
/** Reconstitute a stored embedding row's vector blob into a Float32Array. */
|
|
36
|
+
function rowVector(row: StoredRow): Float32Array {
|
|
37
|
+
return new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/** Warn when stored embeddings and the query model use different dimensions. */
|
|
41
|
+
function checkDimensionMismatch(storedDim: number | null, dim: number): boolean {
|
|
42
|
+
if (storedDim && dim !== storedDim) {
|
|
43
|
+
console.log(
|
|
44
|
+
`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
|
|
45
|
+
);
|
|
46
|
+
console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
|
|
47
|
+
return true;
|
|
48
|
+
}
|
|
49
|
+
return false;
|
|
50
|
+
}
|
|
51
|
+
|
|
33
52
|
export async function searchData(
|
|
34
53
|
query: string,
|
|
35
54
|
customDbPath: string | undefined,
|
|
@@ -50,20 +69,12 @@ export async function searchData(
|
|
|
50
69
|
dim,
|
|
51
70
|
} = await embed([query], modelKey ?? undefined);
|
|
52
71
|
|
|
53
|
-
if (storedDim
|
|
54
|
-
console.log(
|
|
55
|
-
`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
|
|
56
|
-
);
|
|
57
|
-
console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
|
|
58
|
-
return null;
|
|
59
|
-
}
|
|
72
|
+
if (checkDimensionMismatch(storedDim, dim)) return null;
|
|
60
73
|
|
|
61
74
|
const hc = new Map<string, string>();
|
|
62
75
|
const results: SemanticResult[] = [];
|
|
63
76
|
for (const row of rows) {
|
|
64
|
-
const
|
|
65
|
-
const sim = cosineSim(queryVec!, vec);
|
|
66
|
-
|
|
77
|
+
const sim = cosineSim(queryVec!, rowVector(row));
|
|
67
78
|
if (sim >= minScore) {
|
|
68
79
|
results.push({
|
|
69
80
|
...normalizeSymbol(row, db as BetterSqlite3Database, hc),
|
|
@@ -91,6 +102,82 @@ export interface MultiSearchResult {
|
|
|
91
102
|
}>;
|
|
92
103
|
}
|
|
93
104
|
|
|
105
|
+
interface RankedHit {
|
|
106
|
+
rowIndex: number;
|
|
107
|
+
similarity: number;
|
|
108
|
+
rank: number;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
interface FusionEntry {
|
|
112
|
+
rrfScore: number;
|
|
113
|
+
queryScores: Array<{ query: string; similarity: number; rank: number }>;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Emit a warning for any query pair whose embeddings are nearly identical,
|
|
118
|
+
* since RRF would over-weight matches shared between them.
|
|
119
|
+
*/
|
|
120
|
+
function warnOnSimilarQueries(
|
|
121
|
+
queries: string[],
|
|
122
|
+
queryVecs: Float32Array[],
|
|
123
|
+
threshold: number,
|
|
124
|
+
): void {
|
|
125
|
+
for (let i = 0; i < queryVecs.length; i++) {
|
|
126
|
+
for (let j = i + 1; j < queryVecs.length; j++) {
|
|
127
|
+
const sim = cosineSim(queryVecs[i]!, queryVecs[j]!);
|
|
128
|
+
if (sim >= threshold) {
|
|
129
|
+
warn(
|
|
130
|
+
`Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
|
|
131
|
+
`(${(sim * 100).toFixed(0)}% cosine similarity). ` +
|
|
132
|
+
`This may bias RRF results toward their shared matches. ` +
|
|
133
|
+
`Consider using more distinct queries.`,
|
|
134
|
+
);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
/** Rank stored rows for a single query, keeping only those above minScore. */
|
|
141
|
+
function rankRowsForQuery(
|
|
142
|
+
queryVec: Float32Array,
|
|
143
|
+
rowVecs: Float32Array[],
|
|
144
|
+
minScore: number,
|
|
145
|
+
): RankedHit[] {
|
|
146
|
+
const scored: Array<{ rowIndex: number; similarity: number }> = [];
|
|
147
|
+
for (let ri = 0; ri < rowVecs.length; ri++) {
|
|
148
|
+
const sim = cosineSim(queryVec, rowVecs[ri]!);
|
|
149
|
+
if (sim >= minScore) {
|
|
150
|
+
scored.push({ rowIndex: ri, similarity: sim });
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
scored.sort((a, b) => b.similarity - a.similarity);
|
|
154
|
+
return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
/**
 * Merge per-query rankings with Reciprocal Rank Fusion.
 *
 * Each hit adds `1 / (k + rank)` to its row's fused score and appends a record
 * of the query, similarity and rank that produced it.
 *
 * @param queries        Query strings, index-aligned with `perQueryRanked`.
 * @param perQueryRanked Ranked hits for each query.
 * @param k              RRF constant added to the rank in the denominator.
 * @returns Map from row index to its fused entry; rows hit by no query are absent.
 */
function fuseRankedHits(
  queries: string[],
  perQueryRanked: RankedHit[][],
  k: number,
): Map<number, FusionEntry> {
  const fused = new Map<number, FusionEntry>();
  for (const [queryIndex, query] of queries.entries()) {
    for (const hit of perQueryRanked[queryIndex]!) {
      let entry = fused.get(hit.rowIndex);
      if (entry === undefined) {
        // First time any query ranks this row: start from an empty accumulator.
        entry = { rrfScore: 0, queryScores: [] };
        fused.set(hit.rowIndex, entry);
      }
      entry.rrfScore += 1 / (k + hit.rank);
      entry.queryScores.push({ query: query!, similarity: hit.similarity, rank: hit.rank });
    }
  }
  return fused;
}
|
|
180
|
+
|
|
94
181
|
export async function multiSearchData(
|
|
95
182
|
queries: string[],
|
|
96
183
|
customDbPath: string | undefined,
|
|
@@ -101,6 +188,7 @@ export async function multiSearchData(
|
|
|
101
188
|
const limit = opts.limit ?? searchCfg.topK ?? 15;
|
|
102
189
|
const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2;
|
|
103
190
|
const k = opts.rrfK ?? searchCfg.rrfK ?? 60;
|
|
191
|
+
const similarityWarnThreshold = searchCfg.similarityWarnThreshold ?? 0.85;
|
|
104
192
|
|
|
105
193
|
const prepared = prepareSearch(customDbPath, opts);
|
|
106
194
|
if (!prepared) return null;
|
|
@@ -109,63 +197,15 @@ export async function multiSearchData(
|
|
|
109
197
|
try {
|
|
110
198
|
const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined);
|
|
111
199
|
|
|
112
|
-
|
|
113
|
-
for (let i = 0; i < queryVecs.length; i++) {
|
|
114
|
-
for (let j = i + 1; j < queryVecs.length; j++) {
|
|
115
|
-
const sim = cosineSim(queryVecs[i]!, queryVecs[j]!);
|
|
116
|
-
if (sim >= SIMILARITY_WARN_THRESHOLD) {
|
|
117
|
-
warn(
|
|
118
|
-
`Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
|
|
119
|
-
`(${(sim * 100).toFixed(0)}% cosine similarity). ` +
|
|
120
|
-
`This may bias RRF results toward their shared matches. ` +
|
|
121
|
-
`Consider using more distinct queries.`,
|
|
122
|
-
);
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
}
|
|
200
|
+
warnOnSimilarQueries(queries, queryVecs as Float32Array[], similarityWarnThreshold);
|
|
126
201
|
|
|
127
|
-
if (storedDim
|
|
128
|
-
console.log(
|
|
129
|
-
`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
|
|
130
|
-
);
|
|
131
|
-
console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
|
|
132
|
-
return null;
|
|
133
|
-
}
|
|
202
|
+
if (checkDimensionMismatch(storedDim, dim)) return null;
|
|
134
203
|
|
|
135
|
-
const rowVecs = rows.map(
|
|
136
|
-
|
|
204
|
+
const rowVecs = rows.map(rowVector);
|
|
205
|
+
const perQueryRanked = queries.map((_q, qi) =>
|
|
206
|
+
rankRowsForQuery(queryVecs[qi]!, rowVecs, minScore),
|
|
137
207
|
);
|
|
138
|
-
|
|
139
|
-
const perQueryRanked = queries.map((_query, qi) => {
|
|
140
|
-
const scored: Array<{ rowIndex: number; similarity: number }> = [];
|
|
141
|
-
for (let ri = 0; ri < rows.length; ri++) {
|
|
142
|
-
const sim = cosineSim(queryVecs[qi]!, rowVecs[ri]!);
|
|
143
|
-
if (sim >= minScore) {
|
|
144
|
-
scored.push({ rowIndex: ri, similarity: sim });
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
scored.sort((a, b) => b.similarity - a.similarity);
|
|
148
|
-
return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
const fusionMap = new Map<
|
|
152
|
-
number,
|
|
153
|
-
{ rrfScore: number; queryScores: Array<{ query: string; similarity: number; rank: number }> }
|
|
154
|
-
>();
|
|
155
|
-
for (let qi = 0; qi < queries.length; qi++) {
|
|
156
|
-
for (const item of perQueryRanked[qi]!) {
|
|
157
|
-
if (!fusionMap.has(item.rowIndex)) {
|
|
158
|
-
fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
|
|
159
|
-
}
|
|
160
|
-
const entry = fusionMap.get(item.rowIndex)!;
|
|
161
|
-
entry.rrfScore += 1 / (k + item.rank);
|
|
162
|
-
entry.queryScores.push({
|
|
163
|
-
query: queries[qi]!,
|
|
164
|
-
similarity: item.similarity,
|
|
165
|
-
rank: item.rank,
|
|
166
|
-
});
|
|
167
|
-
}
|
|
168
|
-
}
|
|
208
|
+
const fusionMap = fuseRankedHits(queries, perQueryRanked, k);
|
|
169
209
|
|
|
170
210
|
const hc = new Map<string, string>();
|
|
171
211
|
const results: MultiSearchResult['results'] = [];
|