@optave/codegraph 3.11.0 → 3.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -31
- package/dist/ast-analysis/engine.d.ts.map +1 -1
- package/dist/ast-analysis/engine.js +91 -60
- package/dist/ast-analysis/engine.js.map +1 -1
- package/dist/ast-analysis/visitor-utils.d.ts +3 -0
- package/dist/ast-analysis/visitor-utils.d.ts.map +1 -1
- package/dist/ast-analysis/visitor-utils.js +83 -49
- package/dist/ast-analysis/visitor-utils.js.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/ast-store-visitor.js +78 -62
- package/dist/ast-analysis/visitors/ast-store-visitor.js.map +1 -1
- package/dist/ast-analysis/visitors/dataflow-visitor.d.ts.map +1 -1
- package/dist/ast-analysis/visitors/dataflow-visitor.js +61 -42
- package/dist/ast-analysis/visitors/dataflow-visitor.js.map +1 -1
- package/dist/cli/commands/embed.d.ts.map +1 -1
- package/dist/cli/commands/embed.js +49 -4
- package/dist/cli/commands/embed.js.map +1 -1
- package/dist/domain/analysis/dependencies.d.ts.map +1 -1
- package/dist/domain/analysis/dependencies.js +106 -80
- package/dist/domain/analysis/dependencies.js.map +1 -1
- package/dist/domain/analysis/fn-impact.d.ts.map +1 -1
- package/dist/domain/analysis/fn-impact.js +77 -52
- package/dist/domain/analysis/fn-impact.js.map +1 -1
- package/dist/domain/analysis/module-map.d.ts.map +1 -1
- package/dist/domain/analysis/module-map.js +132 -121
- package/dist/domain/analysis/module-map.js.map +1 -1
- package/dist/domain/graph/builder/helpers.d.ts +4 -4
- package/dist/domain/graph/builder/helpers.d.ts.map +1 -1
- package/dist/domain/graph/builder/helpers.js +47 -33
- package/dist/domain/graph/builder/helpers.js.map +1 -1
- package/dist/domain/graph/builder/incremental.d.ts +6 -0
- package/dist/domain/graph/builder/incremental.d.ts.map +1 -1
- package/dist/domain/graph/builder/incremental.js +142 -76
- package/dist/domain/graph/builder/incremental.js.map +1 -1
- package/dist/domain/graph/builder/pipeline.d.ts +1 -44
- package/dist/domain/graph/builder/pipeline.d.ts.map +1 -1
- package/dist/domain/graph/builder/pipeline.js +10 -766
- package/dist/domain/graph/builder/pipeline.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-edges.js +133 -96
- package/dist/domain/graph/builder/stages/build-edges.js.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/build-structure.js +82 -65
- package/dist/domain/graph/builder/stages/build-structure.js.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/detect-changes.js +84 -56
- package/dist/domain/graph/builder/stages/detect-changes.js.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/finalize.js +60 -51
- package/dist/domain/graph/builder/stages/finalize.js.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts +8 -6
- package/dist/domain/graph/builder/stages/insert-nodes.d.ts.map +1 -1
- package/dist/domain/graph/builder/stages/insert-nodes.js +107 -122
- package/dist/domain/graph/builder/stages/insert-nodes.js.map +1 -1
- package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts +14 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.d.ts.map +1 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.js +77 -0
- package/dist/domain/graph/builder/stages/native-db-lifecycle.js.map +1 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.d.ts +62 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.d.ts.map +1 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.js +747 -0
- package/dist/domain/graph/builder/stages/native-orchestrator.js.map +1 -0
- package/dist/domain/graph/cycles.d.ts +6 -4
- package/dist/domain/graph/cycles.d.ts.map +1 -1
- package/dist/domain/graph/cycles.js +50 -55
- package/dist/domain/graph/cycles.js.map +1 -1
- package/dist/domain/graph/journal.d.ts.map +1 -1
- package/dist/domain/graph/journal.js +89 -70
- package/dist/domain/graph/journal.js.map +1 -1
- package/dist/domain/graph/watcher.d.ts.map +1 -1
- package/dist/domain/graph/watcher.js +5 -2
- package/dist/domain/graph/watcher.js.map +1 -1
- package/dist/domain/parser.d.ts +12 -23
- package/dist/domain/parser.d.ts.map +1 -1
- package/dist/domain/parser.js +126 -79
- package/dist/domain/parser.js.map +1 -1
- package/dist/domain/search/generator.d.ts +3 -1
- package/dist/domain/search/generator.d.ts.map +1 -1
- package/dist/domain/search/generator.js +68 -45
- package/dist/domain/search/generator.js.map +1 -1
- package/dist/domain/search/models.d.ts +2 -0
- package/dist/domain/search/models.d.ts.map +1 -1
- package/dist/domain/search/models.js +37 -3
- package/dist/domain/search/models.js.map +1 -1
- package/dist/domain/search/search/hybrid.d.ts.map +1 -1
- package/dist/domain/search/search/hybrid.js +49 -40
- package/dist/domain/search/search/hybrid.js.map +1 -1
- package/dist/domain/search/search/semantic.d.ts.map +1 -1
- package/dist/domain/search/search/semantic.js +69 -49
- package/dist/domain/search/search/semantic.js.map +1 -1
- package/dist/domain/wasm-worker-entry.js +201 -136
- package/dist/domain/wasm-worker-entry.js.map +1 -1
- package/dist/extractors/elixir.js +95 -71
- package/dist/extractors/elixir.js.map +1 -1
- package/dist/extractors/gleam.d.ts.map +1 -1
- package/dist/extractors/gleam.js +23 -31
- package/dist/extractors/gleam.js.map +1 -1
- package/dist/extractors/helpers.d.ts +79 -1
- package/dist/extractors/helpers.d.ts.map +1 -1
- package/dist/extractors/helpers.js +137 -0
- package/dist/extractors/helpers.js.map +1 -1
- package/dist/extractors/java.d.ts.map +1 -1
- package/dist/extractors/java.js +37 -49
- package/dist/extractors/java.js.map +1 -1
- package/dist/extractors/javascript.d.ts.map +1 -1
- package/dist/extractors/javascript.js +44 -44
- package/dist/extractors/javascript.js.map +1 -1
- package/dist/extractors/julia.js +27 -34
- package/dist/extractors/julia.js.map +1 -1
- package/dist/extractors/r.d.ts.map +1 -1
- package/dist/extractors/r.js +33 -58
- package/dist/extractors/r.js.map +1 -1
- package/dist/extractors/solidity.d.ts.map +1 -1
- package/dist/extractors/solidity.js +38 -61
- package/dist/extractors/solidity.js.map +1 -1
- package/dist/features/boundaries.d.ts.map +1 -1
- package/dist/features/boundaries.js +49 -39
- package/dist/features/boundaries.js.map +1 -1
- package/dist/features/cfg.d.ts.map +1 -1
- package/dist/features/cfg.js +90 -63
- package/dist/features/cfg.js.map +1 -1
- package/dist/features/check.d.ts.map +1 -1
- package/dist/features/check.js +43 -34
- package/dist/features/check.js.map +1 -1
- package/dist/features/cochange.d.ts.map +1 -1
- package/dist/features/cochange.js +68 -56
- package/dist/features/cochange.js.map +1 -1
- package/dist/features/complexity.d.ts.map +1 -1
- package/dist/features/complexity.js +105 -75
- package/dist/features/complexity.js.map +1 -1
- package/dist/features/dataflow.d.ts.map +1 -1
- package/dist/features/dataflow.js +37 -29
- package/dist/features/dataflow.js.map +1 -1
- package/dist/features/flow.d.ts.map +1 -1
- package/dist/features/flow.js +31 -22
- package/dist/features/flow.js.map +1 -1
- package/dist/features/graph-enrichment.d.ts.map +1 -1
- package/dist/features/graph-enrichment.js +77 -70
- package/dist/features/graph-enrichment.js.map +1 -1
- package/dist/features/owners.d.ts +17 -26
- package/dist/features/owners.d.ts.map +1 -1
- package/dist/features/owners.js +120 -109
- package/dist/features/owners.js.map +1 -1
- package/dist/features/sequence.d.ts.map +1 -1
- package/dist/features/sequence.js +59 -54
- package/dist/features/sequence.js.map +1 -1
- package/dist/features/structure-query.d.ts.map +1 -1
- package/dist/features/structure-query.js +60 -60
- package/dist/features/structure-query.js.map +1 -1
- package/dist/features/structure.js +28 -36
- package/dist/features/structure.js.map +1 -1
- package/dist/graph/algorithms/leiden/optimiser.d.ts.map +1 -1
- package/dist/graph/algorithms/leiden/optimiser.js +100 -69
- package/dist/graph/algorithms/leiden/optimiser.js.map +1 -1
- package/dist/graph/classifiers/roles.d.ts.map +1 -1
- package/dist/graph/classifiers/roles.js +63 -59
- package/dist/graph/classifiers/roles.js.map +1 -1
- package/dist/infrastructure/config.d.ts +1 -1
- package/dist/infrastructure/config.d.ts.map +1 -1
- package/dist/infrastructure/config.js +1 -1
- package/dist/infrastructure/config.js.map +1 -1
- package/dist/presentation/cfg.d.ts.map +1 -1
- package/dist/presentation/cfg.js +44 -29
- package/dist/presentation/cfg.js.map +1 -1
- package/dist/presentation/flow.d.ts.map +1 -1
- package/dist/presentation/flow.js +58 -38
- package/dist/presentation/flow.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +7 -7
- package/src/ast-analysis/engine.ts +145 -61
- package/src/ast-analysis/visitor-utils.ts +86 -46
- package/src/ast-analysis/visitors/ast-store-visitor.ts +104 -69
- package/src/ast-analysis/visitors/dataflow-visitor.ts +86 -47
- package/src/cli/commands/embed.ts +54 -4
- package/src/domain/analysis/dependencies.ts +166 -85
- package/src/domain/analysis/fn-impact.ts +120 -50
- package/src/domain/analysis/module-map.ts +175 -140
- package/src/domain/graph/builder/helpers.ts +85 -76
- package/src/domain/graph/builder/incremental.ts +217 -90
- package/src/domain/graph/builder/pipeline.ts +19 -957
- package/src/domain/graph/builder/stages/build-edges.ts +198 -140
- package/src/domain/graph/builder/stages/build-structure.ts +115 -82
- package/src/domain/graph/builder/stages/detect-changes.ts +107 -64
- package/src/domain/graph/builder/stages/finalize.ts +72 -70
- package/src/domain/graph/builder/stages/insert-nodes.ts +154 -120
- package/src/domain/graph/builder/stages/native-db-lifecycle.ts +74 -0
- package/src/domain/graph/builder/stages/native-orchestrator.ts +942 -0
- package/src/domain/graph/cycles.ts +51 -49
- package/src/domain/graph/journal.ts +84 -69
- package/src/domain/graph/watcher.ts +8 -2
- package/src/domain/parser.ts +143 -66
- package/src/domain/search/generator.ts +132 -74
- package/src/domain/search/models.ts +39 -3
- package/src/domain/search/search/hybrid.ts +53 -42
- package/src/domain/search/search/semantic.ts +105 -65
- package/src/domain/wasm-worker-entry.ts +235 -152
- package/src/extractors/elixir.ts +91 -64
- package/src/extractors/gleam.ts +33 -37
- package/src/extractors/helpers.ts +205 -1
- package/src/extractors/java.ts +42 -45
- package/src/extractors/javascript.ts +44 -43
- package/src/extractors/julia.ts +28 -35
- package/src/extractors/r.ts +38 -56
- package/src/extractors/solidity.ts +43 -71
- package/src/features/boundaries.ts +64 -46
- package/src/features/cfg.ts +145 -74
- package/src/features/check.ts +60 -43
- package/src/features/cochange.ts +95 -72
- package/src/features/complexity.ts +134 -79
- package/src/features/dataflow.ts +57 -34
- package/src/features/flow.ts +48 -24
- package/src/features/graph-enrichment.ts +105 -70
- package/src/features/owners.ts +186 -146
- package/src/features/sequence.ts +99 -69
- package/src/features/structure-query.ts +94 -79
- package/src/features/structure.ts +56 -56
- package/src/graph/algorithms/leiden/optimiser.ts +142 -87
- package/src/graph/classifiers/roles.ts +64 -54
- package/src/infrastructure/config.ts +1 -1
- package/src/presentation/cfg.ts +48 -32
- package/src/presentation/flow.ts +100 -52
- package/src/types.ts +1 -1
|
@@ -43,48 +43,54 @@ function initEmbeddingsSchema(db) {
|
|
|
43
43
|
`);
|
|
44
44
|
}
|
|
45
45
|
/**
|
|
46
|
-
*
|
|
46
|
+
* Resolve the repo root for embedding. Prefer the root recorded at build time;
|
|
47
|
+
* fall back to `<dbParent>` only when the DB lives at the conventional
|
|
48
|
+
* `<root>/.codegraph/graph.db` layout — otherwise trust the caller's rootDir.
|
|
47
49
|
*/
|
|
48
|
-
|
|
49
|
-
const strategy = options.strategy || 'structured';
|
|
50
|
-
const dbPath = customDbPath || findDbPath(undefined);
|
|
51
|
-
if (!fs.existsSync(dbPath)) {
|
|
52
|
-
throw new DbError(`No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`, { file: dbPath });
|
|
53
|
-
}
|
|
54
|
-
const db = openDb(dbPath);
|
|
55
|
-
initEmbeddingsSchema(db);
|
|
56
|
-
// Prefer the repo root recorded at build time — embed may be invoked from a
|
|
57
|
-
// different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
|
|
58
|
-
// positional rootDir will be wrong in that case. For legacy DBs without
|
|
59
|
-
// root_dir metadata, fall back to `<dbParent>` only when the DB lives at
|
|
60
|
-
// the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
|
|
61
|
-
// the caller-provided rootDir (which may be an explicit positional arg).
|
|
62
|
-
// `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
|
|
63
|
-
// conventional-layout check is required to keep the rootDir path reachable.
|
|
50
|
+
function resolveRoot(db, dbPath, rootDir) {
|
|
64
51
|
const metaRoot = getBuildMeta(db, 'root_dir');
|
|
65
52
|
const resolvedDbPath = path.resolve(dbPath);
|
|
66
53
|
const dbDirName = path.basename(path.dirname(resolvedDbPath));
|
|
67
54
|
const dbParent = dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
|
|
68
|
-
|
|
55
|
+
return metaRoot || dbParent || rootDir;
|
|
56
|
+
}
|
|
57
|
+
/** Reset embedding tables and load eligible symbols grouped by file. */
|
|
58
|
+
function loadNodesByFile(db) {
|
|
69
59
|
db.exec('DELETE FROM embeddings');
|
|
70
60
|
db.exec('DELETE FROM embedding_meta');
|
|
71
61
|
db.exec('DELETE FROM fts_index');
|
|
72
62
|
const nodes = db
|
|
73
63
|
.prepare(`SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`)
|
|
74
64
|
.all();
|
|
75
|
-
console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
|
|
76
65
|
const byFile = new Map();
|
|
77
66
|
for (const node of nodes) {
|
|
78
67
|
if (!byFile.has(node.file))
|
|
79
68
|
byFile.set(node.file, []);
|
|
80
69
|
byFile.get(node.file)?.push(node);
|
|
81
70
|
}
|
|
71
|
+
return byFile;
|
|
72
|
+
}
|
|
73
|
+
/** Build embedding text for a single node, truncating if it would overflow. */
|
|
74
|
+
function buildNodeText(node, file, lines, db, strategy, contextWindow) {
|
|
75
|
+
let text = strategy === 'structured'
|
|
76
|
+
? buildStructuredText(node, file, lines, db)
|
|
77
|
+
: buildSourceText(node, file, lines);
|
|
78
|
+
const tokens = estimateTokens(text);
|
|
79
|
+
if (tokens > contextWindow) {
|
|
80
|
+
text = text.slice(0, contextWindow * 4);
|
|
81
|
+
return { text, overflowed: true };
|
|
82
|
+
}
|
|
83
|
+
return { text, overflowed: false };
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Walk files in the graph, read source, and produce parallel arrays of
|
|
87
|
+
* texts / nodeIds / nodeNames / previews ready for embedding.
|
|
88
|
+
*/
|
|
89
|
+
function prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, contextWindow) {
|
|
82
90
|
const texts = [];
|
|
83
91
|
const nodeIds = [];
|
|
84
92
|
const nodeNames = [];
|
|
85
93
|
const previews = [];
|
|
86
|
-
const config = getModelConfig(modelKey);
|
|
87
|
-
const contextWindow = config.contextWindow;
|
|
88
94
|
let overflowCount = 0;
|
|
89
95
|
let filesRead = 0;
|
|
90
96
|
let filesSkipped = 0;
|
|
@@ -101,36 +107,20 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
101
107
|
continue;
|
|
102
108
|
}
|
|
103
109
|
for (const node of fileNodes) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
: buildSourceText(node, file, lines);
|
|
107
|
-
// Detect and handle context window overflow
|
|
108
|
-
const tokens = estimateTokens(text);
|
|
109
|
-
if (tokens > contextWindow) {
|
|
110
|
+
const { text, overflowed } = buildNodeText(node, file, lines, db, strategy, contextWindow);
|
|
111
|
+
if (overflowed)
|
|
110
112
|
overflowCount++;
|
|
111
|
-
const maxChars = contextWindow * 4;
|
|
112
|
-
text = text.slice(0, maxChars);
|
|
113
|
-
}
|
|
114
113
|
texts.push(text);
|
|
115
114
|
nodeIds.push(node.id);
|
|
116
115
|
nodeNames.push(node.name);
|
|
117
116
|
previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
|
|
118
117
|
}
|
|
119
118
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
// Surface this clearly instead of emitting a silent "Stored 0 embeddings".
|
|
126
|
-
if (byFile.size > 0 && filesRead === 0) {
|
|
127
|
-
closeDb(db);
|
|
128
|
-
throw new DbError(`embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
|
|
129
|
-
`Tried resolving against: ${resolvedRoot}\n` +
|
|
130
|
-
'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.', { file: dbPath });
|
|
131
|
-
}
|
|
132
|
-
console.log(`Embedding ${texts.length} symbols...`);
|
|
133
|
-
const { vectors, dim } = await embed(texts, modelKey);
|
|
119
|
+
return { texts, nodeIds, nodeNames, previews, overflowCount, filesRead, filesSkipped };
|
|
120
|
+
}
|
|
121
|
+
/** Persist vectors, FTS rows, and embedding metadata in a single transaction. */
|
|
122
|
+
function persistEmbeddings(db, prepared, vectors, dim, modelName, strategy) {
|
|
123
|
+
const { nodeIds, nodeNames, previews, texts, overflowCount } = prepared;
|
|
134
124
|
const insert = db.prepare('INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)');
|
|
135
125
|
const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)');
|
|
136
126
|
const insertMeta = db.prepare('INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)');
|
|
@@ -140,7 +130,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
140
130
|
insert.run(nodeIds[i], Buffer.from(vec.buffer), previews[i], texts[i]);
|
|
141
131
|
insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
|
|
142
132
|
}
|
|
143
|
-
insertMeta.run('model',
|
|
133
|
+
insertMeta.run('model', modelName);
|
|
144
134
|
insertMeta.run('dim', String(dim));
|
|
145
135
|
insertMeta.run('count', String(vectors.length));
|
|
146
136
|
insertMeta.run('fts_count', String(vectors.length));
|
|
@@ -151,6 +141,39 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
151
141
|
}
|
|
152
142
|
});
|
|
153
143
|
insertAll();
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Build embeddings for all functions/methods/classes in the graph.
|
|
147
|
+
*/
|
|
148
|
+
export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
|
|
149
|
+
const strategy = options.strategy || 'structured';
|
|
150
|
+
const dbPath = customDbPath || findDbPath(undefined);
|
|
151
|
+
if (!fs.existsSync(dbPath)) {
|
|
152
|
+
throw new DbError(`No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`, { file: dbPath });
|
|
153
|
+
}
|
|
154
|
+
const db = openDb(dbPath);
|
|
155
|
+
initEmbeddingsSchema(db);
|
|
156
|
+
const resolvedRoot = resolveRoot(db, dbPath, rootDir);
|
|
157
|
+
const byFile = loadNodesByFile(db);
|
|
158
|
+
const nodeCount = [...byFile.values()].reduce((acc, list) => acc + list.length, 0);
|
|
159
|
+
console.log(`Building embeddings for ${nodeCount} symbols (strategy: ${strategy})...`);
|
|
160
|
+
const config = getModelConfig(modelKey);
|
|
161
|
+
const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, config.contextWindow);
|
|
162
|
+
if (prepared.overflowCount > 0) {
|
|
163
|
+
warn(`${prepared.overflowCount} symbol(s) exceeded model context window (${config.contextWindow} tokens) and were truncated`);
|
|
164
|
+
}
|
|
165
|
+
// If there were symbols to embed but every file failed to read, the DB was
|
|
166
|
+
// almost certainly built from a different location than the current cwd.
|
|
167
|
+
// Surface this clearly instead of emitting a silent "Stored 0 embeddings".
|
|
168
|
+
if (byFile.size > 0 && prepared.filesRead === 0) {
|
|
169
|
+
closeDb(db);
|
|
170
|
+
throw new DbError(`embed: could not read any of the ${prepared.filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
|
|
171
|
+
`Tried resolving against: ${resolvedRoot}\n` +
|
|
172
|
+
'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.', { file: dbPath });
|
|
173
|
+
}
|
|
174
|
+
console.log(`Embedding ${prepared.texts.length} symbols...`);
|
|
175
|
+
const { vectors, dim } = await embed(prepared.texts, modelKey);
|
|
176
|
+
persistEmbeddings(db, prepared, vectors, dim, config.name, strategy);
|
|
154
177
|
console.log(`\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`);
|
|
155
178
|
closeDb(db);
|
|
156
179
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generator.js","sourceRoot":"","sources":["../../../src/domain/search/generator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC9E,OAAO,EAAE,IAAI,EAAE,MAAM,gCAAgC,CAAC;AACtD,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"generator.js","sourceRoot":"","sources":["../../../src/domain/search/generator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC9E,OAAO,EAAE,IAAI,EAAE,MAAM,gCAAgC,CAAC;AACtD,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAejE;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,oBAAoB,CAAC,EAAyB;IACrD,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;GAWP,CAAC,CAAC;IAEH,+DAA+D;IAC/D,IAAI,CAAC;QACH,EAAE,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,2BAA2B;IAC7B,CAAC;IAED,6CAA6C;IAC7C,EAAE,CAAC,IAAI,CAAC;;;;;;GAMP,CAAC,CAAC;AACL,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,EAAyB,EAAE,MAAc,EAAE,OAAe;IAC7E,MAAM,QAAQ,GAAG,YAAY,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IAC9C,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC;IAC9D,MAAM,QAAQ,GACZ,SAAS,KAAK,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACtF,OAAO,QAAQ,IAAI,QAAQ,IAAI,OAAO,CAAC;AACzC,CAAC;AAED,wEAAwE;AACxE,SAAS,eAAe,CAAC,EAAyB;IAChD,EAAE,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IAClC,EAAE,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;IACtC,EAAE,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAEjC,MAAM,KAAK,GAAG,EAAE;SACb,OAAO,CACN,uFAAuF,CACxF;SACA,GAAG,EAAqB,CAAC;IAE5B,MAAM,MAAM,GAAG,IAAI,GAAG,EAA2B,CAAC;IAClD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,SAAS,aAAa,CACpB,IAAmB,EACnB,IAAY,EACZ,KAAe,EACf,EAAyB,EACzB,QAA2B,EAC3B,aAAqB;IAErB,IAAI,IAAI,GACN,QAAQ,KAAK,YAAY;QACvB,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QAC5C,CAAC,CAAC,eAAe,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IACpC,IAAI,MAAM,GAAG,aAAa,EAAE,CAAC;QAC3B,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,GAAG,CAAC,CAAC,CAAC;QACxC,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAC5B,MAAoC,EACpC,EAAyB,EACzB,YAAoB,EACpB,QAA2B,EAC3B,aAAqB;IAErB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,MAAM,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,CAAC;QAC9E,IAAI,KAAe,CAAC;QACpB,IAAI,CAAC;YACH,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACvD,SAAS,EAAE,CAAC;QACd,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,YAAY,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,IAAI,oBAAqB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YACtE,SAAS;QACX,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,aAAa,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAC;YAC3F,IAAI,UAAU;gBAAE,aAAa,EAAE,CAAC;YAChC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACtB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,IAAI,QAAQ,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;AACzF,CAAC;AAED,iFAAiF;AACjF,SAAS,iBAAiB,CACxB,EAAyB,EACzB,QAA4B,EAC5B,OAAuB,EACvB,GAAW,EACX,SAAiB,EACjB,QAA2B;IAE3B,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,QAAQ,CAAC;IACxE,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CACvB,kGAAkG,CACnG,CAAC;IACF,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,8DAA8D,CAAC,CAAC;IAC7F,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,kEAAkE,CAAC,CAAC;IAClG,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;QACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAiB,CAAC;YACvC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACvE,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,CAAC;QACD,UAAU,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QACnC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QACnC,UAAU,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAChD,UAAU,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QACpD,UAAU,CAAC,GAAG,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACrC,UAAU,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QACrD,IAAI,aAAa,GAAG,CAAC,EAAE,CAAC;YACtB,UAAU,CAAC,GAAG,CAAC,iBAAiB,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC,CAAC,CAAC;IACH,SAAS,EAAE,CAAC;AACd,CAAC;AAMD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,OAAe,EACf,QAAgB,EAChB,YAAqB,EACrB,UAAkC,EAAE;IAEpC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,YAAY,CAAC;IAClD,MAAM,MAAM,GAAG,YAAY,IAAI,UAAU,CAAC,SAAS,CAAC,CAAC;IAErD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,OAAO,CACf,kCAAkC,MAAM,0DAA0D,EAClG,EAAE,IAAI,EAAE,MAAM,EAAE,CACjB,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAA0B,CAAC;IACnD,oBAAoB,CAAC,EAAE,CAAC,CAAC;IAEzB,MAAM,YAAY,GAAG,WAAW,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;IAEnC,MAAM,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACnF,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,uBAAuB,QAAQ,MAAM,CAAC,CAAC;IAEvF,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,QAAQ,GAAG,qBAAqB,CAAC,MAAM,EAAE,EAAE,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;IAEjG,IAAI,QAAQ,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;QAC/B,IAAI,CACF,GAAG,QAAQ,CAAC,aAAa,6CAA6C,MAAM,CAAC,aAAa,6BAA6B,CACxH,CAAC;IACJ,CAAC;IAED,2EAA2E;IAC3E,yEAAyE;IACzE,2EAA2E;IAC3E,IAAI,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,QAAQ,CAAC,SAAS,KAAK,CAAC,EAAE,CAAC;QAChD,OAAO,CAAC,EAAE,CAAC,CAAC;QACZ,MAAM,IAAI,OAAO,CACf,oCAAoC,QAAQ,CAAC,YAAY,kIAAkI;YACzL,4BAA4B,YAAY,IAAI;YAC5C,uHAAuH,EACzH,EAAE,IAAI,EAAE,MAAM,EAAE,CACjB,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,CAAC,KAAK,CAAC,MAAM,aAAa,CAAC,CAAC;IAC7D,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE/D,iBAAiB,CAAC,EAAE,EAAE,QAAQ,EAAE,OAAyB,EAAE,GAAG,EAAE,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAEvF,OAAO,CAAC,GAAG,CACT,YAAY,OAAO,CAAC,MAAM,gBAAgB,GAAG,MAAM,MAAM,CAAC,IAAI,eAAe,QAAQ,eAAe,CACrG,CAAC;IACF,OAAO,CAAC,EAAE,CAAC,CAAC;AACd,CAAC"}
|
|
@@ -20,6 +20,8 @@ export interface ModelConfig {
|
|
|
20
20
|
contextWindow: number;
|
|
21
21
|
desc: string;
|
|
22
22
|
quantized: boolean;
|
|
23
|
+
/** Pooling strategy passed to the transformers pipeline. Defaults to 'mean'. */
|
|
24
|
+
pooling?: 'mean' | 'cls';
|
|
23
25
|
}
|
|
24
26
|
export declare const MODELS: Record<string, ModelConfig>;
|
|
25
27
|
export declare const EMBEDDING_STRATEGIES: readonly string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/domain/search/models.ts"],"names":[],"mappings":"AASA;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,GAAG,SAAS,CAWzD;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/domain/search/models.ts"],"names":[],"mappings":"AASA;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,GAAG,SAAS,CAWzD;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;IACnB,gFAAgF;IAChF,OAAO,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;CAC1B;AAUD,eAAO,MAAM,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,CAgF9C,CAAC;AAEF,eAAO,MAAM,oBAAoB,EAAE,SAAS,MAAM,EAA6B,CAAC;AAEhF,eAAO,MAAM,aAAa,EAAE,MAAgB,CAAC;AAiB7C,sEAAsE;AACtE,wBAAgB,cAAc,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,WAAW,CAO7D;AAED;;;;;;GAMG;AACH,wBAAgB,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CA0CnE;AAED;;;;;GAKG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,OAAO,CAAC,CAkBzD;AAED;;;GAGG;AACH,wBAAsB,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAMlD;AA2CD;;GAEG;AACH,wBAAsB,KAAK,CACzB,KAAK,EAAE,MAAM,EAAE,EACf,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC;IAAE,OAAO,EAAE,YAAY,EAAE,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,CA6BnD"}
|
|
@@ -77,7 +77,7 @@ export const MODELS = {
|
|
|
77
77
|
name: 'nomic-ai/nomic-embed-text-v1.5',
|
|
78
78
|
dim: 768,
|
|
79
79
|
contextWindow: 8192,
|
|
80
|
-
desc: '
|
|
80
|
+
desc: 'Matryoshka MRL trained (~137MB). 8192 context. Codegraph stores full 768d (no truncation); v1 scores higher on our benchmark.',
|
|
81
81
|
quantized: false,
|
|
82
82
|
},
|
|
83
83
|
'bge-large': {
|
|
@@ -87,9 +87,39 @@ export const MODELS = {
|
|
|
87
87
|
desc: 'Best general retrieval (~335MB). Top MTEB scores.',
|
|
88
88
|
quantized: false,
|
|
89
89
|
},
|
|
90
|
+
'mxbai-xsmall': {
|
|
91
|
+
name: 'mixedbread-ai/mxbai-embed-xsmall-v1',
|
|
92
|
+
dim: 384,
|
|
93
|
+
contextWindow: 4096,
|
|
94
|
+
desc: 'Tiny model with long context (~50MB). 4096 ctx.',
|
|
95
|
+
quantized: false,
|
|
96
|
+
pooling: 'cls',
|
|
97
|
+
},
|
|
98
|
+
'mxbai-large': {
|
|
99
|
+
name: 'mixedbread-ai/mxbai-embed-large-v1',
|
|
100
|
+
dim: 1024,
|
|
101
|
+
contextWindow: 512,
|
|
102
|
+
desc: 'Top MTEB BERT-large, Matryoshka dimensions (~400MB). 512 ctx.',
|
|
103
|
+
quantized: false,
|
|
104
|
+
pooling: 'cls',
|
|
105
|
+
},
|
|
106
|
+
'bge-m3': {
|
|
107
|
+
name: 'Xenova/bge-m3',
|
|
108
|
+
dim: 1024,
|
|
109
|
+
contextWindow: 8192,
|
|
110
|
+
desc: 'Multilingual, multi-task (~600MB). 100+ languages, 8192 context.',
|
|
111
|
+
quantized: false,
|
|
112
|
+
},
|
|
113
|
+
modernbert: {
|
|
114
|
+
name: 'nomic-ai/modernbert-embed-base',
|
|
115
|
+
dim: 768,
|
|
116
|
+
contextWindow: 8192,
|
|
117
|
+
desc: 'ModernBERT base (~150MB). Newer architecture, 8192 ctx, English.',
|
|
118
|
+
quantized: false,
|
|
119
|
+
},
|
|
90
120
|
};
|
|
91
121
|
export const EMBEDDING_STRATEGIES = ['structured', 'source'];
|
|
92
|
-
export const DEFAULT_MODEL = 'nomic
|
|
122
|
+
export const DEFAULT_MODEL = 'nomic';
|
|
93
123
|
const NPM_BIN = process.platform === 'win32' ? 'npm.cmd' : 'npm';
|
|
94
124
|
const BATCH_SIZE_MAP = {
|
|
95
125
|
minilm: 32,
|
|
@@ -99,6 +129,10 @@ const BATCH_SIZE_MAP = {
|
|
|
99
129
|
nomic: 8,
|
|
100
130
|
'nomic-v1.5': 8,
|
|
101
131
|
'bge-large': 4,
|
|
132
|
+
'mxbai-xsmall': 32,
|
|
133
|
+
'mxbai-large': 4,
|
|
134
|
+
'bge-m3': 4,
|
|
135
|
+
modernbert: 8,
|
|
102
136
|
};
|
|
103
137
|
const DEFAULT_BATCH_SIZE = 32;
|
|
104
138
|
/** @internal Used by generator.js — not part of the public barrel. */
|
|
@@ -233,7 +267,7 @@ export async function embed(texts, modelKey) {
|
|
|
233
267
|
for (let i = 0; i < texts.length; i += batchSize) {
|
|
234
268
|
const batch = texts.slice(i, i + batchSize);
|
|
235
269
|
const output = (await // biome-ignore lint/complexity/noBannedTypes: dynamically loaded extractor is untyped
|
|
236
|
-
ext(batch, { pooling: 'mean', normalize: true }));
|
|
270
|
+
ext(batch, { pooling: config.pooling ?? 'mean', normalize: true }));
|
|
237
271
|
for (let j = 0; j < batch.length; j++) {
|
|
238
272
|
const start = j * dim;
|
|
239
273
|
const vec = new Float32Array(dim);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.js","sourceRoot":"","sources":["../../../src/domain/search/models.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,IAAI,EAAE,MAAM,gCAAgC,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAElE,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEhD;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,oBAAoB;IAClC,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAC;QACvE,mEAAmE;QACnE,yEAAyE;QACzE,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,8EAA8E;QAC9E,yDAAyD;QACzD,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;
|
|
1
|
+
{"version":3,"file":"models.js","sourceRoot":"","sources":["../../../src/domain/search/models.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,IAAI,EAAE,MAAM,gCAAgC,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAElE,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEhD;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,oBAAoB;IAClC,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAC;QACvE,mEAAmE;QACnE,yEAAyE;QACzE,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7E,CAAC;IAAC,MAAM,CAAC;QACP,8EAA8E;QAC9E,yDAAyD;QACzD,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAYD,kDAAkD;AAClD,IAAI,QAAQ,GAAY,IAAI,CAAC;AAC7B,IAAI,SAAS,GAGT,IAAI,CAAC;AACT,IAAI,WAAW,GAAkB,IAAI,CAAC;AAEtC,MAAM,CAAC,MAAM,MAAM,GAAgC;IACjD,MAAM,EAAE;QACN,IAAI,EAAE,yBAAyB;QAC/B,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,GAAG;QAClB,IAAI,EAAE,0CAA0C;QAChD,SAAS,EAAE,IAAI;KAChB;IACD,YAAY,EAAE;QACZ,IAAI,EAAE,oCAAoC;QAC1C,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,4CAA4C;QAClD,SAAS,EAAE,KAAK;KACjB;IACD,WAAW,EAAE;QACX,IAAI,EAAE,mCAAmC;QACzC,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,0DAA0D;QAChE,SAAS,EAAE,KAAK;KACjB;IACD,WAAW,EAAE;QACX,IAAI,EAAE,qCAAqC;QAC3C,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,kEAAkE;QACxE,SAAS,EAAE,KAAK;KACjB;IACD,KAAK,EAAE;QACL,IAAI,EAAE,4BAA4B;QAClC,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,4CAA4C;QAClD,SAAS,EAAE,KAAK;KACjB;IACD,YAAY,EAAE;QACZ,IAAI,EAAE,gCAAgC;QACtC,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,+HAA+H;QACrI,SAAS,EAAE,KAAK;KACjB;IACD,WAAW,EAAE;QACX,IAAI,EAAE,0BAA0B;QAChC,GAAG,EAAE,IAAI;QACT,aAAa,EAAE,GAAG;QAClB,IAAI,EAAE,mDAAmD;QACzD,SAAS,EAAE,KAAK;KACjB;IACD,cAAc,EAAE;QACd,IAAI,EAAE,qCAAqC;QAC3C,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,iDAAiD;QACvD,SAAS,EAAE,KAAK;QAChB,OAAO,EAAE,KAAK;KACf;IACD,aAAa,EAAE;QACb,IAAI,EAAE,oCAAoC;QAC1C,GAAG,EAAE,IAAI;QACT,aAAa,EAAE,GAAG;QAClB,IAAI,EAAE,+DAA+D;QACrE,SAAS,EAAE,KAAK;QAChB,OAAO,EAAE,KAAK;KACf;IACD,QAAQ,EAAE;QACR,IAAI,EAAE,eAAe;QACrB,GAAG,EAAE,IAAI;QACT,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,kEAAkE;QACxE,SAAS,EAAE,KAAK;KACjB;IACD,UAAU,EAAE;QACV,IAAI,EAAE,gCAAgC;QACtC,GAAG,EAAE,GAAG;QACR,aAAa,EAAE,IAAI;QACnB,IAAI,EAAE,kEAAkE;QACxE,SAAS,EAAE,KAAK;KACjB;CACF,CAAC;AAEF,MAAM,CAAC,MAAM,oBAAoB,GAAsB,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;AAEhF,MAAM,CAAC,MAAM,aAAa,GAAW,OAAO,CAAC;AAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;AACjE,MAAM,cAAc,GAA2B;IAC7C,MAAM,EAAE,EAAE;IACV,YAAY,EAAE,EAAE;IAChB,WAAW,EAAE,CAAC;IACd,WAAW,EAAE,CAAC;IACd,KAAK,EAAE,CAAC;IACR,YAAY,EAAE,CAAC;IACf,WAAW,EAAE,CAAC;IACd,cAAc,EAAE,EAAE;IAClB,aAAa,EAAE,CAAC;IAChB,QAAQ,EAAE,CAAC;IACX,UAAU,EAAE,CAAC;CACd,CAAC;AACF,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAE9B,sEAAsE;AACtE,MAAM,UAAU,cAAc,CAAC,QAAiB;IAC9C,MAAM,GAAG,GAAG,QAAQ,IAAI,aAAa,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,WAAW,CAAC,kBAAkB,GAAG,gBAAgB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/F,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,WAAmB;IAC/C,MAAM,UAAU,GAAG,oBAAoB,EAAE,CAAC;IAC1C,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,cAAc,WAAW,6CAA6C,CAAC,CAAC;QAC7E,IAAI,CAAC;YACH,YAAY,CAAC,OAAO,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE;gBAC3D,KAAK,EAAE,SAAS;gBAChB,OAAO,EAAE,OAAO;gBAChB,GAAG,EAAE,UAAU;aAChB,CAAC,CAAC;YACH,OAAO,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAI,CACF,mBAAmB,WAAW,YAAY,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,+CAA+C,WAAW,EAAE,CACvJ,CAAC;YACF,OAAO,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QAC7E,EAAE,CAAC,QAAQ,CACT,4BAA4B,WAAW,0BAA0B,EACjE,CAAC,MAAc,EAAE,EAAE;YACjB,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,KAAK,GAAG;gBAAE,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC;YAC/D,IAAI,CAAC;gBACH,YAAY,CAAC,OAAO,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE;oBAC3D,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,OAAO;oBAChB,GAAG,EAAE,UAAU;iBAChB,CAAC,CAAC;gBACH,OAAO,CAAC,IAAI,CAAC,CAAC;YAChB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,IAAI,CACF,cAAc,WAAW,YAAY,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,+CAA+C,WAAW,EAAE,CAClJ,CAAC;gBACF,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC;QACH,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACpC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;IACnD,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,GAAG,GAAG,2BAA2B,CAAC;QACxC,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC;QAC3C,IAAI,SAAS,EAAE,CAAC;YACd,IAAI,CAAC;gBACH,OAAO,MAAM,MAAM,CAAC,GAAG,CAAC,CAAC;YAC3B,CAAC;YAAC,OAAO,OAAO,EAAE,CAAC;gBACjB,MAAM,IAAI,WAAW,CACnB,GAAG,GAAG,mEAAmE,EACzE,EAAE,KAAK,EAAE,OAAO,YAAY,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE,CAC1D,CAAC;YACJ,CAAC;QACH,CAAC;QACD,MAAM,IAAI,WAAW,CAAC,4BAA4B,GAAG,mCAAmC,GAAG,EAAE,CAAC,CAAC;IACjG,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;QAC1B,SAAS,GAAG,IAAI,CAAC;IACnB,CAAC;IACD,WAAW,GAAG,IAAI,CAAC;AACrB,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAiB;IACxC,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAExC,IAAI,SAAS,IAAI,WAAW,KAAK,MAAM,CAAC,IAAI;QAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;IAE3E,wDAAwD;IACxD,MAAM,YAAY,EAAE,CAAC;IAErB,MAAM,YAAY,GAAG,CAAC,MAAM,gBAAgB,EAAE,CAA0B,CAAC;IACzE,QAAQ,GAAG,YAAY,CAAC,QAAQ,CAAC;IAEjC,IAAI,CAAC,4BAA4B,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC;IACpE,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC7D,IAAI,CAAC;QACH,SAAS;YACP,MAAM,kGAAkG;aACvG,QAAqB,CAAC,oBAAoB,EAAE,MAAM,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IAC5E,CAAC;IAAC,OAAO,GAAY,EAAE,CAAC;QACtB,MAAM,KAAK,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;QACrD,MAAM,GAAG,GAAG,KAAK,EAAE,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACjF,MAAM,IAAI,WAAW,CACnB,UAAU,MAAM,CAAC,IAAI,8BAA8B;gBACjD,mEAAmE;gBACnE,YAAY;gBACZ,qDAAqD;gBACrD,iEAAiE,EACnE,EAAE,KAAK,EAAE,CACV,CAAC;QACJ,CAAC;QACD,MAAM,IAAI,WAAW,CACnB,yBAAyB,MAAM,CAAC,IAAI,MAAM,GAAG,IAAI;YAC/C,uDAAuD,EACzD,EAAE,KAAK,EAAE,CACV,CAAC;IACJ,CAAC;IACD,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC;IAC1B,IAAI,CAAC,eAAe,CAAC,CAAC;IACtB,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,KAAe,EACf,QAAiB;IAEjB,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC;IACvB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,SAAS,GAAG,cAAc,CAAC,QAAQ,IAAI,aAAa,CAAC,IAAI,kBAAkB,CAAC;IAElF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5C,MAAM,MAAM,GACV,CAAC,MAAM,sFAAsF;SAC5F,GAAgB,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAE/E,CAAC;QAEJ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;YACtB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;YAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7B,GAAG,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YACvC,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;YAC7B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;AACnC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../../../src/domain/search/search/hybrid.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAGxD,UAAU,YAAY;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,YAAY,EAAE,CAAC;CACzB;
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../../../src/domain/search/search/hybrid.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AAGxD,UAAU,YAAY;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,YAAY,EAAE,CAAC;CACzB;AAsJD,wBAAsB,gBAAgB,CACpC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,IAAI,GAAE,kBAAuB,GAC5B,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC,CAiBpC"}
|
|
@@ -39,47 +39,40 @@ async function collectRankedLists(queries, customDbPath, opts, topK) {
|
|
|
39
39
|
}
|
|
40
40
|
return rankedLists;
|
|
41
41
|
}
|
|
42
|
-
/**
|
|
43
|
-
function
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
entry.
|
|
65
|
-
|
|
66
|
-
if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
|
|
67
|
-
entry.bm25Score = item.bm25Score ?? null;
|
|
68
|
-
entry.bm25Rank = item.rank;
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
else {
|
|
72
|
-
if (entry.semanticRank === null || item.rank < entry.semanticRank) {
|
|
73
|
-
entry.similarity = item.similarity ?? null;
|
|
74
|
-
entry.semanticRank = item.rank;
|
|
75
|
-
}
|
|
76
|
-
}
|
|
42
|
+
/** Initialise a fusion entry seeded from the first ranked item we see for a key. */
|
|
43
|
+
function createFusionEntry(item) {
|
|
44
|
+
return {
|
|
45
|
+
name: item.name,
|
|
46
|
+
kind: item.kind,
|
|
47
|
+
file: item.file,
|
|
48
|
+
line: item.line,
|
|
49
|
+
endLine: item.endLine ?? null,
|
|
50
|
+
role: item.role ?? null,
|
|
51
|
+
fileHash: item.fileHash ?? null,
|
|
52
|
+
rrfScore: 0,
|
|
53
|
+
bm25Score: null,
|
|
54
|
+
bm25Rank: null,
|
|
55
|
+
similarity: null,
|
|
56
|
+
semanticRank: null,
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
/** Merge a single ranked item into its fusion entry: update RRF and best per-source rank. */
|
|
60
|
+
function mergeRankedItem(entry, item, k) {
|
|
61
|
+
entry.rrfScore += 1 / (k + item.rank);
|
|
62
|
+
if (item.source === 'bm25') {
|
|
63
|
+
if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
|
|
64
|
+
entry.bm25Score = item.bm25Score ?? null;
|
|
65
|
+
entry.bm25Rank = item.rank;
|
|
77
66
|
}
|
|
78
67
|
}
|
|
79
|
-
|
|
80
|
-
.
|
|
81
|
-
.
|
|
82
|
-
|
|
68
|
+
else if (entry.semanticRank === null || item.rank < entry.semanticRank) {
|
|
69
|
+
entry.similarity = item.similarity ?? null;
|
|
70
|
+
entry.semanticRank = item.rank;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
/** Flatten a fusion entry into the public-facing hybrid result shape. */
|
|
74
|
+
function toHybridResult(e) {
|
|
75
|
+
return {
|
|
83
76
|
name: e.name,
|
|
84
77
|
kind: e.kind,
|
|
85
78
|
file: e.file,
|
|
@@ -92,7 +85,23 @@ function fuseResults(rankedLists, k, limit) {
|
|
|
92
85
|
bm25Rank: e.bm25Rank,
|
|
93
86
|
similarity: e.similarity,
|
|
94
87
|
semanticRank: e.semanticRank,
|
|
95
|
-
}
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
/** Reciprocal Rank Fusion: merge ranked lists into a single scored result set. */
|
|
91
|
+
function fuseResults(rankedLists, k, limit) {
|
|
92
|
+
const fusionMap = new Map();
|
|
93
|
+
for (const list of rankedLists) {
|
|
94
|
+
for (const item of list) {
|
|
95
|
+
if (!fusionMap.has(item.key)) {
|
|
96
|
+
fusionMap.set(item.key, createFusionEntry(item));
|
|
97
|
+
}
|
|
98
|
+
mergeRankedItem(fusionMap.get(item.key), item, k);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
return [...fusionMap.values()]
|
|
102
|
+
.sort((a, b) => b.rrfScore - a.rrfScore)
|
|
103
|
+
.slice(0, limit)
|
|
104
|
+
.map(toHybridResult);
|
|
96
105
|
}
|
|
97
106
|
export async function hybridSearchData(query, customDbPath, opts = {}) {
|
|
98
107
|
const config = opts.config || loadConfig();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hybrid.js","sourceRoot":"","sources":["../../../../src/domain/search/search/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,UAAU,EAAE,MAAM,mCAAmC,CAAC;AAE/D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAmD3C,wEAAwE;AACxE,SAAS,YAAY,CAAC,KAAa;IACjC,OAAO,KAAK;SACT,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAED,6DAA6D;AAC7D,KAAK,UAAU,kBAAkB,CAC/B,OAAiB,EACjB,YAAgC,EAChC,IAAwB,EACxB,IAAY;IAEZ,MAAM,WAAW,GAAmB,EAAE,CAAC;IAEvC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,EAAE,YAAY,EAAE,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,IAAI,QAAQ,EAAE,OAAO,EAAE,CAAC;YACtB,WAAW,CAAC,IAAI,CACd,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;gBAChC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,IAAI,EAAE,GAAG,GAAG,CAAC;gBACb,MAAM,EAAE,MAAe;gBACvB,GAAG,CAAC;aACL,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,CAAC,EAAE,YAAY,EAAE;YAChD,GAAG,IAAI;YACP,KAAK,EAAE,IAAI;YACX,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,GAAG;SAC/B,CAAC,CAAC;QACH,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;YACrB,WAAW,CAAC,IAAI,CACd,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;gBAC/B,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,IAAI,EAAE,GAAG,GAAG,CAAC;gBACb,MAAM,EAAE,UAAmB;gBAC3B,GAAG,CAAC;aACL,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,
|
|
1
|
+
{"version":3,"file":"hybrid.js","sourceRoot":"","sources":["../../../../src/domain/search/search/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,UAAU,EAAE,MAAM,mCAAmC,CAAC;AAE/D,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAE7C,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAmD3C,wEAAwE;AACxE,SAAS,YAAY,CAAC,KAAa;IACjC,OAAO,KAAK;SACT,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAED,6DAA6D;AAC7D,KAAK,UAAU,kBAAkB,CAC/B,OAAiB,EACjB,YAAgC,EAChC,IAAwB,EACxB,IAAY;IAEZ,MAAM,WAAW,GAAmB,EAAE,CAAC;IAEvC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,EAAE,YAAY,EAAE,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1E,IAAI,QAAQ,EAAE,OAAO,EAAE,CAAC;YACtB,WAAW,CAAC,IAAI,CACd,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;gBAChC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,IAAI,EAAE,GAAG,GAAG,CAAC;gBACb,MAAM,EAAE,MAAe;gBACvB,GAAG,CAAC;aACL,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,CAAC,EAAE,YAAY,EAAE;YAChD,GAAG,IAAI;YACP,KAAK,EAAE,IAAI;YACX,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,GAAG;SAC/B,CAAC,CAAC;QACH,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;YACrB,WAAW,CAAC,IAAI,CACd,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;gBAC/B,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,IAAI,EAAE,GAAG,GAAG,CAAC;gBACb,MAAM,EAAE,UAAmB;gBAC3B,GAAG,CAAC;aACL,CAAC,CAAC,CACJ,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,oFAAoF;AACpF,SAAS,iBAAiB,CAAC,IAAgB;IACzC,OAAO;QACL,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,OAAO,EAAG,IAAI,CAAC,OAAyB,IAAI,IAAI;QAChD,IAAI,EAAG,IAAI,CAAC,IAAsB,IAAI,IAAI;QAC1C,QAAQ,EAAG,IAAI,CAAC,QAA0B,IAAI,IAAI;QAClD,QAAQ,EAAE,CAAC;QACX,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,IAAI;QACd,UAAU,EAAE,IAAI;QAChB,YAAY,EAAE,IAAI;KACnB,CAAC;AACJ,CAAC;AAED,6FAA6F;AAC7F,SAAS,eAAe,CAAC,KAAkB,EAAE,IAAgB,EAAE,CAAS;IACtE,KAAK,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,QAAQ,KAAK,IAAI,IAAI,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC;YAC1D,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC;YACzC,KAAK,CAAC,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC;QAC7B,CAAC;IACH,CAAC;SAAM,IAAI,KAAK,CAAC,YAAY,KAAK,IAAI,IAAI,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,YAAY,EAAE,CAAC;QACzE,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC;QAC3C,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC;IACjC,CAAC;AACH,CAAC;AAED,yEAAyE;AACzE,SAAS,cAAc,CAAC,CAAc;IACpC,OAAO;QACL,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,GAAG,EAAE,CAAC,CAAC,QAAQ;QACf,SAAS,EAAE,CAAC,CAAC,SAAS;QACtB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,YAAY,EAAE,CAAC,CAAC,YAAY;KAC7B,CAAC;AACJ,CAAC;AAED,kFAAkF;AAClF,SAAS,WAAW,CAAC,WAA2B,EAAE,CAAS,EAAE,KAAa;IACxE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAuB,CAAC;IACjD,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;YACnD,CAAC;YACD,eAAe,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;SAC3B,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;SACvC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;SACf,GAAG,CAAC,cAAc,CAAC,CAAC;AACzB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,KAAa,EACb,YAAgC,EAChC,OAA2B,EAAE;IAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,UAAU,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,IAAK,EAAgC,CAAC;IACrE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC;IACjD,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC;IAC5C,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;IAEtD,MAAM,OAAO,GAAG,kBAAkB,CAAC,YAAY,CAA0B,CAAC;IAC1E,MAAM,YAAY,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAC1C,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,IAAI,CAAC,YAAY;QAAE,OAAO,IAAI,CAAC;IAE/B,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACpC,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IAChF,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;IAEnD,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../../src/domain/search/search/semantic.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAyB,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAMhF,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAChC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,cAAc;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,cAAc,EAAE,CAAC;CAC3B;
|
|
1
|
+
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../../../src/domain/search/search/semantic.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAyB,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAMhF,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAChC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,cAAc;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,cAAc,EAAE,CAAC;CAC3B;AAqBD,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,IAAI,GAAE,kBAAuB,GAC5B,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAmClC;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,WAAW,EAAE,KAAK,CAAC;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,UAAU,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACxE,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;KACxB,CAAC,CAAC;CACJ;AA8ED,wBAAsB,eAAe,CACnC,OAAO,EAAE,MAAM,EAAE,EACjB,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,IAAI,GAAE,kBAAuB,GAC5B,OAAO,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAyCnC"}
|
|
@@ -4,6 +4,19 @@ import { normalizeSymbol } from '../../queries.js';
|
|
|
4
4
|
import { embed } from '../models.js';
|
|
5
5
|
import { cosineSim } from '../stores/sqlite-blob.js';
|
|
6
6
|
import { prepareSearch } from './prepare.js';
|
|
7
|
+
/** Reconstitute a stored embedding row's vector blob into a Float32Array. */
|
|
8
|
+
function rowVector(row) {
|
|
9
|
+
return new Float32Array(new Uint8Array(row.vector).buffer);
|
|
10
|
+
}
|
|
11
|
+
/** Warn when stored embeddings and the query model use different dimensions. */
|
|
12
|
+
function checkDimensionMismatch(storedDim, dim) {
|
|
13
|
+
if (storedDim && dim !== storedDim) {
|
|
14
|
+
console.log(`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`);
|
|
15
|
+
console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
|
|
16
|
+
return true;
|
|
17
|
+
}
|
|
18
|
+
return false;
|
|
19
|
+
}
|
|
7
20
|
export async function searchData(query, customDbPath, opts = {}) {
|
|
8
21
|
const config = opts.config || loadConfig();
|
|
9
22
|
const searchCfg = config.search || {};
|
|
@@ -15,16 +28,12 @@ export async function searchData(query, customDbPath, opts = {}) {
|
|
|
15
28
|
const { db, rows, modelKey, storedDim } = prepared;
|
|
16
29
|
try {
|
|
17
30
|
const { vectors: [queryVec], dim, } = await embed([query], modelKey ?? undefined);
|
|
18
|
-
if (storedDim
|
|
19
|
-
console.log(`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`);
|
|
20
|
-
console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
|
|
31
|
+
if (checkDimensionMismatch(storedDim, dim))
|
|
21
32
|
return null;
|
|
22
|
-
}
|
|
23
33
|
const hc = new Map();
|
|
24
34
|
const results = [];
|
|
25
35
|
for (const row of rows) {
|
|
26
|
-
const
|
|
27
|
-
const sim = cosineSim(queryVec, vec);
|
|
36
|
+
const sim = cosineSim(queryVec, rowVector(row));
|
|
28
37
|
if (sim >= minScore) {
|
|
29
38
|
results.push({
|
|
30
39
|
...normalizeSymbol(row, db, hc),
|
|
@@ -39,62 +48,73 @@ export async function searchData(query, customDbPath, opts = {}) {
|
|
|
39
48
|
db.close();
|
|
40
49
|
}
|
|
41
50
|
}
|
|
51
|
+
/**
|
|
52
|
+
* Emit a warning for any query pair whose embeddings are nearly identical,
|
|
53
|
+
* since RRF would over-weight matches shared between them.
|
|
54
|
+
*/
|
|
55
|
+
function warnOnSimilarQueries(queries, queryVecs, threshold) {
|
|
56
|
+
for (let i = 0; i < queryVecs.length; i++) {
|
|
57
|
+
for (let j = i + 1; j < queryVecs.length; j++) {
|
|
58
|
+
const sim = cosineSim(queryVecs[i], queryVecs[j]);
|
|
59
|
+
if (sim >= threshold) {
|
|
60
|
+
warn(`Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
|
|
61
|
+
`(${(sim * 100).toFixed(0)}% cosine similarity). ` +
|
|
62
|
+
`This may bias RRF results toward their shared matches. ` +
|
|
63
|
+
`Consider using more distinct queries.`);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
/** Rank stored rows for a single query, keeping only those above minScore. */
|
|
69
|
+
function rankRowsForQuery(queryVec, rowVecs, minScore) {
|
|
70
|
+
const scored = [];
|
|
71
|
+
for (let ri = 0; ri < rowVecs.length; ri++) {
|
|
72
|
+
const sim = cosineSim(queryVec, rowVecs[ri]);
|
|
73
|
+
if (sim >= minScore) {
|
|
74
|
+
scored.push({ rowIndex: ri, similarity: sim });
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
scored.sort((a, b) => b.similarity - a.similarity);
|
|
78
|
+
return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
|
|
79
|
+
}
|
|
80
|
+
/** Reciprocal Rank Fusion across each query's ranked hits. */
|
|
81
|
+
function fuseRankedHits(queries, perQueryRanked, k) {
|
|
82
|
+
const fusionMap = new Map();
|
|
83
|
+
for (let qi = 0; qi < queries.length; qi++) {
|
|
84
|
+
for (const item of perQueryRanked[qi]) {
|
|
85
|
+
if (!fusionMap.has(item.rowIndex)) {
|
|
86
|
+
fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
|
|
87
|
+
}
|
|
88
|
+
const entry = fusionMap.get(item.rowIndex);
|
|
89
|
+
entry.rrfScore += 1 / (k + item.rank);
|
|
90
|
+
entry.queryScores.push({
|
|
91
|
+
query: queries[qi],
|
|
92
|
+
similarity: item.similarity,
|
|
93
|
+
rank: item.rank,
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
return fusionMap;
|
|
98
|
+
}
|
|
42
99
|
export async function multiSearchData(queries, customDbPath, opts = {}) {
|
|
43
100
|
const config = opts.config || loadConfig();
|
|
44
101
|
const searchCfg = config.search || {};
|
|
45
102
|
const limit = opts.limit ?? searchCfg.topK ?? 15;
|
|
46
103
|
const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2;
|
|
47
104
|
const k = opts.rrfK ?? searchCfg.rrfK ?? 60;
|
|
105
|
+
const similarityWarnThreshold = searchCfg.similarityWarnThreshold ?? 0.85;
|
|
48
106
|
const prepared = prepareSearch(customDbPath, opts);
|
|
49
107
|
if (!prepared)
|
|
50
108
|
return null;
|
|
51
109
|
const { db, rows, modelKey, storedDim } = prepared;
|
|
52
110
|
try {
|
|
53
111
|
const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined);
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
for (let j = i + 1; j < queryVecs.length; j++) {
|
|
57
|
-
const sim = cosineSim(queryVecs[i], queryVecs[j]);
|
|
58
|
-
if (sim >= SIMILARITY_WARN_THRESHOLD) {
|
|
59
|
-
warn(`Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
|
|
60
|
-
`(${(sim * 100).toFixed(0)}% cosine similarity). ` +
|
|
61
|
-
`This may bias RRF results toward their shared matches. ` +
|
|
62
|
-
`Consider using more distinct queries.`);
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
if (storedDim && dim !== storedDim) {
|
|
67
|
-
console.log(`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`);
|
|
68
|
-
console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
|
|
112
|
+
warnOnSimilarQueries(queries, queryVecs, similarityWarnThreshold);
|
|
113
|
+
if (checkDimensionMismatch(storedDim, dim))
|
|
69
114
|
return null;
|
|
70
|
-
|
|
71
|
-
const
|
|
72
|
-
const
|
|
73
|
-
const scored = [];
|
|
74
|
-
for (let ri = 0; ri < rows.length; ri++) {
|
|
75
|
-
const sim = cosineSim(queryVecs[qi], rowVecs[ri]);
|
|
76
|
-
if (sim >= minScore) {
|
|
77
|
-
scored.push({ rowIndex: ri, similarity: sim });
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
scored.sort((a, b) => b.similarity - a.similarity);
|
|
81
|
-
return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
|
|
82
|
-
});
|
|
83
|
-
const fusionMap = new Map();
|
|
84
|
-
for (let qi = 0; qi < queries.length; qi++) {
|
|
85
|
-
for (const item of perQueryRanked[qi]) {
|
|
86
|
-
if (!fusionMap.has(item.rowIndex)) {
|
|
87
|
-
fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
|
|
88
|
-
}
|
|
89
|
-
const entry = fusionMap.get(item.rowIndex);
|
|
90
|
-
entry.rrfScore += 1 / (k + item.rank);
|
|
91
|
-
entry.queryScores.push({
|
|
92
|
-
query: queries[qi],
|
|
93
|
-
similarity: item.similarity,
|
|
94
|
-
rank: item.rank,
|
|
95
|
-
});
|
|
96
|
-
}
|
|
97
|
-
}
|
|
115
|
+
const rowVecs = rows.map(rowVector);
|
|
116
|
+
const perQueryRanked = queries.map((_q, qi) => rankRowsForQuery(queryVecs[qi], rowVecs, minScore));
|
|
117
|
+
const fusionMap = fuseRankedHits(queries, perQueryRanked, k);
|
|
98
118
|
const hc = new Map();
|
|
99
119
|
const results = [];
|
|
100
120
|
for (const [rowIndex, entry] of fusionMap) {
|