@optave/codegraph 2.5.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +216 -89
- package/package.json +8 -7
- package/src/ast.js +392 -0
- package/src/audit.js +423 -0
- package/src/batch.js +180 -0
- package/src/boundaries.js +346 -0
- package/src/builder.js +375 -92
- package/src/cfg.js +1451 -0
- package/src/change-journal.js +130 -0
- package/src/check.js +432 -0
- package/src/cli.js +734 -107
- package/src/cochange.js +5 -2
- package/src/communities.js +7 -1
- package/src/complexity.js +124 -17
- package/src/config.js +10 -0
- package/src/dataflow.js +1187 -0
- package/src/db.js +96 -0
- package/src/embedder.js +359 -47
- package/src/export.js +305 -0
- package/src/extractors/csharp.js +64 -1
- package/src/extractors/go.js +66 -1
- package/src/extractors/hcl.js +22 -0
- package/src/extractors/java.js +61 -1
- package/src/extractors/javascript.js +142 -0
- package/src/extractors/php.js +79 -0
- package/src/extractors/python.js +134 -0
- package/src/extractors/ruby.js +89 -0
- package/src/extractors/rust.js +71 -1
- package/src/flow.js +4 -4
- package/src/index.js +78 -3
- package/src/manifesto.js +69 -1
- package/src/mcp.js +702 -193
- package/src/owners.js +359 -0
- package/src/paginate.js +37 -2
- package/src/parser.js +8 -0
- package/src/queries.js +590 -50
- package/src/snapshot.js +149 -0
- package/src/structure.js +9 -3
- package/src/triage.js +273 -0
- package/src/viewer.js +948 -0
- package/src/watcher.js +36 -1
package/src/db.js
CHANGED
|
@@ -144,6 +144,87 @@ export const MIGRATIONS = [
|
|
|
144
144
|
CREATE INDEX IF NOT EXISTS idx_fc_mi ON function_complexity(maintainability_index ASC);
|
|
145
145
|
`,
|
|
146
146
|
},
|
|
147
|
+
{
|
|
148
|
+
version: 10,
|
|
149
|
+
up: `
|
|
150
|
+
CREATE TABLE IF NOT EXISTS dataflow (
|
|
151
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
152
|
+
source_id INTEGER NOT NULL,
|
|
153
|
+
target_id INTEGER NOT NULL,
|
|
154
|
+
kind TEXT NOT NULL,
|
|
155
|
+
param_index INTEGER,
|
|
156
|
+
expression TEXT,
|
|
157
|
+
line INTEGER,
|
|
158
|
+
confidence REAL DEFAULT 1.0,
|
|
159
|
+
FOREIGN KEY(source_id) REFERENCES nodes(id),
|
|
160
|
+
FOREIGN KEY(target_id) REFERENCES nodes(id)
|
|
161
|
+
);
|
|
162
|
+
CREATE INDEX IF NOT EXISTS idx_dataflow_source ON dataflow(source_id);
|
|
163
|
+
CREATE INDEX IF NOT EXISTS idx_dataflow_target ON dataflow(target_id);
|
|
164
|
+
CREATE INDEX IF NOT EXISTS idx_dataflow_kind ON dataflow(kind);
|
|
165
|
+
CREATE INDEX IF NOT EXISTS idx_dataflow_source_kind ON dataflow(source_id, kind);
|
|
166
|
+
`,
|
|
167
|
+
},
|
|
168
|
+
{
|
|
169
|
+
version: 11,
|
|
170
|
+
up: `
|
|
171
|
+
ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id);
|
|
172
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id);
|
|
173
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id);
|
|
174
|
+
`,
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
version: 12,
|
|
178
|
+
up: `
|
|
179
|
+
CREATE TABLE IF NOT EXISTS cfg_blocks (
|
|
180
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
181
|
+
function_node_id INTEGER NOT NULL,
|
|
182
|
+
block_index INTEGER NOT NULL,
|
|
183
|
+
block_type TEXT NOT NULL,
|
|
184
|
+
start_line INTEGER,
|
|
185
|
+
end_line INTEGER,
|
|
186
|
+
label TEXT,
|
|
187
|
+
FOREIGN KEY(function_node_id) REFERENCES nodes(id),
|
|
188
|
+
UNIQUE(function_node_id, block_index)
|
|
189
|
+
);
|
|
190
|
+
CREATE INDEX IF NOT EXISTS idx_cfg_blocks_fn ON cfg_blocks(function_node_id);
|
|
191
|
+
|
|
192
|
+
CREATE TABLE IF NOT EXISTS cfg_edges (
|
|
193
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
194
|
+
function_node_id INTEGER NOT NULL,
|
|
195
|
+
source_block_id INTEGER NOT NULL,
|
|
196
|
+
target_block_id INTEGER NOT NULL,
|
|
197
|
+
kind TEXT NOT NULL,
|
|
198
|
+
FOREIGN KEY(function_node_id) REFERENCES nodes(id),
|
|
199
|
+
FOREIGN KEY(source_block_id) REFERENCES cfg_blocks(id),
|
|
200
|
+
FOREIGN KEY(target_block_id) REFERENCES cfg_blocks(id)
|
|
201
|
+
);
|
|
202
|
+
CREATE INDEX IF NOT EXISTS idx_cfg_edges_fn ON cfg_edges(function_node_id);
|
|
203
|
+
CREATE INDEX IF NOT EXISTS idx_cfg_edges_src ON cfg_edges(source_block_id);
|
|
204
|
+
CREATE INDEX IF NOT EXISTS idx_cfg_edges_tgt ON cfg_edges(target_block_id);
|
|
205
|
+
`,
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
version: 13,
|
|
209
|
+
up: `
|
|
210
|
+
CREATE TABLE IF NOT EXISTS ast_nodes (
|
|
211
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
212
|
+
file TEXT NOT NULL,
|
|
213
|
+
line INTEGER NOT NULL,
|
|
214
|
+
kind TEXT NOT NULL,
|
|
215
|
+
name TEXT NOT NULL,
|
|
216
|
+
text TEXT,
|
|
217
|
+
receiver TEXT,
|
|
218
|
+
parent_node_id INTEGER,
|
|
219
|
+
FOREIGN KEY(parent_node_id) REFERENCES nodes(id)
|
|
220
|
+
);
|
|
221
|
+
CREATE INDEX IF NOT EXISTS idx_ast_kind ON ast_nodes(kind);
|
|
222
|
+
CREATE INDEX IF NOT EXISTS idx_ast_name ON ast_nodes(name);
|
|
223
|
+
CREATE INDEX IF NOT EXISTS idx_ast_file ON ast_nodes(file);
|
|
224
|
+
CREATE INDEX IF NOT EXISTS idx_ast_parent ON ast_nodes(parent_node_id);
|
|
225
|
+
CREATE INDEX IF NOT EXISTS idx_ast_kind_name ON ast_nodes(kind, name);
|
|
226
|
+
`,
|
|
227
|
+
},
|
|
147
228
|
];
|
|
148
229
|
|
|
149
230
|
export function getBuildMeta(db, key) {
|
|
@@ -265,6 +346,21 @@ export function initSchema(db) {
|
|
|
265
346
|
} catch {
|
|
266
347
|
/* already exists */
|
|
267
348
|
}
|
|
349
|
+
try {
|
|
350
|
+
db.exec('ALTER TABLE nodes ADD COLUMN parent_id INTEGER REFERENCES nodes(id)');
|
|
351
|
+
} catch {
|
|
352
|
+
/* already exists */
|
|
353
|
+
}
|
|
354
|
+
try {
|
|
355
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_parent ON nodes(parent_id)');
|
|
356
|
+
} catch {
|
|
357
|
+
/* already exists */
|
|
358
|
+
}
|
|
359
|
+
try {
|
|
360
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_kind_parent ON nodes(kind, parent_id)');
|
|
361
|
+
} catch {
|
|
362
|
+
/* already exists */
|
|
363
|
+
}
|
|
268
364
|
}
|
|
269
365
|
|
|
270
366
|
export function findDbPath(customPath) {
|
package/src/embedder.js
CHANGED
|
@@ -4,6 +4,7 @@ import path from 'node:path';
|
|
|
4
4
|
import { createInterface } from 'node:readline';
|
|
5
5
|
import { closeDb, findDbPath, openDb, openReadonlyOrFail } from './db.js';
|
|
6
6
|
import { info, warn } from './logger.js';
|
|
7
|
+
import { normalizeSymbol } from './queries.js';
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
10
|
* Split an identifier into readable words.
|
|
@@ -384,6 +385,22 @@ function initEmbeddingsSchema(db) {
|
|
|
384
385
|
value TEXT
|
|
385
386
|
);
|
|
386
387
|
`);
|
|
388
|
+
|
|
389
|
+
// Add full_text column (idempotent — ignore if already exists)
|
|
390
|
+
try {
|
|
391
|
+
db.exec('ALTER TABLE embeddings ADD COLUMN full_text TEXT');
|
|
392
|
+
} catch {
|
|
393
|
+
/* column already exists */
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
// FTS5 virtual table for BM25 keyword search
|
|
397
|
+
db.exec(`
|
|
398
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5(
|
|
399
|
+
name,
|
|
400
|
+
content,
|
|
401
|
+
tokenize='unicode61'
|
|
402
|
+
);
|
|
403
|
+
`);
|
|
387
404
|
}
|
|
388
405
|
|
|
389
406
|
/**
|
|
@@ -411,6 +428,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
411
428
|
|
|
412
429
|
db.exec('DELETE FROM embeddings');
|
|
413
430
|
db.exec('DELETE FROM embedding_meta');
|
|
431
|
+
db.exec('DELETE FROM fts_index');
|
|
414
432
|
|
|
415
433
|
const nodes = db
|
|
416
434
|
.prepare(
|
|
@@ -445,6 +463,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
445
463
|
|
|
446
464
|
const texts = [];
|
|
447
465
|
const nodeIds = [];
|
|
466
|
+
const nodeNames = [];
|
|
448
467
|
const previews = [];
|
|
449
468
|
const config = getModelConfig(modelKey);
|
|
450
469
|
const contextWindow = config.contextWindow;
|
|
@@ -476,6 +495,7 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
476
495
|
|
|
477
496
|
texts.push(text);
|
|
478
497
|
nodeIds.push(node.id);
|
|
498
|
+
nodeNames.push(node.name);
|
|
479
499
|
previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
|
|
480
500
|
}
|
|
481
501
|
}
|
|
@@ -490,16 +510,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
490
510
|
const { vectors, dim } = await embed(texts, modelKey);
|
|
491
511
|
|
|
492
512
|
const insert = db.prepare(
|
|
493
|
-
'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)',
|
|
513
|
+
'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
|
|
494
514
|
);
|
|
515
|
+
const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)');
|
|
495
516
|
const insertMeta = db.prepare('INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)');
|
|
496
517
|
const insertAll = db.transaction(() => {
|
|
497
518
|
for (let i = 0; i < vectors.length; i++) {
|
|
498
|
-
insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
|
|
519
|
+
insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i], texts[i]);
|
|
520
|
+
insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
|
|
499
521
|
}
|
|
500
522
|
insertMeta.run('model', config.name);
|
|
501
523
|
insertMeta.run('dim', String(dim));
|
|
502
524
|
insertMeta.run('count', String(vectors.length));
|
|
525
|
+
insertMeta.run('fts_count', String(vectors.length));
|
|
503
526
|
insertMeta.run('strategy', strategy);
|
|
504
527
|
insertMeta.run('built_at', new Date().toISOString());
|
|
505
528
|
if (overflowCount > 0) {
|
|
@@ -560,7 +583,7 @@ function _prepareSearch(customDbPath, opts = {}) {
|
|
|
560
583
|
const noTests = opts.noTests || false;
|
|
561
584
|
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
|
|
562
585
|
let sql = `
|
|
563
|
-
SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line
|
|
586
|
+
SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line, n.end_line, n.role
|
|
564
587
|
FROM embeddings e
|
|
565
588
|
JOIN nodes n ON e.node_id = n.id
|
|
566
589
|
`;
|
|
@@ -616,6 +639,7 @@ export async function searchData(query, customDbPath, opts = {}) {
|
|
|
616
639
|
return null;
|
|
617
640
|
}
|
|
618
641
|
|
|
642
|
+
const hc = new Map();
|
|
619
643
|
const results = [];
|
|
620
644
|
for (const row of rows) {
|
|
621
645
|
const vec = new Float32Array(new Uint8Array(row.vector).buffer);
|
|
@@ -623,10 +647,7 @@ export async function searchData(query, customDbPath, opts = {}) {
|
|
|
623
647
|
|
|
624
648
|
if (sim >= minScore) {
|
|
625
649
|
results.push({
|
|
626
|
-
|
|
627
|
-
kind: row.kind,
|
|
628
|
-
file: row.file,
|
|
629
|
-
line: row.line,
|
|
650
|
+
...normalizeSymbol(row, db, hc),
|
|
630
651
|
similarity: sim,
|
|
631
652
|
});
|
|
632
653
|
}
|
|
@@ -712,14 +733,12 @@ export async function multiSearchData(queries, customDbPath, opts = {}) {
|
|
|
712
733
|
}
|
|
713
734
|
|
|
714
735
|
// Build results sorted by RRF score
|
|
736
|
+
const hc = new Map();
|
|
715
737
|
const results = [];
|
|
716
738
|
for (const [rowIndex, entry] of fusionMap) {
|
|
717
739
|
const row = rows[rowIndex];
|
|
718
740
|
results.push({
|
|
719
|
-
|
|
720
|
-
kind: row.kind,
|
|
721
|
-
file: row.file,
|
|
722
|
-
line: row.line,
|
|
741
|
+
...normalizeSymbol(row, db, hc),
|
|
723
742
|
rrf: entry.rrfScore,
|
|
724
743
|
queryScores: entry.queryScores,
|
|
725
744
|
});
|
|
@@ -731,71 +750,364 @@ export async function multiSearchData(queries, customDbPath, opts = {}) {
|
|
|
731
750
|
}
|
|
732
751
|
|
|
733
752
|
/**
 * Turn a free-form user query into a safe FTS5 MATCH expression.
 *
 * FTS5 syntax characters are replaced with spaces, the remainder is split on
 * whitespace, and every surviving token is emitted as a double-quoted string.
 * Multiple tokens are joined with OR.
 *
 * @param {string|null|undefined} query - Raw user query. Other values are
 *   coerced with String().
 * @returns {string|null} MATCH expression, or null when nothing searchable
 *   remains (missing, empty, or punctuation-only query).
 */
function sanitizeFtsQuery(query) {
  // A missing query has nothing to search for; report it the same way as an
  // empty string instead of throwing on `.replace`.
  if (query == null) return null;

  const tokens = String(query)
    // Strip FTS5 special chars that could cause syntax errors.
    .replace(/[*"():^{}~<>]/g, ' ')
    .split(/\s+/)
    // Leading/trailing whitespace produces empty pieces; drop them.
    .filter((token) => token.length > 0);
  if (tokens.length === 0) return null;

  // Quoting each token keeps it a plain string inside the MATCH expression.
  return tokens.map((token) => `"${token}"`).join(' OR ');
}
|
|
766
|
+
|
|
767
|
+
/**
 * Report whether the database holds a populated FTS5 index.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()`.
 * @returns {boolean} true when `fts_index` can be queried and contains at
 *   least one row; false when it is empty or the query fails for any reason.
 */
function hasFtsIndex(db) {
  let rowCount;
  try {
    ({ c: rowCount } = db.prepare('SELECT COUNT(*) as c FROM fts_index').get());
  } catch {
    // Any failure while counting is treated as "no index".
    return false;
  }
  return rowCount > 0;
}
|
|
779
|
+
|
|
780
|
+
/**
 * BM25 keyword search via FTS5.
 *
 * @param {string} query - Raw user query; sanitized before it reaches MATCH.
 * @param {string} [customDbPath] - Passed through to `openReadonlyOrFail`.
 * @param {object} [opts]
 * @param {number} [opts.limit=15] - Maximum number of results returned.
 * @param {boolean} [opts.noTests=false] - Drop rows whose file matches the
 *   test/spec/stories pattern.
 * @param {string} [opts.kind] - Restrict to nodes of this kind.
 * @param {string} [opts.filePattern] - Substring filter (SQL LIKE), or a glob
 *   when it contains any of `* ? [ ]` (applied after the query via `globMatch`).
 * @returns {{ results: object[] } | null} null when there is no populated FTS5
 *   index. Otherwise each result is the `normalizeSymbol` output for the row
 *   plus a `bm25Score`. An unsearchable query or a failing FTS5 query yields
 *   `{ results: [] }`.
 */
export function ftsSearchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const noTests = opts.noTests || false;
  const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;

  const db = openReadonlyOrFail(customDbPath);
  // The finally block guarantees the handle is released on every path,
  // including exceptions thrown while post-processing rows.
  try {
    if (!hasFtsIndex(db)) return null;

    const ftsQuery = sanitizeFtsQuery(query);
    if (!ftsQuery) return { results: [] };

    let sql = `
      SELECT f.rowid AS node_id, rank AS bm25_score,
             n.name, n.kind, n.file, n.line, n.end_line, n.role
      FROM fts_index f
      JOIN nodes n ON f.rowid = n.id
      WHERE fts_index MATCH ?
    `;
    const params = [ftsQuery];

    if (opts.kind) {
      sql += ' AND n.kind = ?';
      params.push(opts.kind);
    }

    // Glob patterns cannot be expressed as LIKE; they are filtered in JS below.
    const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
    if (opts.filePattern && !isGlob) {
      sql += ' AND n.file LIKE ?';
      params.push(`%${opts.filePattern}%`);
    }

    sql += ' ORDER BY rank LIMIT ?';
    params.push(limit * 5); // fetch generous set for post-filtering

    let rows;
    try {
      rows = db.prepare(sql).all(...params);
    } catch {
      // Invalid FTS5 query syntax — return empty
      return { results: [] };
    }

    if (isGlob) {
      rows = rows.filter((row) => globMatch(row.file, opts.filePattern));
    }
    if (noTests) {
      rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
    }

    // Map handed to normalizeSymbol and shared across all rows of this call.
    const hc = new Map();
    const results = rows.slice(0, limit).map((row) => ({
      ...normalizeSymbol(row, db, hc),
      bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display
    }));

    return { results };
  } finally {
    db.close();
  }
}
|
|
850
|
+
|
|
851
|
+
/**
 * Hybrid search: fuses BM25 keyword rankings and semantic rankings with
 * Reciprocal Rank Fusion (each hit contributes 1 / (k + rank)).
 *
 * A string query is split on ';' into sub-queries; every sub-query contributes
 * one BM25 list and one semantic list to the fusion.
 *
 * @param {string|*} query - Query text; non-strings are used as a single query.
 * @param {string} [customDbPath] - Passed through to the underlying searches.
 * @param {object} [opts] - `limit` (default 15), `rrfK` (default 60),
 *   `minScore` (default 0.2 for the semantic list), plus any option understood
 *   by `ftsSearchData` / `searchData`.
 * @returns {Promise<{ results: object[] } | null>} Results carry name, kind,
 *   file, line, endLine, role, fileHash, rrf, bm25Score, bm25Rank, similarity
 *   and semanticRank; null when no FTS5 index exists (caller should fall back
 *   to semantic-only search).
 */
export async function hybridSearchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const rrfConstant = opts.rrfK || 60;
  const perListLimit = limit * 5;

  // Semicolons separate independent sub-queries.
  let subQueries;
  if (typeof query === 'string') {
    subQueries = query
      .split(';')
      .map((part) => part.trim())
      .filter((part) => part.length > 0);
  } else {
    subQueries = [query];
  }

  // Cheap synchronous probe before any embedding work is started.
  const probeDb = openReadonlyOrFail(customDbPath);
  const ftsAvailable = hasFtsIndex(probeDb);
  probeDb.close();
  if (!ftsAvailable) return null;

  // Tag every hit with its fusion key, 1-based rank and originating ranker.
  const asRankedList = (hits, source) =>
    hits.map((hit, position) => ({
      key: `${hit.name}:${hit.file}:${hit.line}`,
      rank: position + 1,
      source,
      ...hit,
    }));

  const rankedLists = [];
  for (const subQuery of subQueries) {
    const keywordData = ftsSearchData(subQuery, customDbPath, { ...opts, limit: perListLimit });
    if (keywordData?.results) {
      rankedLists.push(asRankedList(keywordData.results, 'bm25'));
    }

    const semanticData = await searchData(subQuery, customDbPath, {
      ...opts,
      limit: perListLimit,
      minScore: opts.minScore || 0.2,
    });
    if (semanticData?.results) {
      rankedLists.push(asRankedList(semanticData.results, 'semantic'));
    }
  }

  // Accumulate RRF contributions per symbol, remembering the best rank (and
  // its score) seen from each ranker.
  const fused = new Map();
  for (const item of rankedLists.flat()) {
    let entry = fused.get(item.key);
    if (entry === undefined) {
      entry = {
        name: item.name,
        kind: item.kind,
        file: item.file,
        line: item.line,
        endLine: item.endLine ?? null,
        role: item.role ?? null,
        fileHash: item.fileHash ?? null,
        rrfScore: 0,
        bm25Score: null,
        bm25Rank: null,
        similarity: null,
        semanticRank: null,
      };
      fused.set(item.key, entry);
    }

    entry.rrfScore += 1 / (rrfConstant + item.rank);

    if (item.source === 'bm25') {
      if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
        entry.bm25Score = item.bm25Score;
        entry.bm25Rank = item.rank;
      }
    } else if (entry.semanticRank === null || item.rank < entry.semanticRank) {
      entry.similarity = item.similarity;
      entry.semanticRank = item.rank;
    }
  }

  const ordered = Array.from(fused.values());
  ordered.sort((left, right) => right.rrfScore - left.rrfScore);

  const results = ordered.slice(0, limit).map((entry) => ({
    name: entry.name,
    kind: entry.kind,
    file: entry.file,
    line: entry.line,
    endLine: entry.endLine,
    role: entry.role,
    fileHash: entry.fileHash,
    rrf: entry.rrfScore,
    bm25Score: entry.bm25Score,
    bm25Rank: entry.bm25Rank,
    similarity: entry.similarity,
    semanticRank: entry.semanticRank,
  }));

  return { results };
}
|
|
967
|
+
|
|
968
|
+
/**
 * CLI entry point for search: runs the query in the requested mode and prints
 * the outcome to stdout (JSON when `opts.json` is set, text otherwise).
 *
 * Modes: 'hybrid' (default), 'semantic', 'keyword'. Any other `opts.mode`
 * value takes the hybrid path. Semicolons in `query` separate sub-queries.
 * Hybrid mode falls back to semantic mode, with a warning, when no FTS5 index
 * is available.
 *
 * @param {string} query - Query text, optionally several joined by ';'.
 * @param {string} [customDbPath] - Passed through to the data functions.
 * @param {object} [opts] - `mode`, `json`, `rrfK`, plus search options.
 * @returns {Promise<void>}
 */
export async function search(query, customDbPath, opts = {}) {
  const mode = opts.mode || 'hybrid';

  // Individual sub-queries: split on ';', trimmed, empties removed.
  const queries = query
    .split(';')
    .map((part) => part.trim())
    .filter((part) => part.length > 0);

  const kindIcon = (kind) => {
    if (kind === 'function') return 'f';
    if (kind === 'class') return '*';
    return 'o';
  };
  const printJson = (payload) => console.log(JSON.stringify(payload, null, 2));
  const printShownCount = (payload) =>
    console.log(`\n ${payload.results.length} results shown\n`);
  const printNumberedQueries = () => {
    queries.forEach((text, i) => {
      console.log(` [${i + 1}] "${text}"`);
    });
    console.log();
  };

  // ─── Keyword-only mode ──────────────────────────────────────────────
  if (mode === 'keyword') {
    const keywordQuery = queries.length === 1 ? queries[0] : query;
    const data = ftsSearchData(keywordQuery, customDbPath, opts);
    if (!data) {
      console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.');
      return;
    }
    if (opts.json) {
      printJson(data);
      return;
    }

    console.log(`\nKeyword search: "${keywordQuery}" (BM25)\n`);
    if (data.results.length === 0) {
      console.log(' No results found.');
    } else {
      for (const hit of data.results) {
        console.log(
          ` BM25 ${hit.bm25Score.toFixed(2)} ${kindIcon(hit.kind)} ${hit.name} -- ${hit.file}:${hit.line}`,
        );
      }
    }
    printShownCount(data);
    return;
  }

  // ─── Semantic-only mode ─────────────────────────────────────────────
  if (mode === 'semantic') {
    if (queries.length > 1) {
      const data = await multiSearchData(queries, customDbPath, opts);
      if (!data) return;
      if (opts.json) {
        printJson(data);
        return;
      }

      console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
      printNumberedQueries();
      if (data.results.length === 0) {
        console.log(' No results above threshold.');
      } else {
        for (const hit of data.results) {
          console.log(
            ` RRF ${hit.rrf.toFixed(4)} ${kindIcon(hit.kind)} ${hit.name} -- ${hit.file}:${hit.line}`,
          );
          // One line per sub-query that matched this symbol.
          for (const qs of hit.queryScores) {
            const bar = '#'.repeat(Math.round(qs.similarity * 20));
            console.log(
              ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`,
            );
          }
        }
      }
      printShownCount(data);
      return;
    }

    const semanticQuery = queries[0] || query;
    const data = await searchData(semanticQuery, customDbPath, opts);
    if (!data) return;
    if (opts.json) {
      printJson(data);
      return;
    }

    console.log(`\nSemantic search: "${semanticQuery}"\n`);
    if (data.results.length === 0) {
      console.log(' No results above threshold.');
    } else {
      for (const hit of data.results) {
        const bar = '#'.repeat(Math.round(hit.similarity * 20));
        console.log(` ${(hit.similarity * 100).toFixed(1)}% ${bar}`);
        console.log(` ${kindIcon(hit.kind)} ${hit.name} -- ${hit.file}:${hit.line}`);
      }
    }
    printShownCount(data);
    return;
  }

  // ─── Hybrid mode (default) ──────────────────────────────────────────
  const data = await hybridSearchData(query, customDbPath, opts);
  if (!data) {
    // No FTS5 index — fall back to semantic-only
    warn(
      'FTS5 index not found — using semantic search only. Re-run `codegraph embed` to enable hybrid mode.',
    );
    return search(query, customDbPath, { ...opts, mode: 'semantic' });
  }
  if (opts.json) {
    printJson(data);
    return;
  }

  const rrfK = opts.rrfK || 60;
  if (queries.length > 1) {
    console.log(`\nHybrid multi-query search (BM25 + semantic, RRF k=${rrfK}):`);
    printNumberedQueries();
  } else {
    const hybridQuery = queries[0] || query;
    console.log(`\nHybrid search: "${hybridQuery}" (BM25 + semantic, RRF k=${rrfK})\n`);
  }

  if (data.results.length === 0) {
    console.log(' No results found.');
  } else {
    for (const hit of data.results) {
      console.log(
        ` RRF ${hit.rrf.toFixed(4)} ${kindIcon(hit.kind)} ${hit.name} -- ${hit.file}:${hit.line}`,
      );
      // Per-ranker detail line; only rankers that returned this symbol appear.
      const details = [];
      if (hit.bm25Rank != null) {
        details.push(`BM25: rank ${hit.bm25Rank} (score ${hit.bm25Score.toFixed(2)})`);
      }
      if (hit.semanticRank != null) {
        details.push(`Semantic: rank ${hit.semanticRank} (${(hit.similarity * 100).toFixed(1)}%)`);
      }
      if (details.length > 0) {
        console.log(` ${details.join(' | ')}`);
      }
    }
  }

  printShownCount(data);
}
|