@zuvia-software-solutions/code-mapper 2.3.12 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +59 -1
- package/dist/core/db/adapter.d.ts +35 -1
- package/dist/core/db/adapter.js +82 -1
- package/dist/core/db/schema.d.ts +12 -1
- package/dist/core/db/schema.js +34 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +3 -1
- package/dist/core/embeddings/embedding-pipeline.js +55 -2
- package/dist/core/embeddings/text-generator.js +10 -2
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/embeddings/types.js +2 -4
- package/dist/core/incremental/refresh.js +39 -3
- package/dist/mcp/local/local-backend.d.ts +26 -0
- package/dist/mcp/local/local-backend.js +310 -17
- package/models/mlx-embedder.py +29 -2
- package/package.json +1 -1
package/dist/cli/analyze.js
CHANGED
|
@@ -269,10 +269,68 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
269
269
|
recordPhase('search-text');
|
|
270
270
|
updateBar(84, 'Building search index...');
|
|
271
271
|
populateSearchText(db);
|
|
272
|
+
// Phase 2.7: Build refs table (identifier occurrence index)
|
|
273
|
+
recordPhase('refs');
|
|
274
|
+
updateBar(85, 'Building refs index...');
|
|
275
|
+
{
|
|
276
|
+
const { clearRefs, insertRefsBatch, clearFileWords, upsertFileWords } = await import('../core/db/adapter.js');
|
|
277
|
+
const fsRef = await import('fs/promises');
|
|
278
|
+
clearRefs(db);
|
|
279
|
+
clearFileWords(db);
|
|
280
|
+
// Scan all source files for identifier occurrences
|
|
281
|
+
const STOP_WORDS = new Set(['the', 'and', 'for', 'from', 'with', 'this', 'that', 'have', 'has', 'not', 'are', 'was', 'were', 'been', 'being', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'does', 'did', 'let', 'var', 'const', 'new', 'return', 'function', 'class', 'import', 'export', 'default', 'void', 'null', 'undefined', 'true', 'false', 'else', 'case', 'break', 'continue', 'while', 'throw', 'catch', 'try', 'finally', 'async', 'await', 'yield', 'typeof', 'instanceof', 'delete', 'switch', 'interface', 'type', 'enum', 'extends', 'implements', 'static', 'private', 'public', 'protected', 'abstract', 'readonly', 'override', 'declare', 'module', 'namespace', 'require', 'string', 'number', 'boolean', 'object', 'any', 'never', 'unknown', 'symbol']);
|
|
282
|
+
const SRC_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.c', '.h', '.cpp', '.hpp', '.cs', '.rb', '.php', '.kt', '.swift', '.mts', '.mjs', '.cts', '.cjs']);
|
|
283
|
+
const identRegex = /\b[a-zA-Z_]\w{2,}\b/g;
|
|
284
|
+
const wordRegex = /\b[a-zA-Z]\w{2,}\b/g;
|
|
285
|
+
// Get all file paths from the nodes table
|
|
286
|
+
const fileRows = db.prepare("SELECT DISTINCT filePath FROM nodes WHERE label = 'File'").all();
|
|
287
|
+
let refsBuilt = 0;
|
|
288
|
+
for (const { filePath } of fileRows) {
|
|
289
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
290
|
+
if (!SRC_EXTENSIONS.has(ext))
|
|
291
|
+
continue;
|
|
292
|
+
let content;
|
|
293
|
+
try {
|
|
294
|
+
content = await fsRef.readFile(path.resolve(repoPath, filePath), 'utf-8');
|
|
295
|
+
}
|
|
296
|
+
catch {
|
|
297
|
+
continue;
|
|
298
|
+
}
|
|
299
|
+
// Build refs (identifier occurrences — skip language keywords)
|
|
300
|
+
const refs = [];
|
|
301
|
+
const lines = content.split('\n');
|
|
302
|
+
for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
|
|
303
|
+
let match;
|
|
304
|
+
identRegex.lastIndex = 0;
|
|
305
|
+
while ((match = identRegex.exec(lines[lineIdx])) !== null) {
|
|
306
|
+
if (!STOP_WORDS.has(match[0].toLowerCase())) {
|
|
307
|
+
refs.push({ symbol: match[0], filePath, line: lineIdx });
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
if (refs.length > 0)
|
|
312
|
+
insertRefsBatch(db, refs);
|
|
313
|
+
// Build file_words (conceptual search)
|
|
314
|
+
const wordSet = new Set();
|
|
315
|
+
let wMatch;
|
|
316
|
+
wordRegex.lastIndex = 0;
|
|
317
|
+
while ((wMatch = wordRegex.exec(content)) !== null) {
|
|
318
|
+
const w = wMatch[0].toLowerCase();
|
|
319
|
+
if (!STOP_WORDS.has(w))
|
|
320
|
+
wordSet.add(w);
|
|
321
|
+
}
|
|
322
|
+
if (wordSet.size > 0)
|
|
323
|
+
upsertFileWords(db, filePath, [...wordSet].join(' '));
|
|
324
|
+
refsBuilt++;
|
|
325
|
+
if (refsBuilt % 500 === 0) {
|
|
326
|
+
updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
272
330
|
// Phase 3: FTS (85-90%)
|
|
273
331
|
// FTS5 is auto-created by schema triggers — no manual index creation needed
|
|
274
332
|
recordPhase('fts');
|
|
275
|
-
updateBar(
|
|
333
|
+
updateBar(87, 'Search indexes ready');
|
|
276
334
|
// Phase 3.5: Re-insert cached embeddings
|
|
277
335
|
recordPhase('restore-embeddings');
|
|
278
336
|
if (cachedEmbeddings.length > 0) {
|
|
@@ -80,11 +80,12 @@ export declare function searchFTS(db: Database.Database, query: string, limit?:
|
|
|
80
80
|
filePath: string;
|
|
81
81
|
score: number;
|
|
82
82
|
}>;
|
|
83
|
-
/** Get node count, edge count, and
|
|
83
|
+
/** Get node count, edge count, embedding count, and refs count. */
|
|
84
84
|
export declare function getStats(db: Database.Database): {
    /** Row count reported for nodes. */
    nodes: number;
    /** Row count reported for edges. */
    edges: number;
    /** Row count reported for embeddings. */
    embeddings: number;
    /** Row count reported for the refs (identifier occurrence) index. */
    refs: number;
};
|
|
89
90
|
/** Batch insert nodes in a single transaction. */
|
|
90
91
|
export declare function insertNodesBatch(db: Database.Database, nodes: readonly NodeInsert[]): void;
|
|
@@ -104,6 +105,39 @@ export declare function insertEmbeddingsBatch(db: Database.Database, items: read
|
|
|
104
105
|
}[]): void;
|
|
105
106
|
/** Get all textHashes from the embeddings table for hash-based skip on re-index */
|
|
106
107
|
export declare function getEmbeddingHashes(db: Database.Database): Map<string, string>;
|
|
108
|
+
/** Insert a batch of identifier occurrences into the refs index. */
export declare function insertRefsBatch(db: Database.Database, refs: readonly {
    symbol: string;
    filePath: string;
    line: number;
}[]): void;
/** Remove every ref recorded for one file (used by incremental refresh). */
export declare function deleteRefsByFile(db: Database.Database, filePath: string): void;
/** Look up occurrences of a symbol name; each hit is a file path plus a line number. */
export declare function findRefsBySymbol(db: Database.Database, symbol: string, limit?: number): {
    filePath: string;
    line: number;
}[];
/** Number of rows currently held in the refs index. */
export declare function countRefs(db: Database.Database): number;
/** Empty the refs index (used before a full rebuild). */
export declare function clearRefs(db: Database.Database): void;
/** Store the word list for one file, replacing any previous entry for that path. */
export declare function upsertFileWords(db: Database.Database, filePath: string, words: string): void;
/** Store word lists for many files inside a single transaction. */
export declare function insertFileWordsBatch(db: Database.Database, entries: readonly {
    filePath: string;
    words: string;
}[]): void;
/** Remove the word-list entry of one file. */
export declare function deleteFileWordsByFile(db: Database.Database, filePath: string): void;
/** Query file_words_fts for conceptual matches; results are file paths ordered by relevance. */
export declare function searchFileWords(db: Database.Database, query: string, limit?: number): {
    filePath: string;
    score: number;
}[];
/** Empty the file-level word index (used before a full rebuild). */
export declare function clearFileWords(db: Database.Database): void;
|
|
107
141
|
/** Escape a string for use in SQL single-quoted literals. */
|
|
108
142
|
export declare function escapeSql(value: string): string;
|
|
109
143
|
/** Execute a raw SQL query and return rows. */
|
package/dist/core/db/adapter.js
CHANGED
|
@@ -347,12 +347,13 @@ export function searchFTS(db, query, limit = 20) {
|
|
|
347
347
|
// ---------------------------------------------------------------------------
|
|
348
348
|
// Stats
|
|
349
349
|
// ---------------------------------------------------------------------------
|
|
350
|
-
/** Get node count, edge count, and
|
|
350
|
+
/** Get node count, edge count, embedding count, and refs count. */
|
|
351
351
|
export function getStats(db) {
    // Gather each count in the same order the result object lists them.
    const nodes = countNodes(db);
    const edges = countEdges(db);
    const embeddings = countEmbeddings(db);
    const refs = countRefs(db);
    return { nodes, edges, embeddings, refs };
}
|
|
358
359
|
// ---------------------------------------------------------------------------
|
|
@@ -513,6 +514,86 @@ export function getEmbeddingHashes(db) {
|
|
|
513
514
|
return map;
|
|
514
515
|
}
|
|
515
516
|
// ---------------------------------------------------------------------------
|
|
517
|
+
// Refs (identifier occurrence index)
|
|
518
|
+
// ---------------------------------------------------------------------------
|
|
519
|
+
/** Bulk-insert identifier references */
|
|
520
|
+
export function insertRefsBatch(db, refs) {
|
|
521
|
+
if (refs.length === 0)
|
|
522
|
+
return;
|
|
523
|
+
const stmt = db.prepare('INSERT INTO refs (symbol, filePath, line) VALUES (?, ?, ?)');
|
|
524
|
+
const tx = db.transaction(() => {
|
|
525
|
+
for (const ref of refs) {
|
|
526
|
+
stmt.run(ref.symbol, ref.filePath, ref.line);
|
|
527
|
+
}
|
|
528
|
+
});
|
|
529
|
+
tx();
|
|
530
|
+
}
|
|
531
|
+
/** Delete all refs for a given file (used by incremental refresh) */
|
|
532
|
+
export function deleteRefsByFile(db, filePath) {
|
|
533
|
+
db.prepare('DELETE FROM refs WHERE filePath = ?').run(filePath);
|
|
534
|
+
}
|
|
535
|
+
/** Find all files referencing a symbol name */
|
|
536
|
+
export function findRefsBySymbol(db, symbol, limit = 200) {
|
|
537
|
+
return db.prepare('SELECT DISTINCT filePath, line FROM refs WHERE symbol = ? LIMIT ?').all(symbol, limit);
|
|
538
|
+
}
|
|
539
|
+
/** Count total refs in the index */
|
|
540
|
+
export function countRefs(db) {
|
|
541
|
+
const row = db.prepare('SELECT COUNT(*) as cnt FROM refs').get();
|
|
542
|
+
return row?.cnt ?? 0;
|
|
543
|
+
}
|
|
544
|
+
/** Delete all refs (used before full rebuild) */
|
|
545
|
+
export function clearRefs(db) {
|
|
546
|
+
db.prepare('DELETE FROM refs').run();
|
|
547
|
+
}
|
|
548
|
+
// ---------------------------------------------------------------------------
|
|
549
|
+
// File Words (conceptual search index)
|
|
550
|
+
// ---------------------------------------------------------------------------
|
|
551
|
+
/** Insert or replace file-level word index */
|
|
552
|
+
export function upsertFileWords(db, filePath, words) {
|
|
553
|
+
db.prepare('INSERT OR REPLACE INTO file_words (filePath, words) VALUES (?, ?)').run(filePath, words);
|
|
554
|
+
}
|
|
555
|
+
/** Bulk insert file words in a transaction */
|
|
556
|
+
export function insertFileWordsBatch(db, entries) {
|
|
557
|
+
if (entries.length === 0)
|
|
558
|
+
return;
|
|
559
|
+
const stmt = db.prepare('INSERT OR REPLACE INTO file_words (filePath, words) VALUES (?, ?)');
|
|
560
|
+
const tx = db.transaction(() => {
|
|
561
|
+
for (const entry of entries) {
|
|
562
|
+
stmt.run(entry.filePath, entry.words);
|
|
563
|
+
}
|
|
564
|
+
});
|
|
565
|
+
tx();
|
|
566
|
+
}
|
|
567
|
+
/** Delete file words for a given file */
|
|
568
|
+
export function deleteFileWordsByFile(db, filePath) {
|
|
569
|
+
db.prepare('DELETE FROM file_words WHERE filePath = ?').run(filePath);
|
|
570
|
+
}
|
|
571
|
+
/** Search file_words_fts for conceptual matches, returns file paths ranked by relevance */
|
|
572
|
+
export function searchFileWords(db, query, limit = 20) {
|
|
573
|
+
let safeQuery = query.replace(/"/g, '""').replace(/[*(){}[\]^~\\:]/g, ' ').trim();
|
|
574
|
+
if (!safeQuery)
|
|
575
|
+
return [];
|
|
576
|
+
const words = safeQuery.split(/\s+/).filter(w => w.length > 2);
|
|
577
|
+
if (words.length === 0)
|
|
578
|
+
return [];
|
|
579
|
+
safeQuery = words.join(' OR ');
|
|
580
|
+
try {
|
|
581
|
+
return db.prepare(`SELECT fw.filePath, rank as score
|
|
582
|
+
FROM file_words_fts fts
|
|
583
|
+
JOIN file_words fw ON fw.rowid = fts.rowid
|
|
584
|
+
WHERE file_words_fts MATCH ?
|
|
585
|
+
ORDER BY rank
|
|
586
|
+
LIMIT ?`).all(safeQuery, limit);
|
|
587
|
+
}
|
|
588
|
+
catch {
|
|
589
|
+
return [];
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
/** Clear all file words (used before full rebuild) */
|
|
593
|
+
export function clearFileWords(db) {
|
|
594
|
+
db.prepare('DELETE FROM file_words').run();
|
|
595
|
+
}
|
|
596
|
+
// ---------------------------------------------------------------------------
|
|
516
597
|
// Raw SQL escape (for dynamic queries in local-backend.ts)
|
|
517
598
|
// ---------------------------------------------------------------------------
|
|
518
599
|
/** Escape a string for use in SQL single-quoted literals. */
|
package/dist/core/db/schema.d.ts
CHANGED
|
@@ -68,6 +68,17 @@ export interface EmbeddingRow {
|
|
|
68
68
|
readonly embedding: Buffer;
|
|
69
69
|
readonly textHash: string | null;
|
|
70
70
|
}
|
|
71
|
+
/** One identifier occurrence, shaped like a row of the `refs` table. */
export interface RefsRow {
    /** Identifier text of the occurrence. */
    readonly symbol: string;
    /** Path of the file containing the occurrence. */
    readonly filePath: string;
    /** Line of the occurrence within that file. */
    readonly line: number;
}
|
|
77
|
+
/** One file's entry in the word index, shaped like a row of the `file_words` table. */
export interface FileWordsRow {
    /** Path of the indexed file. */
    readonly filePath: string;
    /** Word list text stored for the file. */
    readonly words: string;
}
|
|
71
82
|
/** Fields required to insert a node */
|
|
72
83
|
export interface NodeInsert {
|
|
73
84
|
readonly id: NodeId;
|
|
@@ -107,4 +118,4 @@ export interface EdgeInsert {
|
|
|
107
118
|
}
|
|
108
119
|
/** Legacy edge table name constant (kept for compatibility) */
|
|
109
120
|
export declare const REL_TABLE_NAME = "CodeRelation";
|
|
110
|
-
export declare const SCHEMA_SQL = "\n-- Nodes: unified table for all code elements\nCREATE TABLE IF NOT EXISTS nodes (\n id TEXT PRIMARY KEY,\n label TEXT NOT NULL,\n name TEXT NOT NULL DEFAULT '',\n filePath TEXT NOT NULL DEFAULT '',\n startLine INTEGER,\n endLine INTEGER,\n isExported INTEGER,\n content TEXT NOT NULL DEFAULT '',\n description TEXT NOT NULL DEFAULT '',\n heuristicLabel TEXT,\n cohesion REAL,\n symbolCount INTEGER,\n keywords TEXT,\n enrichedBy TEXT,\n processType TEXT,\n stepCount INTEGER,\n communities TEXT,\n entryPointId TEXT,\n terminalId TEXT,\n parameterCount INTEGER,\n returnType TEXT,\n nameExpanded TEXT DEFAULT '',\n searchText TEXT DEFAULT ''\n);\n\nCREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(label);\nCREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath ON nodes(filePath);\nCREATE INDEX IF NOT EXISTS idx_nodes_label_name ON nodes(label, name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath_lines ON nodes(filePath, startLine, endLine);\n\n-- Edges: single table for all relationships\nCREATE TABLE IF NOT EXISTS edges (\n id TEXT PRIMARY KEY,\n sourceId TEXT NOT NULL,\n targetId TEXT NOT NULL,\n type TEXT NOT NULL,\n confidence REAL NOT NULL DEFAULT 1.0,\n reason TEXT NOT NULL DEFAULT '',\n step INTEGER NOT NULL DEFAULT 0,\n callLine INTEGER\n);\n\nCREATE INDEX IF NOT EXISTS idx_edges_sourceId ON edges(sourceId);\nCREATE INDEX IF NOT EXISTS idx_edges_targetId ON edges(targetId);\nCREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type);\nCREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(sourceId, type);\nCREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(targetId, type);\n\n-- Embeddings: vector storage\nCREATE TABLE IF NOT EXISTS embeddings (\n nodeId TEXT PRIMARY KEY,\n embedding BLOB NOT NULL,\n textHash TEXT\n);\n\n-- FTS5 virtual table (auto-updated via triggers)\nCREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(\n name,\n nameExpanded,\n searchText,\n 
filePath,\n content,\n content='nodes',\n content_rowid='rowid'\n);\n\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN\n INSERT INTO nodes_fts(rowid, name, nameExpanded, searchText, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.searchText, new.filePath, new.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.searchText, old.filePath, old.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.searchText, old.filePath, old.content);\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.searchText, new.filePath, new.content);\nEND;\n";
|
|
121
|
+
export declare const SCHEMA_SQL = "\n-- Nodes: unified table for all code elements\nCREATE TABLE IF NOT EXISTS nodes (\n id TEXT PRIMARY KEY,\n label TEXT NOT NULL,\n name TEXT NOT NULL DEFAULT '',\n filePath TEXT NOT NULL DEFAULT '',\n startLine INTEGER,\n endLine INTEGER,\n isExported INTEGER,\n content TEXT NOT NULL DEFAULT '',\n description TEXT NOT NULL DEFAULT '',\n heuristicLabel TEXT,\n cohesion REAL,\n symbolCount INTEGER,\n keywords TEXT,\n enrichedBy TEXT,\n processType TEXT,\n stepCount INTEGER,\n communities TEXT,\n entryPointId TEXT,\n terminalId TEXT,\n parameterCount INTEGER,\n returnType TEXT,\n nameExpanded TEXT DEFAULT '',\n searchText TEXT DEFAULT ''\n);\n\nCREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(label);\nCREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath ON nodes(filePath);\nCREATE INDEX IF NOT EXISTS idx_nodes_label_name ON nodes(label, name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath_lines ON nodes(filePath, startLine, endLine);\n\n-- Edges: single table for all relationships\nCREATE TABLE IF NOT EXISTS edges (\n id TEXT PRIMARY KEY,\n sourceId TEXT NOT NULL,\n targetId TEXT NOT NULL,\n type TEXT NOT NULL,\n confidence REAL NOT NULL DEFAULT 1.0,\n reason TEXT NOT NULL DEFAULT '',\n step INTEGER NOT NULL DEFAULT 0,\n callLine INTEGER\n);\n\nCREATE INDEX IF NOT EXISTS idx_edges_sourceId ON edges(sourceId);\nCREATE INDEX IF NOT EXISTS idx_edges_targetId ON edges(targetId);\nCREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type);\nCREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(sourceId, type);\nCREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(targetId, type);\n\n-- Embeddings: vector storage\nCREATE TABLE IF NOT EXISTS embeddings (\n nodeId TEXT PRIMARY KEY,\n embedding BLOB NOT NULL,\n textHash TEXT\n);\n\n-- FTS5 virtual table (auto-updated via triggers)\nCREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(\n name,\n nameExpanded,\n searchText,\n 
filePath,\n content,\n content='nodes',\n content_rowid='rowid'\n);\n\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN\n INSERT INTO nodes_fts(rowid, name, nameExpanded, searchText, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.searchText, new.filePath, new.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.searchText, old.filePath, old.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.searchText, old.filePath, old.content);\n INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.searchText, new.filePath, new.content);\nEND;\n\n-- Refs: identifier occurrence index (pre-computed grep)\nCREATE TABLE IF NOT EXISTS refs (\n symbol TEXT NOT NULL,\n filePath TEXT NOT NULL,\n line INTEGER NOT NULL\n);\n\nCREATE INDEX IF NOT EXISTS idx_refs_symbol ON refs(symbol);\nCREATE INDEX IF NOT EXISTS idx_refs_filePath ON refs(filePath);\nCREATE INDEX IF NOT EXISTS idx_refs_symbol_file ON refs(symbol, filePath);\n\n-- File-level word index for conceptual search\nCREATE TABLE IF NOT EXISTS file_words (\n filePath TEXT PRIMARY KEY,\n words TEXT NOT NULL DEFAULT ''\n);\n\nCREATE VIRTUAL TABLE IF NOT EXISTS file_words_fts USING fts5(\n words,\n content='file_words',\n content_rowid='rowid'\n);\n\nCREATE TRIGGER IF NOT EXISTS file_words_fts_ai AFTER INSERT ON file_words BEGIN\n INSERT INTO file_words_fts(rowid, words) VALUES (new.rowid, new.words);\nEND;\nCREATE TRIGGER IF NOT EXISTS file_words_fts_ad AFTER DELETE ON file_words BEGIN\n INSERT INTO file_words_fts(file_words_fts, 
rowid, words) VALUES ('delete', old.rowid, old.words);\nEND;\nCREATE TRIGGER IF NOT EXISTS file_words_fts_au AFTER UPDATE ON file_words BEGIN\n INSERT INTO file_words_fts(file_words_fts, rowid, words) VALUES ('delete', old.rowid, old.words);\n INSERT INTO file_words_fts(file_words_fts, rowid, words) VALUES (new.rowid, new.words);\nEND;\n";
|
package/dist/core/db/schema.js
CHANGED
|
@@ -135,4 +135,38 @@ CREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN
|
|
|
135
135
|
INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.searchText, old.filePath, old.content);
|
|
136
136
|
INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, searchText, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.searchText, new.filePath, new.content);
|
|
137
137
|
END;
|
|
138
|
+
|
|
139
|
+
-- Refs: identifier occurrence index (pre-computed grep)
|
|
140
|
+
CREATE TABLE IF NOT EXISTS refs (
|
|
141
|
+
symbol TEXT NOT NULL,
|
|
142
|
+
filePath TEXT NOT NULL,
|
|
143
|
+
line INTEGER NOT NULL
|
|
144
|
+
);
|
|
145
|
+
|
|
146
|
+
CREATE INDEX IF NOT EXISTS idx_refs_symbol ON refs(symbol);
|
|
147
|
+
CREATE INDEX IF NOT EXISTS idx_refs_filePath ON refs(filePath);
|
|
148
|
+
CREATE INDEX IF NOT EXISTS idx_refs_symbol_file ON refs(symbol, filePath);
|
|
149
|
+
|
|
150
|
+
-- File-level word index for conceptual search
|
|
151
|
+
CREATE TABLE IF NOT EXISTS file_words (
|
|
152
|
+
filePath TEXT PRIMARY KEY,
|
|
153
|
+
words TEXT NOT NULL DEFAULT ''
|
|
154
|
+
);
|
|
155
|
+
|
|
156
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS file_words_fts USING fts5(
|
|
157
|
+
words,
|
|
158
|
+
content='file_words',
|
|
159
|
+
content_rowid='rowid'
|
|
160
|
+
);
|
|
161
|
+
|
|
162
|
+
CREATE TRIGGER IF NOT EXISTS file_words_fts_ai AFTER INSERT ON file_words BEGIN
|
|
163
|
+
INSERT INTO file_words_fts(rowid, words) VALUES (new.rowid, new.words);
|
|
164
|
+
END;
|
|
165
|
+
CREATE TRIGGER IF NOT EXISTS file_words_fts_ad AFTER DELETE ON file_words BEGIN
|
|
166
|
+
INSERT INTO file_words_fts(file_words_fts, rowid, words) VALUES ('delete', old.rowid, old.words);
|
|
167
|
+
END;
|
|
168
|
+
-- Keep file_words_fts in sync on UPDATE: remove the old row's words, then index the new ones.
-- The re-insert names only (rowid, words); the leading file_words_fts column is reserved for
-- FTS commands such as 'delete', and listing it here left three columns for two values.
-- NOTE: because of IF NOT EXISTS, a database that already has the old trigger keeps it.
CREATE TRIGGER IF NOT EXISTS file_words_fts_au AFTER UPDATE ON file_words BEGIN
INSERT INTO file_words_fts(file_words_fts, rowid, words) VALUES ('delete', old.rowid, old.words);
INSERT INTO file_words_fts(rowid, words) VALUES (new.rowid, new.words);
END;
|
|
138
172
|
`;
|
|
@@ -11,11 +11,13 @@ import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult
|
|
|
11
11
|
import type Database from 'better-sqlite3';
|
|
12
12
|
/** Progress callback type */
|
|
13
13
|
export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
14
|
-
/** Graph context for a node: callers, callees,
|
|
14
|
+
/** Graph context for a node: callers, callees, community module, imports, and siblings */
|
|
15
15
|
export interface GraphContext {
    /** Caller entries for the node. */
    callers: string[];
    /** Callee entries for the node. */
    callees: string[];
    /** Community module of the node; an empty string when none is known. */
    module: string;
    /** Names imported by the node's file. */
    importNames: string[];
    /** Names of other symbols defined in the same file as the node. */
    siblingNames: string[];
}
|
|
20
22
|
/**
|
|
21
23
|
* Fetch graph context (callers, callees, community module) for a set of nodes.
|
|
@@ -92,12 +92,51 @@ export function fetchGraphContext(db, nodes) {
|
|
|
92
92
|
for (const r of moduleRows) {
|
|
93
93
|
moduleMap.set(r.nid, r.module ?? '');
|
|
94
94
|
}
|
|
95
|
+
// Batch fetch import names (what this file imports)
|
|
96
|
+
const chunkSize = 500;
|
|
97
|
+
const importMap = {};
|
|
98
|
+
for (let ci = 0; ci < nodeIds.length; ci += chunkSize) {
|
|
99
|
+
const chunk = nodeIds.slice(ci, ci + chunkSize);
|
|
100
|
+
const ph = chunk.map(() => '?').join(',');
|
|
101
|
+
const importRows = db.prepare(`SELECT DISTINCT n.filePath, tn.name
|
|
102
|
+
FROM nodes n
|
|
103
|
+
JOIN edges e ON e.sourceId = n.id AND e.type = 'IMPORTS'
|
|
104
|
+
JOIN nodes tn ON tn.id = e.targetId
|
|
105
|
+
WHERE n.id IN (${ph})`).all(...chunk);
|
|
106
|
+
for (const row of importRows) {
|
|
107
|
+
if (!importMap[row.filePath])
|
|
108
|
+
importMap[row.filePath] = [];
|
|
109
|
+
if (importMap[row.filePath].length < 10)
|
|
110
|
+
importMap[row.filePath].push(row.name);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
// Batch fetch sibling symbol names (other symbols in same file)
|
|
114
|
+
const siblingMap = {};
|
|
115
|
+
for (let ci = 0; ci < nodeIds.length; ci += chunkSize) {
|
|
116
|
+
const chunk = nodeIds.slice(ci, ci + chunkSize);
|
|
117
|
+
const ph = chunk.map(() => '?').join(',');
|
|
118
|
+
const sibRows = db.prepare(`SELECT n1.id as sourceId, n2.name as sibName
|
|
119
|
+
FROM nodes n1
|
|
120
|
+
JOIN nodes n2 ON n2.filePath = n1.filePath AND n2.id != n1.id
|
|
121
|
+
WHERE n1.id IN (${ph})
|
|
122
|
+
AND n2.label NOT IN ('File', 'Folder', 'Community', 'Process')
|
|
123
|
+
LIMIT ${chunk.length * 5}`).all(...chunk);
|
|
124
|
+
for (const row of sibRows) {
|
|
125
|
+
if (!siblingMap[row.sourceId])
|
|
126
|
+
siblingMap[row.sourceId] = [];
|
|
127
|
+
if (siblingMap[row.sourceId].length < 5)
|
|
128
|
+
siblingMap[row.sourceId].push(row.sibName);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
95
131
|
// Assemble
|
|
96
132
|
for (const node of nodes) {
|
|
133
|
+
const n = node;
|
|
97
134
|
graphContext.set(node.id, {
|
|
98
135
|
callers: (callerMap.get(node.id) || []).slice(0, 3),
|
|
99
136
|
callees: (calleeMap.get(node.id) || []).slice(0, 3),
|
|
100
137
|
module: moduleMap.get(node.id) || '',
|
|
138
|
+
importNames: n.filePath ? (importMap[n.filePath] || []) : [],
|
|
139
|
+
siblingNames: siblingMap[node.id] || [],
|
|
101
140
|
});
|
|
102
141
|
}
|
|
103
142
|
}
|
|
@@ -115,7 +154,7 @@ export function fetchGraphContext(db, nodes) {
|
|
|
115
154
|
* @returns Enriched text
|
|
116
155
|
*/
|
|
117
156
|
export function enrichTextWithGraphContext(text, ctx) {
|
|
118
|
-
if (!ctx.module && ctx.callers.length === 0 && ctx.callees.length === 0)
|
|
157
|
+
if (!ctx.module && ctx.callers.length === 0 && ctx.callees.length === 0 && ctx.importNames.length === 0 && ctx.siblingNames.length === 0)
|
|
119
158
|
return text;
|
|
120
159
|
const lines = text.split('\n');
|
|
121
160
|
// Append Module to the File: line (matches Python batch format)
|
|
@@ -125,7 +164,17 @@ export function enrichTextWithGraphContext(text, ctx) {
|
|
|
125
164
|
lines[fileIdx] += ` | Module: ${ctx.module}`;
|
|
126
165
|
}
|
|
127
166
|
}
|
|
128
|
-
// Insert
|
|
167
|
+
// Insert imports after the File: line
|
|
168
|
+
if (ctx.importNames.length > 0) {
|
|
169
|
+
const importLine = `Imports: ${ctx.importNames.join(', ')}`;
|
|
170
|
+
// Insert after the File line
|
|
171
|
+
const fileIdx = lines.findIndex(l => l.startsWith('File:'));
|
|
172
|
+
if (fileIdx >= 0)
|
|
173
|
+
lines.splice(fileIdx + 1, 0, importLine);
|
|
174
|
+
else
|
|
175
|
+
lines.push(importLine);
|
|
176
|
+
}
|
|
177
|
+
// Insert callers/callees after the File: line (and imports if present)
|
|
129
178
|
const insertParts = [];
|
|
130
179
|
if (ctx.callers.length > 0)
|
|
131
180
|
insertParts.push(`Called by: ${ctx.callers.join(', ')}`);
|
|
@@ -138,6 +187,10 @@ export function enrichTextWithGraphContext(text, ctx) {
|
|
|
138
187
|
lines.splice(insertAt, 0, insertParts[i] ?? '');
|
|
139
188
|
}
|
|
140
189
|
}
|
|
190
|
+
// Append siblings at the end
|
|
191
|
+
if (ctx.siblingNames.length > 0) {
|
|
192
|
+
lines.push(`Siblings: ${ctx.siblingNames.join(', ')}`);
|
|
193
|
+
}
|
|
141
194
|
return lines.join('\n');
|
|
142
195
|
}
|
|
143
196
|
/**
|
|
@@ -16,6 +16,13 @@ const getFileName = (filePath) => {
|
|
|
16
16
|
const parts = filePath.split('/');
|
|
17
17
|
return parts[parts.length - 1] || filePath;
|
|
18
18
|
};
|
|
19
|
+
/**
 * Directory context of a '/'-separated file path: the file name is dropped
 * and at most the last two directory segments are kept, joined with '/'.
 * A path without any directory part yields an empty string.
 */
const getDirectoryContext = (filePath) => {
    const segments = filePath.split('/');
    const directories = segments.slice(0, -1);
    return directories.slice(-2).join('/');
};
|
|
19
26
|
/**
|
|
20
27
|
* Extract the first JSDoc/comment block as a natural language description.
|
|
21
28
|
* This bridges natural language queries to code — "blast radius analysis"
|
|
@@ -154,8 +161,9 @@ export const generateEmbeddingText = (node, _config = {}) => {
|
|
|
154
161
|
const comment = extractFirstComment(node.content);
|
|
155
162
|
if (comment)
|
|
156
163
|
parts.push(comment);
|
|
157
|
-
// 3. File location
|
|
158
|
-
|
|
164
|
+
// 3. File location with directory context
|
|
165
|
+
const dir = getDirectoryContext(node.filePath);
|
|
166
|
+
parts.push(`File: ${getFileName(node.filePath)}${dir ? ` in ${dir}` : ''}`);
|
|
159
167
|
// 4. Code signature (not full body)
|
|
160
168
|
const sig = extractSignature(node.content, label);
|
|
161
169
|
if (sig)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/** @file types.ts @description Type definitions for embedding generation and semantic search */
|
|
2
|
-
export declare const EMBEDDABLE_LABELS: readonly ["Function", "Class", "Method", "Interface"];
|
|
2
|
+
export declare const EMBEDDABLE_LABELS: readonly ["Function", "Class", "Method", "Interface", "Const", "Enum", "TypeAlias", "Namespace", "Module", "Struct"];
|
|
3
3
|
export type EmbeddableLabel = typeof EMBEDDABLE_LABELS[number];
|
|
4
4
|
/** Check if a label is embeddable */
|
|
5
5
|
export declare const isEmbeddableLabel: (label: string) => label is EmbeddableLabel;
|
|
@@ -4,10 +4,8 @@
|
|
|
4
4
|
// File nodes removed — their embeddings were low quality (import headers, license text)
|
|
5
5
|
// and polluted semantic search. BM25 FTS already searches file content effectively.
|
|
6
6
|
export const EMBEDDABLE_LABELS = [
|
|
7
|
-
'Function',
|
|
8
|
-
'
|
|
9
|
-
'Method',
|
|
10
|
-
'Interface',
|
|
7
|
+
'Function', 'Class', 'Method', 'Interface',
|
|
8
|
+
'Const', 'Enum', 'TypeAlias', 'Namespace', 'Module', 'Struct',
|
|
11
9
|
];
|
|
12
10
|
/** Check if a label is embeddable */
|
|
13
11
|
export const isEmbeddableLabel = (label) => EMBEDDABLE_LABELS.includes(label);
|
|
@@ -21,7 +21,7 @@ import { getLanguageFromFilename, getDefinitionNodeFromCaptures } from '../inges
|
|
|
21
21
|
import { loadParser, loadLanguage, isLanguageAvailable } from '../tree-sitter/parser-loader.js';
|
|
22
22
|
import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from '../ingestion/constants.js';
|
|
23
23
|
import { generateId } from '../../lib/utils.js';
|
|
24
|
-
import { deleteNodesByFile, insertNode, insertEdge, findNodeAtLine, findNodesByFile, deleteEmbeddingsByFile, insertEmbeddingsBatch, countEmbeddings } from '../db/adapter.js';
|
|
24
|
+
import { deleteNodesByFile, insertNode, insertEdge, findNodeAtLine, findNodesByFile, deleteEmbeddingsByFile, insertEmbeddingsBatch, countEmbeddings, deleteRefsByFile, insertRefsBatch, deleteFileWordsByFile, upsertFileWords } from '../db/adapter.js';
|
|
25
25
|
import { assertNodeLabel, toNodeId, toEdgeId } from '../db/schema.js';
|
|
26
26
|
import {} from './types.js';
|
|
27
27
|
import { getTsgoService } from '../semantic/tsgo-service.js';
|
|
@@ -79,10 +79,13 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
|
|
|
79
79
|
let nodesInserted = 0;
|
|
80
80
|
let edgesInserted = 0;
|
|
81
81
|
let filesSkipped = 0;
|
|
82
|
-
// Phase 1: Delete old nodes for all dirty files
|
|
82
|
+
// Phase 1: Delete old nodes + refs + file_words for all dirty files
|
|
83
83
|
for (const entry of dirtyFiles) {
|
|
84
|
-
const
|
|
84
|
+
const relPath = entry.relativePath;
|
|
85
|
+
const deleted = deleteNodesByFile(db, relPath);
|
|
85
86
|
nodesDeleted += deleted;
|
|
87
|
+
deleteRefsByFile(db, relPath);
|
|
88
|
+
deleteFileWordsByFile(db, relPath);
|
|
86
89
|
}
|
|
87
90
|
// Phase 2: Parse modified/created files with tree-sitter
|
|
88
91
|
const parser = await loadParser();
|
|
@@ -90,6 +93,7 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
|
|
|
90
93
|
const allDefinitions = [];
|
|
91
94
|
const callSites = [];
|
|
92
95
|
const insertedFilePaths = new Set();
|
|
96
|
+
const fileContents = new Map(); // for file_words rebuild
|
|
93
97
|
for (const entry of filesToProcess) {
|
|
94
98
|
const relPath = entry.relativePath;
|
|
95
99
|
const absPath = path.resolve(repoPath, relPath);
|
|
@@ -110,6 +114,7 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
|
|
|
110
114
|
filesSkipped++;
|
|
111
115
|
continue;
|
|
112
116
|
}
|
|
117
|
+
fileContents.set(relPath, content);
|
|
113
118
|
try {
|
|
114
119
|
await loadLanguage(language, relPath);
|
|
115
120
|
}
|
|
@@ -247,6 +252,37 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
|
|
|
247
252
|
});
|
|
248
253
|
edgesInserted++;
|
|
249
254
|
}
|
|
255
|
+
// Phase 3b+3c: Rebuild refs + file_words for dirty files
|
|
256
|
+
const STOP_WORDS = new Set(['the', 'and', 'for', 'from', 'with', 'this', 'that', 'have', 'has', 'not', 'are', 'was', 'were', 'been', 'being', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'does', 'did', 'let', 'var', 'const', 'new', 'return', 'function', 'class', 'import', 'export', 'default', 'void', 'null', 'undefined', 'true', 'false', 'else', 'case', 'break', 'continue', 'while', 'throw', 'catch', 'try', 'finally', 'async', 'await', 'yield', 'typeof', 'instanceof', 'delete', 'switch', 'interface', 'type', 'enum', 'extends', 'implements', 'static', 'private', 'public', 'protected', 'abstract', 'readonly', 'override', 'declare', 'module', 'namespace', 'require', 'string', 'number', 'boolean', 'object', 'any', 'never', 'unknown', 'symbol']);
|
|
257
|
+
// Phase 3b: Rebuild refs for dirty files (identifier occurrence index)
|
|
258
|
+
for (const [relPath, content] of fileContents) {
|
|
259
|
+
const refs = [];
|
|
260
|
+
const lines = content.split('\n');
|
|
261
|
+
const identRegex = /\b[a-zA-Z_]\w{2,}\b/g;
|
|
262
|
+
for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
|
|
263
|
+
let match;
|
|
264
|
+
while ((match = identRegex.exec(lines[lineIdx])) !== null) {
|
|
265
|
+
if (!STOP_WORDS.has(match[0].toLowerCase())) {
|
|
266
|
+
refs.push({ symbol: match[0], filePath: relPath, line: lineIdx });
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
if (refs.length > 0)
|
|
271
|
+
insertRefsBatch(db, refs);
|
|
272
|
+
}
|
|
273
|
+
// Phase 3c: Rebuild file_words for dirty files (conceptual search index)
|
|
274
|
+
for (const [relPath, content] of fileContents) {
|
|
275
|
+
const wordSet = new Set();
|
|
276
|
+
const wordRegex = /\b[a-zA-Z]\w{2,}\b/g;
|
|
277
|
+
let match;
|
|
278
|
+
while ((match = wordRegex.exec(content)) !== null) {
|
|
279
|
+
const w = match[0].toLowerCase();
|
|
280
|
+
if (!STOP_WORDS.has(w))
|
|
281
|
+
wordSet.add(w);
|
|
282
|
+
}
|
|
283
|
+
if (wordSet.size > 0)
|
|
284
|
+
upsertFileWords(db, relPath, [...wordSet].join(' '));
|
|
285
|
+
}
|
|
250
286
|
// Phase 4 + 5: Resolve call edges and cross-file edges using tsgo LSP
|
|
251
287
|
// (TS/JS files only — tsgo is optional, skip if unavailable)
|
|
252
288
|
console.error(`Code Mapper: refresh tsgo init with repoPath=${repoPath}`);
|
|
@@ -40,10 +40,16 @@ export declare class LocalBackend {
|
|
|
40
40
|
private refreshLocks;
|
|
41
41
|
/** Per-repo tsgo LSP service instances for live semantic enrichment */
|
|
42
42
|
private tsgoServices;
|
|
43
|
+
/** Per-repo in-memory embedding cache: repoId → { nodeIds, matrix, norms } (packed vectors plus pre-computed norms) */
|
|
44
|
+
private embeddingCaches;
|
|
43
45
|
/** Get (or lazily start) a tsgo LSP service for a repo. Returns null if unavailable. */
|
|
44
46
|
private getTsgo;
|
|
45
47
|
/** Get (or lazily open) the SQLite database for a repo. */
|
|
46
48
|
private getDb;
|
|
49
|
+
/** Load all embeddings into memory for fast vector search */
|
|
50
|
+
private loadEmbeddingCache;
|
|
51
|
+
/** Search embeddings in memory — O(N) dot products, no disk I/O */
|
|
52
|
+
private searchEmbeddingsInMemory;
|
|
47
53
|
/** Hard ceiling — beyond this, incremental is unreliable, warn prominently */
|
|
48
54
|
private static readonly MAX_INCREMENTAL_FILES;
|
|
49
55
|
/** Start file system watcher for a repo to detect source changes */
|
|
@@ -125,6 +131,26 @@ export declare class LocalBackend {
|
|
|
125
131
|
* Semantic vector search helper
|
|
126
132
|
*/
|
|
127
133
|
private semanticSearch;
|
|
134
|
+
/**
|
|
135
|
+
* Refs-based search: find symbols referenced in files that contain the query identifiers.
|
|
136
|
+
* Bridges the gap between graph edges (incomplete) and grep (complete for exact names).
|
|
137
|
+
*/
|
|
138
|
+
private refsSearch;
|
|
139
|
+
/**
|
|
140
|
+
* File-words FTS search: find files whose content contains conceptual terms,
|
|
141
|
+
* then return the best symbol from each file. Bridges NL → code gap.
|
|
142
|
+
*/
|
|
143
|
+
private fileWordsSearch;
|
|
144
|
+
/**
|
|
145
|
+
* Query expansion via embedding nearest neighbors: embed the query,
|
|
146
|
+
* find 5 closest symbols, extract their names as BM25 expansion terms.
|
|
147
|
+
*/
|
|
148
|
+
private expandQueryViaNearestNeighbors;
|
|
149
|
+
/**
|
|
150
|
+
* Ripgrep fallback: when all search signals return sparse results,
|
|
151
|
+
* grep the repo for query terms to find any relevant files.
|
|
152
|
+
*/
|
|
153
|
+
private ripgrepFallback;
|
|
128
154
|
executeSql(repoName: string, query: string): Promise<any>;
|
|
129
155
|
private sqlQuery;
|
|
130
156
|
/** Format raw SQL result rows as a markdown table, with raw fallback */
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import fs from 'fs/promises';
|
|
7
7
|
import path from 'path';
|
|
8
8
|
import { execFileSync } from 'child_process';
|
|
9
|
-
import { openDb, closeDb, getNode, findNodesByName, findNodesByFile, rawQuery,
|
|
9
|
+
import { openDb, closeDb, getNode, findNodesByName, findNodesByFile, rawQuery, countEmbeddings, searchFTS, queryChunked, findRefsBySymbol, searchFileWords } from '../../core/db/adapter.js';
|
|
10
10
|
import { toNodeId, assertEdgeType } from '../../core/db/schema.js';
|
|
11
11
|
import * as queries from '../../core/db/queries.js';
|
|
12
12
|
import { refreshFiles, refreshEmbeddings } from '../../core/incremental/refresh.js';
|
|
@@ -55,6 +55,8 @@ export class LocalBackend {
|
|
|
55
55
|
refreshLocks = new Map();
|
|
56
56
|
/** Per-repo tsgo LSP service instances for live semantic enrichment */
|
|
57
57
|
tsgoServices = new Map();
|
|
58
|
+
/** Per-repo in-memory embedding cache: repoId → { nodeIds, matrix, norms } (packed vectors plus pre-computed norms) */
|
|
59
|
+
embeddingCaches = new Map();
|
|
58
60
|
/** Get (or lazily start) a tsgo LSP service for a repo. Returns null if unavailable. */
|
|
59
61
|
async getTsgo(repo) {
|
|
60
62
|
const existing = this.tsgoServices.get(repo.id);
|
|
@@ -80,6 +82,64 @@ export class LocalBackend {
|
|
|
80
82
|
const dbPath = path.join(handle.storagePath, 'index.db');
|
|
81
83
|
return openDb(dbPath);
|
|
82
84
|
}
|
|
85
|
+
/** Load all embeddings into memory for fast vector search */
|
|
86
|
+
loadEmbeddingCache(repoId) {
|
|
87
|
+
try {
|
|
88
|
+
const db = this.getDb(repoId);
|
|
89
|
+
const rows = db.prepare('SELECT nodeId, embedding FROM embeddings').all();
|
|
90
|
+
if (rows.length === 0) {
|
|
91
|
+
this.embeddingCaches.delete(repoId);
|
|
92
|
+
return;
|
|
93
|
+
}
|
|
94
|
+
const dims = rows[0].embedding.byteLength / 4;
|
|
95
|
+
const nodeIds = [];
|
|
96
|
+
const matrix = new Float32Array(rows.length * dims);
|
|
97
|
+
const norms = new Float32Array(rows.length);
|
|
98
|
+
for (let i = 0; i < rows.length; i++) {
|
|
99
|
+
const row = rows[i];
|
|
100
|
+
nodeIds.push(row.nodeId);
|
|
101
|
+
const vec = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
|
|
102
|
+
matrix.set(vec, i * dims);
|
|
103
|
+
// Pre-compute norm for fast cosine similarity
|
|
104
|
+
let norm = 0;
|
|
105
|
+
for (let d = 0; d < dims; d++)
|
|
106
|
+
norm += vec[d] * vec[d];
|
|
107
|
+
norms[i] = Math.sqrt(norm);
|
|
108
|
+
}
|
|
109
|
+
this.embeddingCaches.set(repoId, { nodeIds, matrix, norms });
|
|
110
|
+
}
|
|
111
|
+
catch (err) {
|
|
112
|
+
console.error(`Code Mapper: Failed to load embedding cache: ${err instanceof Error ? err.message : err}`);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
/** Search embeddings in memory — O(N) dot products, no disk I/O */
|
|
116
|
+
searchEmbeddingsInMemory(repoId, queryVec, limit = 10, maxDistance = 0.5) {
|
|
117
|
+
const cache = this.embeddingCaches.get(repoId);
|
|
118
|
+
if (!cache || cache.nodeIds.length === 0)
|
|
119
|
+
return [];
|
|
120
|
+
const dims = queryVec.length;
|
|
121
|
+
const results = [];
|
|
122
|
+
// Pre-compute query norm
|
|
123
|
+
let qNorm = 0;
|
|
124
|
+
for (let d = 0; d < dims; d++)
|
|
125
|
+
qNorm += queryVec[d] * queryVec[d];
|
|
126
|
+
qNorm = Math.sqrt(qNorm);
|
|
127
|
+
if (qNorm === 0)
|
|
128
|
+
return [];
|
|
129
|
+
for (let i = 0; i < cache.nodeIds.length; i++) {
|
|
130
|
+
const offset = i * dims;
|
|
131
|
+
let dot = 0;
|
|
132
|
+
for (let d = 0; d < dims; d++)
|
|
133
|
+
dot += queryVec[d] * cache.matrix[offset + d];
|
|
134
|
+
const similarity = dot / (qNorm * cache.norms[i]);
|
|
135
|
+
const distance = 1 - similarity;
|
|
136
|
+
if (distance < maxDistance) {
|
|
137
|
+
results.push({ nodeId: cache.nodeIds[i], distance });
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
results.sort((a, b) => a.distance - b.distance);
|
|
141
|
+
return results.slice(0, limit);
|
|
142
|
+
}
|
|
83
143
|
/** Hard ceiling — beyond this, incremental is unreliable, warn prominently */
|
|
84
144
|
static MAX_INCREMENTAL_FILES = 200;
|
|
85
145
|
/** Start file system watcher for a repo to detect source changes */
|
|
@@ -188,6 +248,9 @@ export class LocalBackend {
|
|
|
188
248
|
const db = this.getDb(repo.id);
|
|
189
249
|
const hasEmb = (repo.stats?.embeddings ?? 0) > 0;
|
|
190
250
|
await refreshEmbeddings(db, dirtyFiles, hasEmb);
|
|
251
|
+
// Reload embedding cache after refresh
|
|
252
|
+
if (hasEmb)
|
|
253
|
+
this.loadEmbeddingCache(repo.id);
|
|
191
254
|
}
|
|
192
255
|
catch (err) {
|
|
193
256
|
watcher.inject(dirtyFiles);
|
|
@@ -206,10 +269,21 @@ export class LocalBackend {
|
|
|
206
269
|
async init() {
|
|
207
270
|
await this.refreshRepos();
|
|
208
271
|
// Start file watchers for incremental refresh
|
|
272
|
+
let anyEmbeddings = false;
|
|
209
273
|
for (const [id, handle] of this.repos) {
|
|
210
274
|
this.startWatcher(id, handle);
|
|
211
|
-
// Seed watcher with changes that happened while the server was down
|
|
212
275
|
this.seedWatcherFromGit(id, handle);
|
|
276
|
+
// Load embedding cache into memory for fast vector search
|
|
277
|
+
if ((handle.stats?.embeddings ?? 0) > 0) {
|
|
278
|
+
this.loadEmbeddingCache(id);
|
|
279
|
+
anyEmbeddings = true;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
// Pre-warm MLX embedder so first query has zero model-load latency
|
|
283
|
+
if (anyEmbeddings) {
|
|
284
|
+
import('../../core/embeddings/embedder.js').then(({ initEmbedder }) => {
|
|
285
|
+
initEmbedder().catch(() => { });
|
|
286
|
+
}).catch(() => { });
|
|
213
287
|
}
|
|
214
288
|
return this.repos.size > 0;
|
|
215
289
|
}
|
|
@@ -918,15 +992,23 @@ export class LocalBackend {
|
|
|
918
992
|
// Enrich semantic query with task_context/goal for better embeddings
|
|
919
993
|
const semanticQuery = [searchQuery, params.goal, params.task_context]
|
|
920
994
|
.filter(Boolean).join(' — ');
|
|
921
|
-
// Step
|
|
922
|
-
//
|
|
995
|
+
// Step 0: Query expansion via nearest-neighbor embeddings
|
|
996
|
+
// "double charging" → finds "idempotencyKey" nearby → BM25 now searches for it
|
|
997
|
+
const { expandQuery } = await import('../../core/search/query-expansion.js');
|
|
998
|
+
const nnExpansionTerms = await this.expandQueryViaNearestNeighbors(repo, semanticQuery);
|
|
999
|
+
const expandedSearchQuery = nnExpansionTerms.length > 0
|
|
1000
|
+
? expandQuery(searchQuery, nnExpansionTerms)
|
|
1001
|
+
: searchQuery;
|
|
1002
|
+
// Step 1: Four-signal search in parallel
|
|
1003
|
+
// BM25 uses expanded query; semantic uses enriched query; refs + file_words use raw query
|
|
923
1004
|
const searchLimit = processLimit * maxSymbolsPerProcess;
|
|
924
|
-
const [bm25Results, semanticResults] = await Promise.all([
|
|
925
|
-
this.bm25Search(repo,
|
|
1005
|
+
const [bm25Results, semanticResults, refsResults, fileWordsResults] = await Promise.all([
|
|
1006
|
+
this.bm25Search(repo, expandedSearchQuery, searchLimit),
|
|
926
1007
|
this.semanticSearch(repo, semanticQuery, searchLimit),
|
|
1008
|
+
Promise.resolve(this.refsSearch(repo, searchQuery, searchLimit)),
|
|
1009
|
+
Promise.resolve(this.fileWordsSearch(repo, searchQuery, searchLimit)),
|
|
927
1010
|
]);
|
|
928
|
-
// Step 2: Weighted RRF merge (
|
|
929
|
-
// BM25 now returns symbol-level results with nodeId, name, type
|
|
1011
|
+
// Step 2: Weighted RRF merge (4 signals)
|
|
930
1012
|
const bm25ForRRF = bm25Results.map((r, i) => ({
|
|
931
1013
|
nodeId: String(r.nodeId ?? ''),
|
|
932
1014
|
name: String(r.name ?? ''),
|
|
@@ -943,7 +1025,52 @@ export class LocalBackend {
|
|
|
943
1025
|
...(r.startLine != null ? { startLine: r.startLine } : {}),
|
|
944
1026
|
...(r.endLine != null ? { endLine: r.endLine } : {}),
|
|
945
1027
|
}));
|
|
946
|
-
|
|
1028
|
+
// Refs and file_words use BM25-compatible format for RRF
|
|
1029
|
+
const refsForRRF = refsResults.map((r, i) => ({
|
|
1030
|
+
nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), type: String(r.type ?? 'File'),
|
|
1031
|
+
filePath: String(r.filePath ?? ''), score: 1.0, rank: i + 1,
|
|
1032
|
+
...(r.startLine != null ? { startLine: r.startLine } : {}),
|
|
1033
|
+
...(r.endLine != null ? { endLine: r.endLine } : {}),
|
|
1034
|
+
}));
|
|
1035
|
+
const fileWordsForRRF = fileWordsResults.map((r, i) => ({
|
|
1036
|
+
nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), type: String(r.type ?? 'File'),
|
|
1037
|
+
filePath: String(r.filePath ?? ''), score: 1.0, rank: i + 1,
|
|
1038
|
+
...(r.startLine != null ? { startLine: r.startLine } : {}),
|
|
1039
|
+
...(r.endLine != null ? { endLine: r.endLine } : {}),
|
|
1040
|
+
}));
|
|
1041
|
+
let rrfMerged = mergeWithRRF(bm25ForRRF, semanticForRRF, { limit: searchLimit });
|
|
1042
|
+
// Merge refs + fileWords into the RRF results (lower weight)
|
|
1043
|
+
if (refsForRRF.length > 0 || fileWordsForRRF.length > 0) {
|
|
1044
|
+
const supplemental = mergeWithRRF(refsForRRF, fileWordsForRRF.map((r) => ({
|
|
1045
|
+
nodeId: r.nodeId, name: r.name, label: r.type, filePath: r.filePath, distance: 0.5,
|
|
1046
|
+
...(r.startLine != null ? { startLine: r.startLine } : {}),
|
|
1047
|
+
...(r.endLine != null ? { endLine: r.endLine } : {}),
|
|
1048
|
+
})), { limit: searchLimit });
|
|
1049
|
+
// Add supplemental results not already in main merge
|
|
1050
|
+
const mainIds = new Set(rrfMerged.map(r => r.nodeId || r.filePath));
|
|
1051
|
+
for (const s of supplemental) {
|
|
1052
|
+
const key = s.nodeId || s.filePath;
|
|
1053
|
+
if (!mainIds.has(key)) {
|
|
1054
|
+
rrfMerged.push({ ...s, score: s.score * 0.5 }); // lower weight for supplemental
|
|
1055
|
+
mainIds.add(key);
|
|
1056
|
+
}
|
|
1057
|
+
}
|
|
1058
|
+
rrfMerged.sort((a, b) => b.score - a.score);
|
|
1059
|
+
rrfMerged = rrfMerged.slice(0, searchLimit);
|
|
1060
|
+
}
|
|
1061
|
+
// Step 2b: Ripgrep fallback when all signals return sparse results
|
|
1062
|
+
if (rrfMerged.length < 3) {
|
|
1063
|
+
const rgResults = this.ripgrepFallback(repo, searchQuery, 10);
|
|
1064
|
+
for (const rg of rgResults) {
|
|
1065
|
+
if (!rrfMerged.some(m => m.filePath === rg.filePath)) {
|
|
1066
|
+
rrfMerged.push({
|
|
1067
|
+
filePath: rg.filePath, score: 0.01, rank: rrfMerged.length + 1,
|
|
1068
|
+
sources: ['bm25'], nodeId: rg.nodeId, name: rg.name, label: rg.type,
|
|
1069
|
+
startLine: rg.startLine, endLine: rg.endLine,
|
|
1070
|
+
});
|
|
1071
|
+
}
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
947
1074
|
// Build lookup from original search data (keyed by both nodeId and filePath for cross-referencing)
|
|
948
1075
|
const searchDataMap = new Map();
|
|
949
1076
|
for (const r of bm25Results) {
|
|
@@ -1341,25 +1468,34 @@ export class LocalBackend {
|
|
|
1341
1468
|
*/
|
|
1342
1469
|
async semanticSearch(repo, query, limit) {
|
|
1343
1470
|
try {
|
|
1344
|
-
//
|
|
1345
|
-
const
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1471
|
+
// Use in-memory cache if available (10-100x faster than SQLite scan)
|
|
1472
|
+
const cache = this.embeddingCaches.get(repo.id);
|
|
1473
|
+
if (!cache || cache.nodeIds.length === 0) {
|
|
1474
|
+
// Fallback: check DB directly
|
|
1475
|
+
const checkDb = this.getDb(repo.id);
|
|
1476
|
+
const embCount = countEmbeddings(checkDb);
|
|
1477
|
+
if (embCount === 0)
|
|
1478
|
+
return [];
|
|
1479
|
+
// Load cache on demand
|
|
1480
|
+
this.loadEmbeddingCache(repo.id);
|
|
1481
|
+
if (!this.embeddingCaches.get(repo.id))
|
|
1482
|
+
return [];
|
|
1483
|
+
}
|
|
1349
1484
|
const { DEFAULT_MAX_SEMANTIC_DISTANCE } = await import('../../core/search/types.js');
|
|
1350
1485
|
const { embedQuery } = await import('../../core/embeddings/embedder.js');
|
|
1351
1486
|
const queryVec = await embedQuery(query);
|
|
1352
|
-
//
|
|
1353
|
-
const vecResults =
|
|
1487
|
+
// In-memory cosine search — no disk I/O
|
|
1488
|
+
const vecResults = this.searchEmbeddingsInMemory(repo.id, queryVec, limit, DEFAULT_MAX_SEMANTIC_DISTANCE);
|
|
1354
1489
|
if (vecResults.length === 0)
|
|
1355
1490
|
return [];
|
|
1356
1491
|
// Batch metadata fetch
|
|
1492
|
+
const metaDb = this.getDb(repo.id);
|
|
1357
1493
|
const vecNodeIds = vecResults.map(r => r.nodeId);
|
|
1358
1494
|
const distanceMap = new Map();
|
|
1359
1495
|
for (const r of vecResults) {
|
|
1360
1496
|
distanceMap.set(r.nodeId, r.distance);
|
|
1361
1497
|
}
|
|
1362
|
-
const metaNodes = queries.findNodesByIds(
|
|
1498
|
+
const metaNodes = queries.findNodesByIds(metaDb, vecNodeIds);
|
|
1363
1499
|
return metaNodes.map(node => ({
|
|
1364
1500
|
nodeId: node.id,
|
|
1365
1501
|
name: node.name,
|
|
@@ -1375,6 +1511,143 @@ export class LocalBackend {
|
|
|
1375
1511
|
return [];
|
|
1376
1512
|
}
|
|
1377
1513
|
}
|
|
1514
|
+
/**
|
|
1515
|
+
* Refs-based search: find symbols referenced in files that contain the query identifiers.
|
|
1516
|
+
* Bridges the gap between graph edges (incomplete) and grep (complete for exact names).
|
|
1517
|
+
*/
|
|
1518
|
+
refsSearch(repo, query, limit) {
|
|
1519
|
+
try {
|
|
1520
|
+
const db = this.getDb(repo.id);
|
|
1521
|
+
// Tokenize query into potential identifier names
|
|
1522
|
+
const tokens = query.match(/\b[a-zA-Z_]\w{2,}\b/g) || [];
|
|
1523
|
+
if (tokens.length === 0)
|
|
1524
|
+
return [];
|
|
1525
|
+
const seen = new Set();
|
|
1526
|
+
const results = [];
|
|
1527
|
+
for (const token of tokens) {
|
|
1528
|
+
const refs = findRefsBySymbol(db, token, 50);
|
|
1529
|
+
for (const ref of refs) {
|
|
1530
|
+
if (seen.has(ref.filePath))
|
|
1531
|
+
continue;
|
|
1532
|
+
seen.add(ref.filePath);
|
|
1533
|
+
// Find the best symbol in this file
|
|
1534
|
+
const fileNodes = findNodesByFile(db, ref.filePath);
|
|
1535
|
+
const bestNode = fileNodes.find(n => n.label !== 'File') || fileNodes[0];
|
|
1536
|
+
if (bestNode) {
|
|
1537
|
+
results.push({
|
|
1538
|
+
nodeId: bestNode.id,
|
|
1539
|
+
name: bestNode.name,
|
|
1540
|
+
type: bestNode.label,
|
|
1541
|
+
filePath: bestNode.filePath,
|
|
1542
|
+
startLine: bestNode.startLine,
|
|
1543
|
+
endLine: bestNode.endLine,
|
|
1544
|
+
refsScore: -results.length, // rank by order found
|
|
1545
|
+
});
|
|
1546
|
+
}
|
|
1547
|
+
if (results.length >= limit)
|
|
1548
|
+
break;
|
|
1549
|
+
}
|
|
1550
|
+
if (results.length >= limit)
|
|
1551
|
+
break;
|
|
1552
|
+
}
|
|
1553
|
+
return results;
|
|
1554
|
+
}
|
|
1555
|
+
catch {
|
|
1556
|
+
return [];
|
|
1557
|
+
}
|
|
1558
|
+
}
|
|
1559
|
+
/**
|
|
1560
|
+
* File-words FTS search: find files whose content contains conceptual terms,
|
|
1561
|
+
* then return the best symbol from each file. Bridges NL → code gap.
|
|
1562
|
+
*/
|
|
1563
|
+
fileWordsSearch(repo, query, limit) {
|
|
1564
|
+
try {
|
|
1565
|
+
const db = this.getDb(repo.id);
|
|
1566
|
+
const fileResults = searchFileWords(db, query, limit);
|
|
1567
|
+
if (fileResults.length === 0)
|
|
1568
|
+
return [];
|
|
1569
|
+
const results = [];
|
|
1570
|
+
for (const fr of fileResults) {
|
|
1571
|
+
const fileNodes = findNodesByFile(db, fr.filePath);
|
|
1572
|
+
const bestNode = fileNodes.find(n => n.label !== 'File') || fileNodes[0];
|
|
1573
|
+
if (bestNode) {
|
|
1574
|
+
results.push({
|
|
1575
|
+
nodeId: bestNode.id,
|
|
1576
|
+
name: bestNode.name,
|
|
1577
|
+
type: bestNode.label,
|
|
1578
|
+
filePath: bestNode.filePath,
|
|
1579
|
+
startLine: bestNode.startLine,
|
|
1580
|
+
endLine: bestNode.endLine,
|
|
1581
|
+
fileWordsScore: fr.score,
|
|
1582
|
+
});
|
|
1583
|
+
}
|
|
1584
|
+
}
|
|
1585
|
+
return results;
|
|
1586
|
+
}
|
|
1587
|
+
catch {
|
|
1588
|
+
return [];
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
/**
|
|
1592
|
+
* Query expansion via embedding nearest neighbors: embed the query,
|
|
1593
|
+
* find 5 closest symbols, extract their names as BM25 expansion terms.
|
|
1594
|
+
*/
|
|
1595
|
+
async expandQueryViaNearestNeighbors(repo, query) {
|
|
1596
|
+
try {
|
|
1597
|
+
const cache = this.embeddingCaches.get(repo.id);
|
|
1598
|
+
if (!cache || cache.nodeIds.length === 0)
|
|
1599
|
+
return [];
|
|
1600
|
+
const { embedQuery } = await import('../../core/embeddings/embedder.js');
|
|
1601
|
+
const queryVec = await embedQuery(query);
|
|
1602
|
+
const neighbors = this.searchEmbeddingsInMemory(repo.id, queryVec, 5, 0.7);
|
|
1603
|
+
// Extract symbol names from nodeIds (format: "Label:filePath:name")
|
|
1604
|
+
return neighbors.map(n => {
|
|
1605
|
+
const parts = n.nodeId.split(':');
|
|
1606
|
+
return parts[parts.length - 1] || '';
|
|
1607
|
+
}).filter(Boolean);
|
|
1608
|
+
}
|
|
1609
|
+
catch {
|
|
1610
|
+
return [];
|
|
1611
|
+
}
|
|
1612
|
+
}
|
|
1613
|
+
/**
|
|
1614
|
+
* Ripgrep fallback: when all search signals return sparse results,
|
|
1615
|
+
* grep the repo for query terms to find any relevant files.
|
|
1616
|
+
*/
|
|
1617
|
+
ripgrepFallback(repo, query, limit) {
|
|
1618
|
+
try {
|
|
1619
|
+
const words = query.split(/\s+/).filter(w => w.length > 2).slice(0, 3);
|
|
1620
|
+
if (words.length === 0)
|
|
1621
|
+
return [];
|
|
1622
|
+
const pattern = words.join('|');
|
|
1623
|
+
const output = execFileSync('rg', ['-l', '-i', '--max-count', '1', pattern, '.'], {
|
|
1624
|
+
cwd: repo.repoPath, encoding: 'utf-8', timeout: 3000,
|
|
1625
|
+
});
|
|
1626
|
+
const db = this.getDb(repo.id);
|
|
1627
|
+
const results = [];
|
|
1628
|
+
const files = output.trim().split('\n').filter(Boolean).slice(0, limit);
|
|
1629
|
+
for (const f of files) {
|
|
1630
|
+
const relPath = f.replace(/^\.\//, '');
|
|
1631
|
+
const fileNodes = findNodesByFile(db, relPath);
|
|
1632
|
+
const bestNode = fileNodes.find(n => n.label !== 'File') || fileNodes[0];
|
|
1633
|
+
if (bestNode) {
|
|
1634
|
+
results.push({
|
|
1635
|
+
nodeId: bestNode.id,
|
|
1636
|
+
name: bestNode.name,
|
|
1637
|
+
type: bestNode.label,
|
|
1638
|
+
filePath: bestNode.filePath,
|
|
1639
|
+
startLine: bestNode.startLine,
|
|
1640
|
+
endLine: bestNode.endLine,
|
|
1641
|
+
matched_by: 'ripgrep',
|
|
1642
|
+
});
|
|
1643
|
+
}
|
|
1644
|
+
}
|
|
1645
|
+
return results;
|
|
1646
|
+
}
|
|
1647
|
+
catch {
|
|
1648
|
+
return [];
|
|
1649
|
+
}
|
|
1650
|
+
}
|
|
1378
1651
|
async executeSql(repoName, query) {
|
|
1379
1652
|
const repo = await this.resolveRepo(repoName);
|
|
1380
1653
|
return this.sqlQuery(repo, { query });
|
|
@@ -1758,6 +2031,26 @@ export class LocalBackend {
|
|
|
1758
2031
|
// tsgo reference lookup failed — non-fatal, graph results still available
|
|
1759
2032
|
}
|
|
1760
2033
|
}
|
|
2034
|
+
// Supplement callers from refs table (catches callers the graph missed)
|
|
2035
|
+
try {
|
|
2036
|
+
const refCallers = findRefsBySymbol(db, sym.name, 100);
|
|
2037
|
+
const knownFiles = new Set(incomingRows.map(r => r.filePath));
|
|
2038
|
+
for (const ref of refCallers) {
|
|
2039
|
+
if (ref.filePath === sym.filePath)
|
|
2040
|
+
continue; // skip self-file
|
|
2041
|
+
if (knownFiles.has(ref.filePath))
|
|
2042
|
+
continue; // already known
|
|
2043
|
+
knownFiles.add(ref.filePath);
|
|
2044
|
+
const enclosing = this.findNodeAtPosition(db, ref.filePath, ref.line);
|
|
2045
|
+
if (enclosing) {
|
|
2046
|
+
incomingRows.push({
|
|
2047
|
+
relType: 'CALLS', uid: '', name: enclosing.name, filePath: ref.filePath,
|
|
2048
|
+
kind: enclosing.label, startLine: ref.line, reason: 'refs-index',
|
|
2049
|
+
});
|
|
2050
|
+
}
|
|
2051
|
+
}
|
|
2052
|
+
}
|
|
2053
|
+
catch { /* refs table may not exist yet */ }
|
|
1761
2054
|
// Outgoing refs — exclude generic method names that produce false positives at low confidence
|
|
1762
2055
|
const GENERIC_NAMES_EXCLUDE = new Set(['has', 'get', 'set', 'add', 'remove', 'delete', 'close', 'stop', 'clear', 'reset', 'toString', 'valueOf', 'push', 'pop', 'entries', 'keys', 'values']);
|
|
1763
2056
|
let outgoingRows = [];
|
package/models/mlx-embedder.py
CHANGED
|
@@ -228,7 +228,7 @@ def batch_mode(db_path, dims=256, max_tokens=2048):
|
|
|
228
228
|
db.execute("ALTER TABLE embeddings ADD COLUMN textHash TEXT")
|
|
229
229
|
|
|
230
230
|
# Query embeddable nodes — skip test/fixture files (BM25 covers them)
|
|
231
|
-
labels = ('Function', 'Class', 'Method', 'Interface')
|
|
231
|
+
labels = ('Function', 'Class', 'Method', 'Interface', 'Const', 'Enum', 'TypeAlias', 'Namespace', 'Module', 'Struct')
|
|
232
232
|
placeholders = ','.join('?' * len(labels))
|
|
233
233
|
all_rows = db.execute(
|
|
234
234
|
f"SELECT id, name, label, filePath, content, startLine, endLine, nameExpanded FROM nodes WHERE label IN ({placeholders})",
|
|
@@ -272,7 +272,23 @@ def batch_mode(db_path, dims=256, max_tokens=2048):
|
|
|
272
272
|
for row in db.execute(f"SELECT e.sourceId, c.heuristicLabel FROM edges e JOIN nodes c ON c.id = e.targetId WHERE e.sourceId IN ({ph}) AND e.type = 'MEMBER_OF' AND c.label = 'Community' LIMIT {len(chunk_ids)}", chunk_ids):
|
|
273
273
|
module_map[row[0]] = row[1]
|
|
274
274
|
|
|
275
|
-
|
|
275
|
+
# Batch fetch import names per file
|
|
276
|
+
import_map = {}
|
|
277
|
+
for ci in range(0, len(node_ids), CHUNK):
|
|
278
|
+
chunk_ids = node_ids[ci:ci+CHUNK]
|
|
279
|
+
ph = ','.join('?' * len(chunk_ids))
|
|
280
|
+
# Get unique file paths for these nodes
|
|
281
|
+
file_paths = [r[3] for r in rows if r[0] in set(chunk_ids)]
|
|
282
|
+
unique_files = list(set(file_paths))
|
|
283
|
+
if unique_files:
|
|
284
|
+
fph = ','.join('?' * len(unique_files))
|
|
285
|
+
for row in db.execute(
|
|
286
|
+
f"SELECT DISTINCT n.filePath, tn.name FROM nodes n JOIN edges e ON e.sourceId = n.id AND e.type = 'IMPORTS' JOIN nodes tn ON tn.id = e.targetId WHERE n.filePath IN ({fph}) LIMIT {len(unique_files)*10}",
|
|
287
|
+
unique_files
|
|
288
|
+
):
|
|
289
|
+
import_map.setdefault(row[0], []).append(row[1])
|
|
290
|
+
|
|
291
|
+
print(json.dumps({"phase": "context", "with_callers": len(caller_map), "with_module": len(module_map), "with_imports": len(import_map)}), flush=True)
|
|
276
292
|
|
|
277
293
|
# Get existing text hashes for skip detection
|
|
278
294
|
existing_hashes = {}
|
|
@@ -367,13 +383,24 @@ def batch_mode(db_path, dims=256, max_tokens=2048):
|
|
|
367
383
|
if comment:
|
|
368
384
|
parts.append(comment)
|
|
369
385
|
|
|
386
|
+
# Directory context
|
|
387
|
+
dir_parts = filePath.rsplit('/', 2)
|
|
388
|
+
dir_context = '/'.join(dir_parts[:-1])[-40:] if '/' in filePath else ''
|
|
389
|
+
|
|
370
390
|
# File + module location
|
|
371
391
|
loc = f"File: {file_name}"
|
|
392
|
+
if dir_context:
|
|
393
|
+
loc += f" in {dir_context}"
|
|
372
394
|
module = module_map.get(nid, "")
|
|
373
395
|
if module:
|
|
374
396
|
loc += f" | Module: {module}"
|
|
375
397
|
parts.append(loc)
|
|
376
398
|
|
|
399
|
+
# Import context
|
|
400
|
+
file_imports = import_map.get(filePath, [])[:5]
|
|
401
|
+
if file_imports:
|
|
402
|
+
parts.append(f"Imports: {', '.join(file_imports)}")
|
|
403
|
+
|
|
377
404
|
# Graph context
|
|
378
405
|
callers = caller_map.get(nid, [])[:5]
|
|
379
406
|
callees = callee_map.get(nid, [])[:5]
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.4.0",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|