@zuvia-software-solutions/code-mapper 2.3.12 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@
6
6
  import fs from 'fs/promises';
7
7
  import path from 'path';
8
8
  import { execFileSync } from 'child_process';
9
- import { openDb, closeDb, getNode, findNodesByName, findNodesByFile, rawQuery, searchVector, countEmbeddings, searchFTS, queryChunked } from '../../core/db/adapter.js';
9
+ import { openDb, closeDb, getNode, findNodesByName, findNodesByFile, rawQuery, countEmbeddings, searchFTS, queryChunked, findRefsBySymbol, searchFileWords } from '../../core/db/adapter.js';
10
10
  import { toNodeId, assertEdgeType } from '../../core/db/schema.js';
11
11
  import * as queries from '../../core/db/queries.js';
12
12
  import { refreshFiles, refreshEmbeddings } from '../../core/incremental/refresh.js';
@@ -55,6 +55,10 @@ export class LocalBackend {
55
55
  refreshLocks = new Map();
56
56
  /** Per-repo tsgo LSP service instances for live semantic enrichment */
57
57
  tsgoServices = new Map();
58
+ /** Per-repo in-memory embedding cache: nodeId → Float32Array (256-dim) */
59
+ embeddingCaches = new Map();
60
+ /** Per-repo in-memory NL embedding cache: includes source text for match_reason */
61
+ nlEmbeddingCaches = new Map();
58
62
  /** Get (or lazily start) a tsgo LSP service for a repo. Returns null if unavailable. */
59
63
  async getTsgo(repo) {
60
64
  const existing = this.tsgoServices.get(repo.id);
@@ -80,6 +84,128 @@ export class LocalBackend {
80
84
  const dbPath = path.join(handle.storagePath, 'index.db');
81
85
  return openDb(dbPath);
82
86
  }
87
+ /** Load all embeddings into memory for fast vector search */
88
+ loadEmbeddingCache(repoId) {
89
+ try {
90
+ const db = this.getDb(repoId);
91
+ const rows = db.prepare('SELECT nodeId, embedding FROM embeddings').all();
92
+ if (rows.length === 0) {
93
+ this.embeddingCaches.delete(repoId);
94
+ return;
95
+ }
96
+ const dims = rows[0].embedding.byteLength / 4;
97
+ const nodeIds = [];
98
+ const matrix = new Float32Array(rows.length * dims);
99
+ const norms = new Float32Array(rows.length);
100
+ for (let i = 0; i < rows.length; i++) {
101
+ const row = rows[i];
102
+ nodeIds.push(row.nodeId);
103
+ const vec = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
104
+ matrix.set(vec, i * dims);
105
+ // Pre-compute norm for fast cosine similarity
106
+ let norm = 0;
107
+ for (let d = 0; d < dims; d++)
108
+ norm += vec[d] * vec[d];
109
+ norms[i] = Math.sqrt(norm);
110
+ }
111
+ this.embeddingCaches.set(repoId, { nodeIds, matrix, norms });
112
+ }
113
+ catch (err) {
114
+ console.error(`Code Mapper: Failed to load embedding cache: ${err instanceof Error ? err.message : err}`);
115
+ }
116
+ }
117
+ /** Search embeddings in memory — O(N) dot products, no disk I/O */
118
+ searchEmbeddingsInMemory(repoId, queryVec, limit = 10, maxDistance = 0.5) {
119
+ const cache = this.embeddingCaches.get(repoId);
120
+ if (!cache || cache.nodeIds.length === 0)
121
+ return [];
122
+ const dims = queryVec.length;
123
+ const results = [];
124
+ // Pre-compute query norm
125
+ let qNorm = 0;
126
+ for (let d = 0; d < dims; d++)
127
+ qNorm += queryVec[d] * queryVec[d];
128
+ qNorm = Math.sqrt(qNorm);
129
+ if (qNorm === 0)
130
+ return [];
131
+ for (let i = 0; i < cache.nodeIds.length; i++) {
132
+ const offset = i * dims;
133
+ let dot = 0;
134
+ for (let d = 0; d < dims; d++)
135
+ dot += queryVec[d] * cache.matrix[offset + d];
136
+ const similarity = dot / (qNorm * cache.norms[i]);
137
+ const distance = 1 - similarity;
138
+ if (distance < maxDistance) {
139
+ results.push({ nodeId: cache.nodeIds[i], distance });
140
+ }
141
+ }
142
+ results.sort((a, b) => a.distance - b.distance);
143
+ return results.slice(0, limit);
144
+ }
145
+ /** Load NL embeddings into memory for fast conceptual search */
146
+ loadNlEmbeddingCache(repoId) {
147
+ try {
148
+ const db = this.getDb(repoId);
149
+ let rows;
150
+ try {
151
+ rows = db.prepare('SELECT nodeId, embedding, text FROM nl_embeddings').all();
152
+ }
153
+ catch {
154
+ return;
155
+ } // table might not exist
156
+ if (rows.length === 0) {
157
+ this.nlEmbeddingCaches.delete(repoId);
158
+ return;
159
+ }
160
+ const dims = rows[0].embedding.byteLength / 4;
161
+ const nodeIds = [];
162
+ const texts = [];
163
+ const matrix = new Float32Array(rows.length * dims);
164
+ const norms = new Float32Array(rows.length);
165
+ for (let i = 0; i < rows.length; i++) {
166
+ const row = rows[i];
167
+ nodeIds.push(row.nodeId);
168
+ texts.push(row.text);
169
+ const vec = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
170
+ matrix.set(vec, i * dims);
171
+ let norm = 0;
172
+ for (let d = 0; d < dims; d++)
173
+ norm += vec[d] * vec[d];
174
+ norms[i] = Math.sqrt(norm);
175
+ }
176
+ this.nlEmbeddingCaches.set(repoId, { nodeIds, texts, matrix, norms });
177
+ }
178
+ catch { /* NL embeddings not available */ }
179
+ }
180
+ /** Search NL embeddings in memory, returns match_reason text */
181
+ searchNlEmbeddingsInMemory(repoId, queryVec, limit = 10, maxDistance = 0.5) {
182
+ const cache = this.nlEmbeddingCaches.get(repoId);
183
+ if (!cache || cache.nodeIds.length === 0)
184
+ return [];
185
+ const dims = queryVec.length;
186
+ const results = [];
187
+ let qNorm = 0;
188
+ for (let d = 0; d < dims; d++)
189
+ qNorm += queryVec[d] * queryVec[d];
190
+ qNorm = Math.sqrt(qNorm);
191
+ if (qNorm === 0)
192
+ return [];
193
+ const cacheDims = cache.matrix.length / cache.nodeIds.length;
194
+ for (let i = 0; i < cache.nodeIds.length; i++) {
195
+ const offset = i * cacheDims;
196
+ let dot = 0;
197
+ const minDims = Math.min(dims, cacheDims);
198
+ for (let d = 0; d < minDims; d++)
199
+ dot += queryVec[d] * cache.matrix[offset + d];
200
+ const similarity = dot / (qNorm * cache.norms[i]);
201
+ const distance = 1 - similarity;
202
+ if (distance < maxDistance) {
203
+ results.push({ nodeId: cache.nodeIds[i], distance, text: cache.texts[i] });
204
+ }
205
+ }
206
+ results.sort((a, b) => a.distance - b.distance);
207
+ return results.slice(0, limit);
208
+ }
83
209
  /** Hard ceiling — beyond this, incremental is unreliable, warn prominently */
84
210
  static MAX_INCREMENTAL_FILES = 200;
85
211
  /** Start file system watcher for a repo to detect source changes */
@@ -188,6 +314,9 @@ export class LocalBackend {
188
314
  const db = this.getDb(repo.id);
189
315
  const hasEmb = (repo.stats?.embeddings ?? 0) > 0;
190
316
  await refreshEmbeddings(db, dirtyFiles, hasEmb);
317
+ // Reload embedding cache after refresh
318
+ if (hasEmb)
319
+ this.loadEmbeddingCache(repo.id);
191
320
  }
192
321
  catch (err) {
193
322
  watcher.inject(dirtyFiles);
@@ -206,10 +335,22 @@ export class LocalBackend {
206
335
  async init() {
207
336
  await this.refreshRepos();
208
337
  // Start file watchers for incremental refresh
338
+ let anyEmbeddings = false;
209
339
  for (const [id, handle] of this.repos) {
210
340
  this.startWatcher(id, handle);
211
- // Seed watcher with changes that happened while the server was down
212
341
  this.seedWatcherFromGit(id, handle);
342
+ // Load embedding caches into memory for fast vector search
343
+ if ((handle.stats?.embeddings ?? 0) > 0) {
344
+ this.loadEmbeddingCache(id);
345
+ anyEmbeddings = true;
346
+ }
347
+ this.loadNlEmbeddingCache(id); // NL cache loaded regardless (cheap, may not exist)
348
+ }
349
+ // Pre-warm MLX embedder so first query has zero model-load latency
350
+ if (anyEmbeddings) {
351
+ import('../../core/embeddings/embedder.js').then(({ initEmbedder }) => {
352
+ initEmbedder().catch(() => { });
353
+ }).catch(() => { });
213
354
  }
214
355
  return this.repos.size > 0;
215
356
  }
@@ -518,6 +659,9 @@ export class LocalBackend {
518
659
  const end = d.endLine || 0;
519
660
  const isSmall = end > 0 && start > 0 && (end - start) < 10;
520
661
  lines.push(` ${sig} — ${d.type} @ ${this.shortPath(d.filePath)}:${start || '?'}${mod}`);
662
+ if (d.match_reason) {
663
+ lines.push(` _"${d.match_reason}"_`);
664
+ }
521
665
  if (isSmall && d.content) {
522
666
  const src = String(d.content).trim();
523
667
  if (src.length < 500) {
@@ -597,6 +741,9 @@ export class LocalBackend {
597
741
  renderFlows(2);
598
742
  }
599
743
  }
744
+ lines.push('');
745
+ lines.push('---');
746
+ lines.push('_Note: Results ranked by BM25 keyword + semantic + refs + file-content signals. Natural language queries may miss code using different terminology. If results seem incomplete, try specific identifiers or `grep -rn "keyword" --include="*.ts"` for exhaustive search._');
600
747
  return lines.join('\n');
601
748
  }
602
749
  formatContextAsText(result) {
@@ -702,6 +849,10 @@ export class LocalBackend {
702
849
  lines.push(` ${p.name} (step ${p.step_index}/${p.step_count})`);
703
850
  }
704
851
  }
852
+ // Guidance footer for agents
853
+ lines.push('');
854
+ lines.push('---');
855
+ lines.push('_Note: Callers are from graph edges + refs index. For widely-used symbols, verify completeness with `grep -rn "symbolName(" --include="*.ts"`. Outgoing calls may miss dynamic dispatch or reflection._');
705
856
  return lines.join('\n');
706
857
  }
707
858
  formatImpactAsText(result) {
@@ -748,6 +899,9 @@ export class LocalBackend {
748
899
  lines.push('');
749
900
  lines.push(`### Modules: ${mods.map((m) => `${m.name} (${m.hits} ${m.impact})`).join(' | ')}`);
750
901
  }
902
+ lines.push('');
903
+ lines.push('---');
904
+ lines.push('_Note: d=1 callers include graph edges + refs index. Indirect deps through dynamic dispatch, config, or type-only references may not appear. For critical changes, verify d=1 with `grep -rn "symbolName" --include="*.ts"`._');
751
905
  return lines.join('\n');
752
906
  }
753
907
  formatDetectChangesAsText(result) {
@@ -918,15 +1072,24 @@ export class LocalBackend {
918
1072
  // Enrich semantic query with task_context/goal for better embeddings
919
1073
  const semanticQuery = [searchQuery, params.goal, params.task_context]
920
1074
  .filter(Boolean).join(' — ');
921
- // Step 1: Hybrid search (BM25 + semantic in parallel)
922
- // BM25 uses raw query (keyword matching); semantic uses enriched query (better embedding)
1075
+ // Step 0: Query expansion via nearest-neighbor embeddings
1076
+ // "double charging" finds "idempotencyKey" nearby BM25 now searches for it
1077
+ const { expandQuery } = await import('../../core/search/query-expansion.js');
1078
+ const nnExpansionTerms = await this.expandQueryViaNearestNeighbors(repo, semanticQuery);
1079
+ const expandedSearchQuery = nnExpansionTerms.length > 0
1080
+ ? expandQuery(searchQuery, nnExpansionTerms)
1081
+ : searchQuery;
1082
+ // Step 1: Four-signal search in parallel
1083
+ // BM25 uses expanded query; semantic uses enriched query; refs + file_words use raw query
923
1084
  const searchLimit = processLimit * maxSymbolsPerProcess;
924
- const [bm25Results, semanticResults] = await Promise.all([
925
- this.bm25Search(repo, searchQuery, searchLimit),
1085
+ const [bm25Results, semanticResults, nlSemanticResults, refsResults, fileWordsResults] = await Promise.all([
1086
+ this.bm25Search(repo, expandedSearchQuery, searchLimit),
926
1087
  this.semanticSearch(repo, semanticQuery, searchLimit),
1088
+ this.nlSemanticSearch(repo, searchQuery, searchLimit),
1089
+ Promise.resolve(this.refsSearch(repo, searchQuery, searchLimit)),
1090
+ Promise.resolve(this.fileWordsSearch(repo, searchQuery, searchLimit)),
927
1091
  ]);
928
- // Step 2: Weighted RRF merge (single implementation — no duplication)
929
- // BM25 now returns symbol-level results with nodeId, name, type
1092
+ // Step 2: Weighted RRF merge (5 signals)
930
1093
  const bm25ForRRF = bm25Results.map((r, i) => ({
931
1094
  nodeId: String(r.nodeId ?? ''),
932
1095
  name: String(r.name ?? ''),
@@ -943,7 +1106,71 @@ export class LocalBackend {
943
1106
  ...(r.startLine != null ? { startLine: r.startLine } : {}),
944
1107
  ...(r.endLine != null ? { endLine: r.endLine } : {}),
945
1108
  }));
946
- const rrfMerged = mergeWithRRF(bm25ForRRF, semanticForRRF, { limit: searchLimit });
1109
+ // Refs and file_words use BM25-compatible format for RRF
1110
+ const refsForRRF = refsResults.map((r, i) => ({
1111
+ nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), type: String(r.type ?? 'File'),
1112
+ filePath: String(r.filePath ?? ''), score: 1.0, rank: i + 1,
1113
+ ...(r.startLine != null ? { startLine: r.startLine } : {}),
1114
+ ...(r.endLine != null ? { endLine: r.endLine } : {}),
1115
+ }));
1116
+ const fileWordsForRRF = fileWordsResults.map((r, i) => ({
1117
+ nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), type: String(r.type ?? 'File'),
1118
+ filePath: String(r.filePath ?? ''), score: 1.0, rank: i + 1,
1119
+ ...(r.startLine != null ? { startLine: r.startLine } : {}),
1120
+ ...(r.endLine != null ? { endLine: r.endLine } : {}),
1121
+ }));
1122
+ // NL semantic results get high weight — proven 100% recall on conceptual queries
1123
+ const nlForRRF = nlSemanticResults.map((r) => ({
1124
+ nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), label: String(r.type ?? ''),
1125
+ filePath: String(r.filePath ?? ''), distance: Number(r.distance ?? 1),
1126
+ ...(r.startLine != null ? { startLine: r.startLine } : {}),
1127
+ ...(r.endLine != null ? { endLine: r.endLine } : {}),
1128
+ }));
1129
+ // Merge code + NL semantic into one semantic list (best of both worlds)
1130
+ const combinedSemantic = [...semanticForRRF, ...nlForRRF]
1131
+ .sort((a, b) => a.distance - b.distance)
1132
+ .filter((r, i, arr) => arr.findIndex(x => x.nodeId === r.nodeId) === i) // dedupe by nodeId
1133
+ .slice(0, searchLimit);
1134
+ let rrfMerged = mergeWithRRF(bm25ForRRF, combinedSemantic, { limit: searchLimit });
1135
+ // Store NL match reasons for display
1136
+ const nlMatchReasons = new Map();
1137
+ for (const r of nlSemanticResults) {
1138
+ if (r.match_reason && !nlMatchReasons.has(r.nodeId)) {
1139
+ nlMatchReasons.set(r.nodeId, r.match_reason);
1140
+ }
1141
+ }
1142
+ // Merge refs + fileWords into the RRF results (lower weight)
1143
+ if (refsForRRF.length > 0 || fileWordsForRRF.length > 0) {
1144
+ const supplemental = mergeWithRRF(refsForRRF, fileWordsForRRF.map((r) => ({
1145
+ nodeId: r.nodeId, name: r.name, label: r.type, filePath: r.filePath, distance: 0.5,
1146
+ ...(r.startLine != null ? { startLine: r.startLine } : {}),
1147
+ ...(r.endLine != null ? { endLine: r.endLine } : {}),
1148
+ })), { limit: searchLimit });
1149
+ // Add supplemental results not already in main merge
1150
+ const mainIds = new Set(rrfMerged.map(r => r.nodeId || r.filePath));
1151
+ for (const s of supplemental) {
1152
+ const key = s.nodeId || s.filePath;
1153
+ if (!mainIds.has(key)) {
1154
+ rrfMerged.push({ ...s, score: s.score * 0.5 }); // lower weight for supplemental
1155
+ mainIds.add(key);
1156
+ }
1157
+ }
1158
+ rrfMerged.sort((a, b) => b.score - a.score);
1159
+ rrfMerged = rrfMerged.slice(0, searchLimit);
1160
+ }
1161
+ // Step 2b: Ripgrep fallback when all signals return sparse results
1162
+ if (rrfMerged.length < 3) {
1163
+ const rgResults = this.ripgrepFallback(repo, searchQuery, 10);
1164
+ for (const rg of rgResults) {
1165
+ if (!rrfMerged.some(m => m.filePath === rg.filePath)) {
1166
+ rrfMerged.push({
1167
+ filePath: rg.filePath, score: 0.01, rank: rrfMerged.length + 1,
1168
+ sources: ['bm25'], nodeId: rg.nodeId, name: rg.name, label: rg.type,
1169
+ startLine: rg.startLine, endLine: rg.endLine,
1170
+ });
1171
+ }
1172
+ }
1173
+ }
947
1174
  // Build lookup from original search data (keyed by both nodeId and filePath for cross-referencing)
948
1175
  const searchDataMap = new Map();
949
1176
  for (const r of bm25Results) {
@@ -957,13 +1184,22 @@ export class LocalBackend {
957
1184
  if (!searchDataMap.has(key))
958
1185
  searchDataMap.set(key, r);
959
1186
  }
960
- let merged = rrfMerged.map(rrf => ({
961
- score: rrf.score,
962
- data: searchDataMap.get(rrf.nodeId ?? '') ?? searchDataMap.get(rrf.filePath) ?? {
1187
+ for (const r of nlSemanticResults) {
1188
+ const key = r.nodeId || r.filePath;
1189
+ if (!searchDataMap.has(key))
1190
+ searchDataMap.set(key, r);
1191
+ }
1192
+ let merged = rrfMerged.map(rrf => {
1193
+ const data = searchDataMap.get(rrf.nodeId ?? '') ?? searchDataMap.get(rrf.filePath) ?? {
963
1194
  name: rrf.name ?? rrf.filePath.split('/').pop(), type: rrf.label ?? 'File',
964
1195
  filePath: rrf.filePath, nodeId: rrf.nodeId,
965
- },
966
- }));
1196
+ };
1197
+ // Attach NL match reason if available
1198
+ const reason = nlMatchReasons.get(rrf.nodeId ?? '') ?? nlMatchReasons.get(data.nodeId ?? '');
1199
+ if (reason)
1200
+ data.match_reason = reason;
1201
+ return { score: rrf.score, data };
1202
+ });
967
1203
  // Filter noise: remove test files, config files, docs from results by default
968
1204
  merged = merged.filter(item => {
969
1205
  const fp = String(item.data.filePath ?? '').toLowerCase();
@@ -1341,25 +1577,34 @@ export class LocalBackend {
1341
1577
  */
1342
1578
  async semanticSearch(repo, query, limit) {
1343
1579
  try {
1344
- // Check if embeddings exist before loading the model (avoids heavy model init when embeddings are off)
1345
- const semDb = this.getDb(repo.id);
1346
- const embCount = countEmbeddings(semDb);
1347
- if (embCount === 0)
1348
- return [];
1580
+ // Use in-memory cache if available (10-100x faster than SQLite scan)
1581
+ const cache = this.embeddingCaches.get(repo.id);
1582
+ if (!cache || cache.nodeIds.length === 0) {
1583
+ // Fallback: check DB directly
1584
+ const checkDb = this.getDb(repo.id);
1585
+ const embCount = countEmbeddings(checkDb);
1586
+ if (embCount === 0)
1587
+ return [];
1588
+ // Load cache on demand
1589
+ this.loadEmbeddingCache(repo.id);
1590
+ if (!this.embeddingCaches.get(repo.id))
1591
+ return [];
1592
+ }
1349
1593
  const { DEFAULT_MAX_SEMANTIC_DISTANCE } = await import('../../core/search/types.js');
1350
1594
  const { embedQuery } = await import('../../core/embeddings/embedder.js');
1351
1595
  const queryVec = await embedQuery(query);
1352
- // Brute-force cosine search via adapter (fast enough for <200K vectors at 256 dims)
1353
- const vecResults = searchVector(semDb, queryVec, limit, DEFAULT_MAX_SEMANTIC_DISTANCE);
1596
+ // In-memory cosine search — no disk I/O
1597
+ const vecResults = this.searchEmbeddingsInMemory(repo.id, queryVec, limit, DEFAULT_MAX_SEMANTIC_DISTANCE);
1354
1598
  if (vecResults.length === 0)
1355
1599
  return [];
1356
1600
  // Batch metadata fetch
1601
+ const metaDb = this.getDb(repo.id);
1357
1602
  const vecNodeIds = vecResults.map(r => r.nodeId);
1358
1603
  const distanceMap = new Map();
1359
1604
  for (const r of vecResults) {
1360
1605
  distanceMap.set(r.nodeId, r.distance);
1361
1606
  }
1362
- const metaNodes = queries.findNodesByIds(semDb, vecNodeIds);
1607
+ const metaNodes = queries.findNodesByIds(metaDb, vecNodeIds);
1363
1608
  return metaNodes.map(node => ({
1364
1609
  nodeId: node.id,
1365
1610
  name: node.name,
@@ -1375,6 +1620,185 @@ export class LocalBackend {
1375
1620
  return [];
1376
1621
  }
1377
1622
  }
1623
+ /**
1624
+ * NL semantic search: embed query with bge-small, search NL descriptions.
1625
+ * Returns match_reason (the NL text that matched) for agent transparency.
1626
+ */
1627
+ async nlSemanticSearch(repo, query, limit) {
1628
+ try {
1629
+ const cache = this.nlEmbeddingCaches.get(repo.id);
1630
+ if (!cache || cache.nodeIds.length === 0)
1631
+ return [];
1632
+ const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
1633
+ const queryVec = await nlEmbed(query);
1634
+ const vecResults = this.searchNlEmbeddingsInMemory(repo.id, queryVec, limit, 0.5);
1635
+ if (vecResults.length === 0)
1636
+ return [];
1637
+ // Fetch node metadata
1638
+ const metaDb = this.getDb(repo.id);
1639
+ const seen = new Set();
1640
+ const results = [];
1641
+ for (const r of vecResults) {
1642
+ if (seen.has(r.nodeId))
1643
+ continue;
1644
+ seen.add(r.nodeId);
1645
+ const node = getNode(metaDb, toNodeId(r.nodeId));
1646
+ if (node) {
1647
+ results.push({
1648
+ nodeId: r.nodeId,
1649
+ name: node.name,
1650
+ type: node.label,
1651
+ filePath: node.filePath,
1652
+ distance: r.distance,
1653
+ startLine: node.startLine,
1654
+ endLine: node.endLine,
1655
+ match_reason: r.text, // The NL text that matched — shown to agents
1656
+ });
1657
+ }
1658
+ }
1659
+ return results;
1660
+ }
1661
+ catch {
1662
+ return [];
1663
+ }
1664
+ }
1665
+ /**
1666
+ * Refs-based search: find symbols referenced in files that contain the query identifiers.
1667
+ * Bridges the gap between graph edges (incomplete) and grep (complete for exact names).
1668
+ */
1669
+ refsSearch(repo, query, limit) {
1670
+ try {
1671
+ const db = this.getDb(repo.id);
1672
+ // Tokenize query into potential identifier names
1673
+ const tokens = query.match(/\b[a-zA-Z_]\w{2,}\b/g) || [];
1674
+ if (tokens.length === 0)
1675
+ return [];
1676
+ const seen = new Set();
1677
+ const results = [];
1678
+ for (const token of tokens) {
1679
+ const refs = findRefsBySymbol(db, token, 50);
1680
+ for (const ref of refs) {
1681
+ if (seen.has(ref.filePath))
1682
+ continue;
1683
+ seen.add(ref.filePath);
1684
+ // Find the best symbol in this file
1685
+ const fileNodes = findNodesByFile(db, ref.filePath);
1686
+ const bestNode = fileNodes.find(n => n.label !== 'File') || fileNodes[0];
1687
+ if (bestNode) {
1688
+ results.push({
1689
+ nodeId: bestNode.id,
1690
+ name: bestNode.name,
1691
+ type: bestNode.label,
1692
+ filePath: bestNode.filePath,
1693
+ startLine: bestNode.startLine,
1694
+ endLine: bestNode.endLine,
1695
+ refsScore: -results.length, // rank by order found
1696
+ });
1697
+ }
1698
+ if (results.length >= limit)
1699
+ break;
1700
+ }
1701
+ if (results.length >= limit)
1702
+ break;
1703
+ }
1704
+ return results;
1705
+ }
1706
+ catch {
1707
+ return [];
1708
+ }
1709
+ }
1710
+ /**
1711
+ * File-words FTS search: find files whose content contains conceptual terms,
1712
+ * then return the best symbol from each file. Bridges NL → code gap.
1713
+ */
1714
+ fileWordsSearch(repo, query, limit) {
1715
+ try {
1716
+ const db = this.getDb(repo.id);
1717
+ const fileResults = searchFileWords(db, query, limit);
1718
+ if (fileResults.length === 0)
1719
+ return [];
1720
+ const results = [];
1721
+ for (const fr of fileResults) {
1722
+ const fileNodes = findNodesByFile(db, fr.filePath);
1723
+ const bestNode = fileNodes.find(n => n.label !== 'File') || fileNodes[0];
1724
+ if (bestNode) {
1725
+ results.push({
1726
+ nodeId: bestNode.id,
1727
+ name: bestNode.name,
1728
+ type: bestNode.label,
1729
+ filePath: bestNode.filePath,
1730
+ startLine: bestNode.startLine,
1731
+ endLine: bestNode.endLine,
1732
+ fileWordsScore: fr.score,
1733
+ });
1734
+ }
1735
+ }
1736
+ return results;
1737
+ }
1738
+ catch {
1739
+ return [];
1740
+ }
1741
+ }
1742
+ /**
1743
+ * Query expansion via embedding nearest neighbors: embed the query,
1744
+ * find 5 closest symbols, extract their names as BM25 expansion terms.
1745
+ */
1746
+ async expandQueryViaNearestNeighbors(repo, query) {
1747
+ try {
1748
+ const cache = this.embeddingCaches.get(repo.id);
1749
+ if (!cache || cache.nodeIds.length === 0)
1750
+ return [];
1751
+ const { embedQuery } = await import('../../core/embeddings/embedder.js');
1752
+ const queryVec = await embedQuery(query);
1753
+ const neighbors = this.searchEmbeddingsInMemory(repo.id, queryVec, 5, 0.7);
1754
+ // Extract symbol names from nodeIds (format: "Label:filePath:name")
1755
+ return neighbors.map(n => {
1756
+ const parts = n.nodeId.split(':');
1757
+ return parts[parts.length - 1] || '';
1758
+ }).filter(Boolean);
1759
+ }
1760
+ catch {
1761
+ return [];
1762
+ }
1763
+ }
1764
+ /**
1765
+ * Ripgrep fallback: when all search signals return sparse results,
1766
+ * grep the repo for query terms to find any relevant files.
1767
+ */
1768
+ ripgrepFallback(repo, query, limit) {
1769
+ try {
1770
+ const words = query.split(/\s+/).filter(w => w.length > 2).slice(0, 3);
1771
+ if (words.length === 0)
1772
+ return [];
1773
+ const pattern = words.join('|');
1774
+ const output = execFileSync('rg', ['-l', '-i', '--max-count', '1', pattern, '.'], {
1775
+ cwd: repo.repoPath, encoding: 'utf-8', timeout: 3000,
1776
+ });
1777
+ const db = this.getDb(repo.id);
1778
+ const results = [];
1779
+ const files = output.trim().split('\n').filter(Boolean).slice(0, limit);
1780
+ for (const f of files) {
1781
+ const relPath = f.replace(/^\.\//, '');
1782
+ const fileNodes = findNodesByFile(db, relPath);
1783
+ const bestNode = fileNodes.find(n => n.label !== 'File') || fileNodes[0];
1784
+ if (bestNode) {
1785
+ results.push({
1786
+ nodeId: bestNode.id,
1787
+ name: bestNode.name,
1788
+ type: bestNode.label,
1789
+ filePath: bestNode.filePath,
1790
+ startLine: bestNode.startLine,
1791
+ endLine: bestNode.endLine,
1792
+ matched_by: 'ripgrep',
1793
+ });
1794
+ }
1795
+ }
1796
+ return results;
1797
+ }
1798
+ catch {
1799
+ return [];
1800
+ }
1801
+ }
1378
1802
  async executeSql(repoName, query) {
1379
1803
  const repo = await this.resolveRepo(repoName);
1380
1804
  return this.sqlQuery(repo, { query });
@@ -1758,6 +2182,32 @@ export class LocalBackend {
1758
2182
  // tsgo reference lookup failed — non-fatal, graph results still available
1759
2183
  }
1760
2184
  }
2185
+ // Supplement callers from refs table (catches callers the graph missed)
2186
+ try {
2187
+ const refCallers = findRefsBySymbol(db, sym.name, 200);
2188
+ const knownFiles = new Set(incomingRows.map(r => r.filePath));
2189
+ let refsAdded = 0;
2190
+ for (const ref of refCallers) {
2191
+ if (ref.filePath === sym.filePath)
2192
+ continue; // skip self-file
2193
+ if (knownFiles.has(ref.filePath))
2194
+ continue; // already have a caller from this file
2195
+ const enclosing = this.findNodeAtPosition(db, ref.filePath, ref.line);
2196
+ if (!enclosing)
2197
+ continue; // no symbol at this line (e.g. import statement)
2198
+ knownFiles.add(ref.filePath); // mark AFTER finding a valid node
2199
+ incomingRows.push({
2200
+ relType: 'CALLS', uid: '', name: enclosing.name, filePath: ref.filePath,
2201
+ kind: enclosing.label, startLine: ref.line, reason: 'refs-index',
2202
+ });
2203
+ refsAdded++;
2204
+ }
2205
+ if (process.env['CODE_MAPPER_VERBOSE'])
2206
+ console.error(`Code Mapper: refs supplement for '${sym.name}': ${refsAdded} added from ${refCallers.length} refs`);
2207
+ }
2208
+ catch (err) {
2209
+ console.error(`Code Mapper: refs supplement failed: ${err instanceof Error ? err.message : err}`);
2210
+ }
1761
2211
  // Outgoing refs — exclude generic method names that produce false positives at low confidence
1762
2212
  const GENERIC_NAMES_EXCLUDE = new Set(['has', 'get', 'set', 'add', 'remove', 'delete', 'close', 'stop', 'clear', 'reset', 'toString', 'valueOf', 'push', 'pop', 'entries', 'keys', 'values']);
1763
2213
  let outgoingRows = [];
@@ -2095,10 +2545,44 @@ export class LocalBackend {
2095
2545
  logQueryError('rename:read-ref', e);
2096
2546
  }
2097
2547
  }
2098
- // Step 3: Text search for refs the graph might have missed
2099
- let astSearchEdits = 0;
2548
+ // Step 3a: Refs table lookup (instant, covers most cases the graph missed)
2549
+ let refsEdits = 0;
2100
2550
  const graphFiles = new Set([sym.filePath, ...allIncoming.map(r => r.filePath)].filter(Boolean));
2101
- // Simple text search across the repo for the old name (in files not already covered by graph)
2551
+ try {
2552
+ const refsDb = this.getDb(repo.id);
2553
+ const refsForName = findRefsBySymbol(refsDb, oldName, 500);
2554
+ for (const ref of refsForName) {
2555
+ const normalizedFile = ref.filePath.replace(/\\/g, '/');
2556
+ if (graphFiles.has(normalizedFile))
2557
+ continue;
2558
+ graphFiles.add(normalizedFile); // mark so ripgrep doesn't re-process
2559
+ try {
2560
+ const content = await fs.readFile(assertSafePath(normalizedFile), 'utf-8');
2561
+ const lines = content.split('\n');
2562
+ const regex = new RegExp(`\\b${oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g');
2563
+ for (let i = 0; i < lines.length; i++) {
2564
+ const searchLine = lines[i];
2565
+ if (!searchLine)
2566
+ continue;
2567
+ regex.lastIndex = 0;
2568
+ if (regex.test(searchLine)) {
2569
+ regex.lastIndex = 0;
2570
+ addEdit(normalizedFile, i + 1, searchLine.trim(), searchLine.replace(regex, new_name).trim(), 'refs');
2571
+ refsEdits++;
2572
+ }
2573
+ }
2574
+ }
2575
+ catch (e) {
2576
+ logQueryError('rename:refs-read', e);
2577
+ }
2578
+ }
2579
+ }
2580
+ catch (e) {
2581
+ logQueryError('rename:refs', e);
2582
+ }
2583
+ // Step 3b: Ripgrep text search for anything refs + graph missed
2584
+ let astSearchEdits = 0;
2585
+ // Simple text search across the repo for the old name (in files not already covered)
2102
2586
  try {
2103
2587
  const { execFileSync } = await import('child_process');
2104
2588
  const rgArgs = [
@@ -2163,9 +2647,11 @@ export class LocalBackend {
2163
2647
  files_affected: allChanges.length,
2164
2648
  total_edits: totalEdits,
2165
2649
  graph_edits: graphEdits,
2650
+ refs_edits: refsEdits,
2166
2651
  text_search_edits: astSearchEdits,
2167
2652
  changes: allChanges,
2168
2653
  applied: !dry_run,
2654
+ _note: 'Rename uses graph edges + refs index + ripgrep. Always review changes before applying. String literals, comments, and dynamic references (e.g. obj[methodName]) need manual review.',
2169
2655
  };
2170
2656
  }
2171
2657
  async impact(repo, params) {
@@ -2242,6 +2728,32 @@ export class LocalBackend {
2242
2728
  }
2243
2729
  }
2244
2730
  }
2731
+ // Supplement d=1 callers from refs table (catches callers the graph missed)
2732
+ if (direction === 'upstream') {
2733
+ try {
2734
+ const targetName = sym.name;
2735
+ const d1FromRefs = findRefsBySymbol(db, targetName, 200);
2736
+ for (const ref of d1FromRefs) {
2737
+ if (ref.filePath === sym.filePath)
2738
+ continue;
2739
+ const refNode = this.findNodeAtPosition(db, ref.filePath, ref.line);
2740
+ if (refNode && !seenIds.has(refNode.name + ':' + ref.filePath)) {
2741
+ // Find the actual node ID for this position
2742
+ const fullNodes = findNodesByFile(db, ref.filePath);
2743
+ const match = fullNodes.find(n => n.name === refNode.name && n.startLine != null && n.startLine <= ref.line + 1 && (n.endLine ?? 9999) >= ref.line + 1);
2744
+ if (match && !seenIds.has(match.id) && !startIds.some(s => s === match.id)) {
2745
+ seenIds.add(match.id);
2746
+ mergedNodes.push({
2747
+ id: match.id, name: match.name, label: match.label,
2748
+ filePath: match.filePath, depth: 1,
2749
+ relationType: 'CALLS', confidence: 0.8,
2750
+ });
2751
+ }
2752
+ }
2753
+ }
2754
+ }
2755
+ catch { /* refs table may not exist */ }
2756
+ }
2245
2757
  const impacted = mergedNodes;
2246
2758
  const truncated = anyTruncated;
2247
2759
  const grouped = {};