@zuvia-software-solutions/code-mapper 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +16 -76
- package/dist/cli/index.js +1 -1
- package/dist/core/db/adapter.d.ts +9 -0
- package/dist/core/db/adapter.js +40 -0
- package/dist/core/db/schema.d.ts +9 -1
- package/dist/core/db/schema.js +11 -0
- package/dist/core/embeddings/index.d.ts +2 -3
- package/dist/core/embeddings/index.js +2 -3
- package/dist/core/embeddings/nl-embed-worker.d.ts +8 -0
- package/dist/core/embeddings/nl-embed-worker.js +38 -0
- package/dist/core/embeddings/nl-embedder.d.ts +44 -0
- package/dist/core/embeddings/nl-embedder.js +431 -0
- package/dist/core/incremental/refresh.js +18 -26
- package/dist/mcp/local/local-backend.d.ts +11 -0
- package/dist/mcp/local/local-backend.js +264 -32
- package/dist/mcp/server.js +2 -2
- package/dist/mcp/tools.js +1 -0
- package/package.json +2 -5
- package/models/jina-code-0.5b-mlx/config.json +0 -73
- package/models/jina-code-0.5b-mlx/model.py +0 -127
- package/models/mlx-embedder.py +0 -604
|
@@ -57,6 +57,8 @@ export class LocalBackend {
|
|
|
57
57
|
tsgoServices = new Map();
|
|
58
58
|
/** Per-repo in-memory embedding cache: nodeId → Float32Array (256-dim) */
|
|
59
59
|
embeddingCaches = new Map();
|
|
60
|
+
/** Per-repo in-memory NL embedding cache: includes source text for match_reason */
|
|
61
|
+
nlEmbeddingCaches = new Map();
|
|
60
62
|
/** Get (or lazily start) a tsgo LSP service for a repo. Returns null if unavailable. */
|
|
61
63
|
async getTsgo(repo) {
|
|
62
64
|
const existing = this.tsgoServices.get(repo.id);
|
|
@@ -140,6 +142,70 @@ export class LocalBackend {
|
|
|
140
142
|
results.sort((a, b) => a.distance - b.distance);
|
|
141
143
|
return results.slice(0, limit);
|
|
142
144
|
}
|
|
145
|
+
/** Load NL embeddings into memory for fast conceptual search */
|
|
146
|
+
loadNlEmbeddingCache(repoId) {
|
|
147
|
+
try {
|
|
148
|
+
const db = this.getDb(repoId);
|
|
149
|
+
let rows;
|
|
150
|
+
try {
|
|
151
|
+
rows = db.prepare('SELECT nodeId, embedding, text FROM nl_embeddings').all();
|
|
152
|
+
}
|
|
153
|
+
catch {
|
|
154
|
+
return;
|
|
155
|
+
} // table might not exist
|
|
156
|
+
if (rows.length === 0) {
|
|
157
|
+
this.nlEmbeddingCaches.delete(repoId);
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
const dims = rows[0].embedding.byteLength / 4;
|
|
161
|
+
const nodeIds = [];
|
|
162
|
+
const texts = [];
|
|
163
|
+
const matrix = new Float32Array(rows.length * dims);
|
|
164
|
+
const norms = new Float32Array(rows.length);
|
|
165
|
+
for (let i = 0; i < rows.length; i++) {
|
|
166
|
+
const row = rows[i];
|
|
167
|
+
nodeIds.push(row.nodeId);
|
|
168
|
+
texts.push(row.text);
|
|
169
|
+
const vec = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
|
|
170
|
+
matrix.set(vec, i * dims);
|
|
171
|
+
let norm = 0;
|
|
172
|
+
for (let d = 0; d < dims; d++)
|
|
173
|
+
norm += vec[d] * vec[d];
|
|
174
|
+
norms[i] = Math.sqrt(norm);
|
|
175
|
+
}
|
|
176
|
+
this.nlEmbeddingCaches.set(repoId, { nodeIds, texts, matrix, norms });
|
|
177
|
+
}
|
|
178
|
+
catch { /* NL embeddings not available */ }
|
|
179
|
+
}
|
|
180
|
+
/** Search NL embeddings in memory, returns match_reason text */
|
|
181
|
+
searchNlEmbeddingsInMemory(repoId, queryVec, limit = 10, maxDistance = 0.5) {
|
|
182
|
+
const cache = this.nlEmbeddingCaches.get(repoId);
|
|
183
|
+
if (!cache || cache.nodeIds.length === 0)
|
|
184
|
+
return [];
|
|
185
|
+
const dims = queryVec.length;
|
|
186
|
+
const results = [];
|
|
187
|
+
let qNorm = 0;
|
|
188
|
+
for (let d = 0; d < dims; d++)
|
|
189
|
+
qNorm += queryVec[d] * queryVec[d];
|
|
190
|
+
qNorm = Math.sqrt(qNorm);
|
|
191
|
+
if (qNorm === 0)
|
|
192
|
+
return [];
|
|
193
|
+
const cacheDims = cache.matrix.length / cache.nodeIds.length;
|
|
194
|
+
for (let i = 0; i < cache.nodeIds.length; i++) {
|
|
195
|
+
const offset = i * cacheDims;
|
|
196
|
+
let dot = 0;
|
|
197
|
+
const minDims = Math.min(dims, cacheDims);
|
|
198
|
+
for (let d = 0; d < minDims; d++)
|
|
199
|
+
dot += queryVec[d] * cache.matrix[offset + d];
|
|
200
|
+
const similarity = dot / (qNorm * cache.norms[i]);
|
|
201
|
+
const distance = 1 - similarity;
|
|
202
|
+
if (distance < maxDistance) {
|
|
203
|
+
results.push({ nodeId: cache.nodeIds[i], distance, text: cache.texts[i] });
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
results.sort((a, b) => a.distance - b.distance);
|
|
207
|
+
return results.slice(0, limit);
|
|
208
|
+
}
|
|
143
209
|
/** Hard ceiling — beyond this, incremental is unreliable, warn prominently */
|
|
144
210
|
static MAX_INCREMENTAL_FILES = 200;
|
|
145
211
|
/** Start file system watcher for a repo to detect source changes */
|
|
@@ -273,16 +339,17 @@ export class LocalBackend {
|
|
|
273
339
|
for (const [id, handle] of this.repos) {
|
|
274
340
|
this.startWatcher(id, handle);
|
|
275
341
|
this.seedWatcherFromGit(id, handle);
|
|
276
|
-
// Load embedding
|
|
342
|
+
// Load embedding caches into memory for fast vector search
|
|
277
343
|
if ((handle.stats?.embeddings ?? 0) > 0) {
|
|
278
344
|
this.loadEmbeddingCache(id);
|
|
279
345
|
anyEmbeddings = true;
|
|
280
346
|
}
|
|
347
|
+
this.loadNlEmbeddingCache(id); // NL cache loaded regardless (cheap, may not exist)
|
|
281
348
|
}
|
|
282
|
-
// Pre-warm
|
|
349
|
+
// Pre-warm bge-small embedder so first query has zero model-load latency
|
|
283
350
|
if (anyEmbeddings) {
|
|
284
|
-
import('../../core/embeddings/embedder.js').then(({
|
|
285
|
-
|
|
351
|
+
import('../../core/embeddings/nl-embedder.js').then(({ initNlEmbedder }) => {
|
|
352
|
+
initNlEmbedder().catch(() => { });
|
|
286
353
|
}).catch(() => { });
|
|
287
354
|
}
|
|
288
355
|
return this.repos.size > 0;
|
|
@@ -592,6 +659,9 @@ export class LocalBackend {
|
|
|
592
659
|
const end = d.endLine || 0;
|
|
593
660
|
const isSmall = end > 0 && start > 0 && (end - start) < 10;
|
|
594
661
|
lines.push(` ${sig} — ${d.type} @ ${this.shortPath(d.filePath)}:${start || '?'}${mod}`);
|
|
662
|
+
if (d.match_reason) {
|
|
663
|
+
lines.push(` _"${d.match_reason}"_`);
|
|
664
|
+
}
|
|
595
665
|
if (isSmall && d.content) {
|
|
596
666
|
const src = String(d.content).trim();
|
|
597
667
|
if (src.length < 500) {
|
|
@@ -671,6 +741,9 @@ export class LocalBackend {
|
|
|
671
741
|
renderFlows(2);
|
|
672
742
|
}
|
|
673
743
|
}
|
|
744
|
+
lines.push('');
|
|
745
|
+
lines.push('---');
|
|
746
|
+
lines.push('_Note: Results ranked by BM25 keyword + semantic + refs + file-content signals. Natural language queries may miss code using different terminology. If results seem incomplete, try specific identifiers or `grep -rn "keyword" --include="*.ts"` for exhaustive search._');
|
|
674
747
|
return lines.join('\n');
|
|
675
748
|
}
|
|
676
749
|
formatContextAsText(result) {
|
|
@@ -776,6 +849,10 @@ export class LocalBackend {
|
|
|
776
849
|
lines.push(` ${p.name} (step ${p.step_index}/${p.step_count})`);
|
|
777
850
|
}
|
|
778
851
|
}
|
|
852
|
+
// Guidance footer for agents
|
|
853
|
+
lines.push('');
|
|
854
|
+
lines.push('---');
|
|
855
|
+
lines.push('_Note: Callers are from graph edges + refs index. For widely-used symbols, verify completeness with `grep -rn "symbolName(" --include="*.ts"`. Outgoing calls may miss dynamic dispatch or reflection._');
|
|
779
856
|
return lines.join('\n');
|
|
780
857
|
}
|
|
781
858
|
formatImpactAsText(result) {
|
|
@@ -822,6 +899,9 @@ export class LocalBackend {
|
|
|
822
899
|
lines.push('');
|
|
823
900
|
lines.push(`### Modules: ${mods.map((m) => `${m.name} (${m.hits} ${m.impact})`).join(' | ')}`);
|
|
824
901
|
}
|
|
902
|
+
lines.push('');
|
|
903
|
+
lines.push('---');
|
|
904
|
+
lines.push('_Note: d=1 callers include graph edges + refs index. Indirect deps through dynamic dispatch, config, or type-only references may not appear. For critical changes, verify d=1 with `grep -rn "symbolName" --include="*.ts"`._');
|
|
825
905
|
return lines.join('\n');
|
|
826
906
|
}
|
|
827
907
|
formatDetectChangesAsText(result) {
|
|
@@ -1002,13 +1082,14 @@ export class LocalBackend {
|
|
|
1002
1082
|
// Step 1: Four-signal search in parallel
|
|
1003
1083
|
// BM25 uses expanded query; semantic uses enriched query; refs + file_words use raw query
|
|
1004
1084
|
const searchLimit = processLimit * maxSymbolsPerProcess;
|
|
1005
|
-
const [bm25Results, semanticResults, refsResults, fileWordsResults] = await Promise.all([
|
|
1085
|
+
const [bm25Results, semanticResults, nlSemanticResults, refsResults, fileWordsResults] = await Promise.all([
|
|
1006
1086
|
this.bm25Search(repo, expandedSearchQuery, searchLimit),
|
|
1007
1087
|
this.semanticSearch(repo, semanticQuery, searchLimit),
|
|
1088
|
+
this.nlSemanticSearch(repo, searchQuery, searchLimit),
|
|
1008
1089
|
Promise.resolve(this.refsSearch(repo, searchQuery, searchLimit)),
|
|
1009
1090
|
Promise.resolve(this.fileWordsSearch(repo, searchQuery, searchLimit)),
|
|
1010
1091
|
]);
|
|
1011
|
-
// Step 2: Weighted RRF merge (
|
|
1092
|
+
// Step 2: Weighted RRF merge (5 signals)
|
|
1012
1093
|
const bm25ForRRF = bm25Results.map((r, i) => ({
|
|
1013
1094
|
nodeId: String(r.nodeId ?? ''),
|
|
1014
1095
|
name: String(r.name ?? ''),
|
|
@@ -1039,6 +1120,34 @@ export class LocalBackend {
|
|
|
1039
1120
|
...(r.endLine != null ? { endLine: r.endLine } : {}),
|
|
1040
1121
|
}));
|
|
1041
1122
|
let rrfMerged = mergeWithRRF(bm25ForRRF, semanticForRRF, { limit: searchLimit });
|
|
1123
|
+
// Store NL match reasons for display
|
|
1124
|
+
const nlMatchReasons = new Map();
|
|
1125
|
+
for (const r of nlSemanticResults) {
|
|
1126
|
+
if (r.match_reason && !nlMatchReasons.has(r.nodeId)) {
|
|
1127
|
+
nlMatchReasons.set(r.nodeId, r.match_reason);
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
// Inject NL semantic results directly — they bridge the vocabulary gap
|
|
1131
|
+
// that BM25 and code embeddings miss. Insert at high score so they
|
|
1132
|
+
// appear in results even when BM25 finds unrelated "prevent" matches.
|
|
1133
|
+
if (nlSemanticResults.length > 0) {
|
|
1134
|
+
const mainIds = new Set(rrfMerged.map(r => r.nodeId || r.filePath));
|
|
1135
|
+
const topMainScore = rrfMerged[0]?.score ?? 0.01;
|
|
1136
|
+
for (let i = 0; i < Math.min(nlSemanticResults.length, 5); i++) {
|
|
1137
|
+
const nlr = nlSemanticResults[i];
|
|
1138
|
+
if (mainIds.has(nlr.nodeId))
|
|
1139
|
+
continue; // already in results
|
|
1140
|
+
// Score NL results high — at or above the top BM25 result
|
|
1141
|
+
const nlScore = topMainScore * (1.0 - i * 0.1);
|
|
1142
|
+
rrfMerged.push({
|
|
1143
|
+
filePath: nlr.filePath, score: nlScore, rank: i + 1,
|
|
1144
|
+
sources: ['semantic'], nodeId: nlr.nodeId, name: nlr.name,
|
|
1145
|
+
label: nlr.type, startLine: nlr.startLine, endLine: nlr.endLine,
|
|
1146
|
+
});
|
|
1147
|
+
}
|
|
1148
|
+
rrfMerged.sort((a, b) => b.score - a.score);
|
|
1149
|
+
rrfMerged = rrfMerged.slice(0, searchLimit);
|
|
1150
|
+
}
|
|
1042
1151
|
// Merge refs + fileWords into the RRF results (lower weight)
|
|
1043
1152
|
if (refsForRRF.length > 0 || fileWordsForRRF.length > 0) {
|
|
1044
1153
|
const supplemental = mergeWithRRF(refsForRRF, fileWordsForRRF.map((r) => ({
|
|
@@ -1084,21 +1193,29 @@ export class LocalBackend {
|
|
|
1084
1193
|
if (!searchDataMap.has(key))
|
|
1085
1194
|
searchDataMap.set(key, r);
|
|
1086
1195
|
}
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1196
|
+
for (const r of nlSemanticResults) {
|
|
1197
|
+
const key = r.nodeId || r.filePath;
|
|
1198
|
+
if (!searchDataMap.has(key))
|
|
1199
|
+
searchDataMap.set(key, r);
|
|
1200
|
+
}
|
|
1201
|
+
let merged = rrfMerged.map(rrf => {
|
|
1202
|
+
const data = searchDataMap.get(rrf.nodeId ?? '') ?? searchDataMap.get(rrf.filePath) ?? {
|
|
1090
1203
|
name: rrf.name ?? rrf.filePath.split('/').pop(), type: rrf.label ?? 'File',
|
|
1091
1204
|
filePath: rrf.filePath, nodeId: rrf.nodeId,
|
|
1092
|
-
}
|
|
1093
|
-
|
|
1094
|
-
|
|
1205
|
+
};
|
|
1206
|
+
// Attach NL match reason if available
|
|
1207
|
+
const reason = nlMatchReasons.get(rrf.nodeId ?? '') ?? nlMatchReasons.get(data.nodeId ?? '');
|
|
1208
|
+
if (reason)
|
|
1209
|
+
data.match_reason = reason;
|
|
1210
|
+
return { score: rrf.score, data };
|
|
1211
|
+
});
|
|
1212
|
+
// Filter non-code files (JSON, MD, YAML). Test files are included by default.
|
|
1213
|
+
// Agents can pass exclude_tests: true to filter test files when not needed.
|
|
1095
1214
|
merged = merged.filter(item => {
|
|
1096
1215
|
const fp = String(item.data.filePath ?? '').toLowerCase();
|
|
1097
|
-
if (isTestFilePath(fp))
|
|
1098
|
-
return false;
|
|
1099
1216
|
if (fp.endsWith('.json') || fp.endsWith('.md') || fp.endsWith('.yml') || fp.endsWith('.yaml'))
|
|
1100
1217
|
return false;
|
|
1101
|
-
if (
|
|
1218
|
+
if (params.exclude_tests && isTestFilePath(fp))
|
|
1102
1219
|
return false;
|
|
1103
1220
|
return true;
|
|
1104
1221
|
});
|
|
@@ -1482,8 +1599,8 @@ export class LocalBackend {
|
|
|
1482
1599
|
return [];
|
|
1483
1600
|
}
|
|
1484
1601
|
const { DEFAULT_MAX_SEMANTIC_DISTANCE } = await import('../../core/search/types.js');
|
|
1485
|
-
const {
|
|
1486
|
-
const queryVec = await
|
|
1602
|
+
const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
|
|
1603
|
+
const queryVec = await nlEmbed(query);
|
|
1487
1604
|
// In-memory cosine search — no disk I/O
|
|
1488
1605
|
const vecResults = this.searchEmbeddingsInMemory(repo.id, queryVec, limit, DEFAULT_MAX_SEMANTIC_DISTANCE);
|
|
1489
1606
|
if (vecResults.length === 0)
|
|
@@ -1511,6 +1628,53 @@ export class LocalBackend {
|
|
|
1511
1628
|
return [];
|
|
1512
1629
|
}
|
|
1513
1630
|
}
|
|
1631
|
+
/**
|
|
1632
|
+
* NL semantic search: embed query with bge-small, search NL descriptions.
|
|
1633
|
+
* Returns match_reason (the NL text that matched) for agent transparency.
|
|
1634
|
+
*/
|
|
1635
|
+
async nlSemanticSearch(repo, query, limit) {
|
|
1636
|
+
try {
|
|
1637
|
+
let cache = this.nlEmbeddingCaches.get(repo.id);
|
|
1638
|
+
if (!cache || cache.nodeIds.length === 0) {
|
|
1639
|
+
// Try loading on demand
|
|
1640
|
+
this.loadNlEmbeddingCache(repo.id);
|
|
1641
|
+
cache = this.nlEmbeddingCaches.get(repo.id);
|
|
1642
|
+
if (!cache || cache.nodeIds.length === 0)
|
|
1643
|
+
return [];
|
|
1644
|
+
}
|
|
1645
|
+
const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
|
|
1646
|
+
const queryVec = await nlEmbed(query);
|
|
1647
|
+
const vecResults = this.searchNlEmbeddingsInMemory(repo.id, queryVec, limit, 0.5);
|
|
1648
|
+
if (vecResults.length === 0)
|
|
1649
|
+
return [];
|
|
1650
|
+
// Fetch node metadata
|
|
1651
|
+
const metaDb = this.getDb(repo.id);
|
|
1652
|
+
const seen = new Set();
|
|
1653
|
+
const results = [];
|
|
1654
|
+
for (const r of vecResults) {
|
|
1655
|
+
if (seen.has(r.nodeId))
|
|
1656
|
+
continue;
|
|
1657
|
+
seen.add(r.nodeId);
|
|
1658
|
+
const node = getNode(metaDb, toNodeId(r.nodeId));
|
|
1659
|
+
if (node) {
|
|
1660
|
+
results.push({
|
|
1661
|
+
nodeId: r.nodeId,
|
|
1662
|
+
name: node.name,
|
|
1663
|
+
type: node.label,
|
|
1664
|
+
filePath: node.filePath,
|
|
1665
|
+
distance: r.distance,
|
|
1666
|
+
startLine: node.startLine,
|
|
1667
|
+
endLine: node.endLine,
|
|
1668
|
+
match_reason: r.text, // The NL text that matched — shown to agents
|
|
1669
|
+
});
|
|
1670
|
+
}
|
|
1671
|
+
}
|
|
1672
|
+
return results;
|
|
1673
|
+
}
|
|
1674
|
+
catch {
|
|
1675
|
+
return [];
|
|
1676
|
+
}
|
|
1677
|
+
}
|
|
1514
1678
|
/**
|
|
1515
1679
|
* Refs-based search: find symbols referenced in files that contain the query identifiers.
|
|
1516
1680
|
* Bridges the gap between graph edges (incomplete) and grep (complete for exact names).
|
|
@@ -1597,8 +1761,8 @@ export class LocalBackend {
|
|
|
1597
1761
|
const cache = this.embeddingCaches.get(repo.id);
|
|
1598
1762
|
if (!cache || cache.nodeIds.length === 0)
|
|
1599
1763
|
return [];
|
|
1600
|
-
const {
|
|
1601
|
-
const queryVec = await
|
|
1764
|
+
const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
|
|
1765
|
+
const queryVec = await nlEmbed(query);
|
|
1602
1766
|
const neighbors = this.searchEmbeddingsInMemory(repo.id, queryVec, 5, 0.7);
|
|
1603
1767
|
// Extract symbol names from nodeIds (format: "Label:filePath:name")
|
|
1604
1768
|
return neighbors.map(n => {
|
|
@@ -2033,24 +2197,30 @@ export class LocalBackend {
|
|
|
2033
2197
|
}
|
|
2034
2198
|
// Supplement callers from refs table (catches callers the graph missed)
|
|
2035
2199
|
try {
|
|
2036
|
-
const refCallers = findRefsBySymbol(db, sym.name,
|
|
2200
|
+
const refCallers = findRefsBySymbol(db, sym.name, 200);
|
|
2037
2201
|
const knownFiles = new Set(incomingRows.map(r => r.filePath));
|
|
2202
|
+
let refsAdded = 0;
|
|
2038
2203
|
for (const ref of refCallers) {
|
|
2039
2204
|
if (ref.filePath === sym.filePath)
|
|
2040
2205
|
continue; // skip self-file
|
|
2041
2206
|
if (knownFiles.has(ref.filePath))
|
|
2042
|
-
continue; // already
|
|
2043
|
-
knownFiles.add(ref.filePath);
|
|
2207
|
+
continue; // already have a caller from this file
|
|
2044
2208
|
const enclosing = this.findNodeAtPosition(db, ref.filePath, ref.line);
|
|
2045
|
-
if (enclosing)
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2209
|
+
if (!enclosing)
|
|
2210
|
+
continue; // no symbol at this line (e.g. import statement)
|
|
2211
|
+
knownFiles.add(ref.filePath); // mark AFTER finding a valid node
|
|
2212
|
+
incomingRows.push({
|
|
2213
|
+
relType: 'CALLS', uid: '', name: enclosing.name, filePath: ref.filePath,
|
|
2214
|
+
kind: enclosing.label, startLine: ref.line, reason: 'refs-index',
|
|
2215
|
+
});
|
|
2216
|
+
refsAdded++;
|
|
2051
2217
|
}
|
|
2218
|
+
if (process.env['CODE_MAPPER_VERBOSE'])
|
|
2219
|
+
console.error(`Code Mapper: refs supplement for '${sym.name}': ${refsAdded} added from ${refCallers.length} refs`);
|
|
2220
|
+
}
|
|
2221
|
+
catch (err) {
|
|
2222
|
+
console.error(`Code Mapper: refs supplement failed: ${err instanceof Error ? err.message : err}`);
|
|
2052
2223
|
}
|
|
2053
|
-
catch { /* refs table may not exist yet */ }
|
|
2054
2224
|
// Outgoing refs — exclude generic method names that produce false positives at low confidence
|
|
2055
2225
|
const GENERIC_NAMES_EXCLUDE = new Set(['has', 'get', 'set', 'add', 'remove', 'delete', 'close', 'stop', 'clear', 'reset', 'toString', 'valueOf', 'push', 'pop', 'entries', 'keys', 'values']);
|
|
2056
2226
|
let outgoingRows = [];
|
|
@@ -2388,10 +2558,44 @@ export class LocalBackend {
|
|
|
2388
2558
|
logQueryError('rename:read-ref', e);
|
|
2389
2559
|
}
|
|
2390
2560
|
}
|
|
2391
|
-
// Step
|
|
2392
|
-
let
|
|
2561
|
+
// Step 3a: Refs table lookup (instant, covers most cases the graph missed)
|
|
2562
|
+
let refsEdits = 0;
|
|
2393
2563
|
const graphFiles = new Set([sym.filePath, ...allIncoming.map(r => r.filePath)].filter(Boolean));
|
|
2394
|
-
|
|
2564
|
+
try {
|
|
2565
|
+
const refsDb = this.getDb(repo.id);
|
|
2566
|
+
const refsForName = findRefsBySymbol(refsDb, oldName, 500);
|
|
2567
|
+
for (const ref of refsForName) {
|
|
2568
|
+
const normalizedFile = ref.filePath.replace(/\\/g, '/');
|
|
2569
|
+
if (graphFiles.has(normalizedFile))
|
|
2570
|
+
continue;
|
|
2571
|
+
graphFiles.add(normalizedFile); // mark so ripgrep doesn't re-process
|
|
2572
|
+
try {
|
|
2573
|
+
const content = await fs.readFile(assertSafePath(normalizedFile), 'utf-8');
|
|
2574
|
+
const lines = content.split('\n');
|
|
2575
|
+
const regex = new RegExp(`\\b${oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g');
|
|
2576
|
+
for (let i = 0; i < lines.length; i++) {
|
|
2577
|
+
const searchLine = lines[i];
|
|
2578
|
+
if (!searchLine)
|
|
2579
|
+
continue;
|
|
2580
|
+
regex.lastIndex = 0;
|
|
2581
|
+
if (regex.test(searchLine)) {
|
|
2582
|
+
regex.lastIndex = 0;
|
|
2583
|
+
addEdit(normalizedFile, i + 1, searchLine.trim(), searchLine.replace(regex, new_name).trim(), 'refs');
|
|
2584
|
+
refsEdits++;
|
|
2585
|
+
}
|
|
2586
|
+
}
|
|
2587
|
+
}
|
|
2588
|
+
catch (e) {
|
|
2589
|
+
logQueryError('rename:refs-read', e);
|
|
2590
|
+
}
|
|
2591
|
+
}
|
|
2592
|
+
}
|
|
2593
|
+
catch (e) {
|
|
2594
|
+
logQueryError('rename:refs', e);
|
|
2595
|
+
}
|
|
2596
|
+
// Step 3b: Ripgrep text search for anything refs + graph missed
|
|
2597
|
+
let astSearchEdits = 0;
|
|
2598
|
+
// Simple text search across the repo for the old name (in files not already covered)
|
|
2395
2599
|
try {
|
|
2396
2600
|
const { execFileSync } = await import('child_process');
|
|
2397
2601
|
const rgArgs = [
|
|
@@ -2456,9 +2660,11 @@ export class LocalBackend {
|
|
|
2456
2660
|
files_affected: allChanges.length,
|
|
2457
2661
|
total_edits: totalEdits,
|
|
2458
2662
|
graph_edits: graphEdits,
|
|
2663
|
+
refs_edits: refsEdits,
|
|
2459
2664
|
text_search_edits: astSearchEdits,
|
|
2460
2665
|
changes: allChanges,
|
|
2461
2666
|
applied: !dry_run,
|
|
2667
|
+
_note: 'Rename uses graph edges + refs index + ripgrep. Always review changes before applying. String literals, comments, and dynamic references (e.g. obj[methodName]) need manual review.',
|
|
2462
2668
|
};
|
|
2463
2669
|
}
|
|
2464
2670
|
async impact(repo, params) {
|
|
@@ -2535,6 +2741,32 @@ export class LocalBackend {
|
|
|
2535
2741
|
}
|
|
2536
2742
|
}
|
|
2537
2743
|
}
|
|
2744
|
+
// Supplement d=1 callers from refs table (catches callers the graph missed)
|
|
2745
|
+
if (direction === 'upstream') {
|
|
2746
|
+
try {
|
|
2747
|
+
const targetName = sym.name;
|
|
2748
|
+
const d1FromRefs = findRefsBySymbol(db, targetName, 200);
|
|
2749
|
+
for (const ref of d1FromRefs) {
|
|
2750
|
+
if (ref.filePath === sym.filePath)
|
|
2751
|
+
continue;
|
|
2752
|
+
const refNode = this.findNodeAtPosition(db, ref.filePath, ref.line);
|
|
2753
|
+
if (refNode && !seenIds.has(refNode.name + ':' + ref.filePath)) {
|
|
2754
|
+
// Find the actual node ID for this position
|
|
2755
|
+
const fullNodes = findNodesByFile(db, ref.filePath);
|
|
2756
|
+
const match = fullNodes.find(n => n.name === refNode.name && n.startLine != null && n.startLine <= ref.line + 1 && (n.endLine ?? 9999) >= ref.line + 1);
|
|
2757
|
+
if (match && !seenIds.has(match.id) && !startIds.some(s => s === match.id)) {
|
|
2758
|
+
seenIds.add(match.id);
|
|
2759
|
+
mergedNodes.push({
|
|
2760
|
+
id: match.id, name: match.name, label: match.label,
|
|
2761
|
+
filePath: match.filePath, depth: 1,
|
|
2762
|
+
relationType: 'CALLS', confidence: 0.8,
|
|
2763
|
+
});
|
|
2764
|
+
}
|
|
2765
|
+
}
|
|
2766
|
+
}
|
|
2767
|
+
}
|
|
2768
|
+
catch { /* refs table may not exist */ }
|
|
2769
|
+
}
|
|
2538
2770
|
const impacted = mergedNodes;
|
|
2539
2771
|
const truncated = anyTruncated;
|
|
2540
2772
|
const grouped = {};
|
package/dist/mcp/server.js
CHANGED
|
@@ -13,8 +13,8 @@ import { getResourceDefinitions, getResourceTemplates, readResource } from './re
|
|
|
13
13
|
// the MCP tool descriptions. Hints wasted ~40 tokens per response.
|
|
14
14
|
/** Create a configured MCP Server with all handlers registered (transport-agnostic) */
|
|
15
15
|
export function createMCPServer(backend) {
|
|
16
|
-
// Preload embedding model in background so first query doesn't pay cold-start cost
|
|
17
|
-
import('../core/embeddings/embedder.js').then(m => m.
|
|
16
|
+
// Preload bge-small embedding model in background so first query doesn't pay cold-start cost
|
|
17
|
+
import('../core/embeddings/nl-embedder.js').then(m => m.initNlEmbedder()).catch(() => { });
|
|
18
18
|
const require = createRequire(import.meta.url);
|
|
19
19
|
const pkgVersion = require('../../package.json').version;
|
|
20
20
|
const server = new Server({
|
package/dist/mcp/tools.js
CHANGED
|
@@ -44,6 +44,7 @@ Hybrid ranking: BM25 keyword + semantic vector search, ranked by Reciprocal Rank
|
|
|
44
44
|
limit: { type: 'number', description: 'Max processes to return (default: 5)', default: 5 },
|
|
45
45
|
max_symbols: { type: 'number', description: 'Max symbols per process (default: 10)', default: 10 },
|
|
46
46
|
include_content: { type: 'boolean', description: 'Include full symbol source code (default: false)', default: false },
|
|
47
|
+
exclude_tests: { type: 'boolean', description: 'Exclude test/spec/fixture files from results (default: false)', default: false },
|
|
47
48
|
repo: { type: 'string', description: 'Repository name or path. Omit if only one repo is indexed.' },
|
|
48
49
|
},
|
|
49
50
|
required: ['query'],
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.5.0",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|
|
@@ -34,10 +34,7 @@
|
|
|
34
34
|
"hooks",
|
|
35
35
|
"scripts",
|
|
36
36
|
"skills",
|
|
37
|
-
"vendor"
|
|
38
|
-
"models/mlx-embedder.py",
|
|
39
|
-
"models/jina-code-0.5b-mlx/model.py",
|
|
40
|
-
"models/jina-code-0.5b-mlx/config.json"
|
|
37
|
+
"vendor"
|
|
41
38
|
],
|
|
42
39
|
"scripts": {
|
|
43
40
|
"build": "tsc",
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"architectures": [
|
|
3
|
-
"Qwen2ForCausalLM"
|
|
4
|
-
],
|
|
5
|
-
"attention_dropout": 0.0,
|
|
6
|
-
"bos_token_id": 151643,
|
|
7
|
-
"eos_token_id": 151643,
|
|
8
|
-
"hidden_act": "silu",
|
|
9
|
-
"hidden_size": 896,
|
|
10
|
-
"initializer_range": 0.02,
|
|
11
|
-
"intermediate_size": 4864,
|
|
12
|
-
"layer_types": [
|
|
13
|
-
"full_attention",
|
|
14
|
-
"full_attention",
|
|
15
|
-
"full_attention",
|
|
16
|
-
"full_attention",
|
|
17
|
-
"full_attention",
|
|
18
|
-
"full_attention",
|
|
19
|
-
"full_attention",
|
|
20
|
-
"full_attention",
|
|
21
|
-
"full_attention",
|
|
22
|
-
"full_attention",
|
|
23
|
-
"full_attention",
|
|
24
|
-
"full_attention",
|
|
25
|
-
"full_attention",
|
|
26
|
-
"full_attention",
|
|
27
|
-
"full_attention",
|
|
28
|
-
"full_attention",
|
|
29
|
-
"full_attention",
|
|
30
|
-
"full_attention",
|
|
31
|
-
"full_attention",
|
|
32
|
-
"full_attention",
|
|
33
|
-
"full_attention",
|
|
34
|
-
"full_attention",
|
|
35
|
-
"full_attention",
|
|
36
|
-
"full_attention"
|
|
37
|
-
],
|
|
38
|
-
"matryoshka_dims": [
|
|
39
|
-
64,
|
|
40
|
-
128,
|
|
41
|
-
256,
|
|
42
|
-
512,
|
|
43
|
-
896
|
|
44
|
-
],
|
|
45
|
-
"max_position_embeddings": 32768,
|
|
46
|
-
"max_window_layers": 24,
|
|
47
|
-
"model_type": "qwen2",
|
|
48
|
-
"num_attention_heads": 14,
|
|
49
|
-
"num_hidden_layers": 24,
|
|
50
|
-
"num_key_value_heads": 2,
|
|
51
|
-
"prompt_names": [
|
|
52
|
-
"query",
|
|
53
|
-
"passage"
|
|
54
|
-
],
|
|
55
|
-
"rms_norm_eps": 1e-06,
|
|
56
|
-
"rope_scaling": null,
|
|
57
|
-
"rope_theta": 1000000.0,
|
|
58
|
-
"sliding_window": null,
|
|
59
|
-
"task_names": [
|
|
60
|
-
"nl2code",
|
|
61
|
-
"qa",
|
|
62
|
-
"code2code",
|
|
63
|
-
"code2nl",
|
|
64
|
-
"code2completion"
|
|
65
|
-
],
|
|
66
|
-
"tie_word_embeddings": true,
|
|
67
|
-
"tokenizer_class": "Qwen2TokenizerFast",
|
|
68
|
-
"torch_dtype": "bfloat16",
|
|
69
|
-
"transformers_version": "4.53.0",
|
|
70
|
-
"use_cache": true,
|
|
71
|
-
"use_sliding_window": false,
|
|
72
|
-
"vocab_size": 151936
|
|
73
|
-
}
|