@zuvia-software-solutions/code-mapper 2.6.1 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/analyze.js
CHANGED
@@ -273,59 +273,73 @@ export const analyzeCommand = async (inputPath, options) => {
   recordPhase('refs');
   updateBar(85, 'Building refs index...');
   {
-    const { clearRefs,
-    const
+    const { clearRefs, clearFileWords, upsertFileWords } = await import('../core/db/adapter.js');
+    const fsSync = await import('fs');
     clearRefs(db);
     clearFileWords(db);
-    // Scan all source files for identifier occurrences
     const STOP_WORDS = new Set(['the', 'and', 'for', 'from', 'with', 'this', 'that', 'have', 'has', 'not', 'are', 'was', 'were', 'been', 'being', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'does', 'did', 'let', 'var', 'const', 'new', 'return', 'function', 'class', 'import', 'export', 'default', 'void', 'null', 'undefined', 'true', 'false', 'else', 'case', 'break', 'continue', 'while', 'throw', 'catch', 'try', 'finally', 'async', 'await', 'yield', 'typeof', 'instanceof', 'delete', 'switch', 'interface', 'type', 'enum', 'extends', 'implements', 'static', 'private', 'public', 'protected', 'abstract', 'readonly', 'override', 'declare', 'module', 'namespace', 'require', 'string', 'number', 'boolean', 'object', 'any', 'never', 'unknown', 'symbol']);
     const SRC_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.c', '.h', '.cpp', '.hpp', '.cs', '.rb', '.php', '.kt', '.swift', '.mts', '.mjs', '.cts', '.cjs']);
     const identRegex = /\b[a-zA-Z_]\w{2,}\b/g;
     const wordRegex = /\b[a-zA-Z]\w{2,}\b/g;
-    // Get all file paths from the nodes table
     const fileRows = db.prepare("SELECT DISTINCT filePath FROM nodes WHERE label = 'File'").all();
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    // Single transaction for all refs + file_words — avoids per-file transaction overhead
+    const refsStmt = db.prepare('INSERT INTO refs (symbol, filePath, line) VALUES (?, ?, ?)');
+    const tx = db.transaction(() => {
+      let refsBuilt = 0;
+      for (const { filePath } of fileRows) {
+        const ext = path.extname(filePath).toLowerCase();
+        if (!SRC_EXTENSIONS.has(ext))
+          continue;
+        let content;
+        try {
+          content = fsSync.readFileSync(path.resolve(repoPath, filePath), 'utf-8');
+        }
+        catch {
+          continue;
+        }
+        // Pre-build line offset table for O(1) line lookups
+        const lineOffsets = [0];
+        for (let i = 0; i < content.length; i++) {
+          if (content.charCodeAt(i) === 10)
+            lineOffsets.push(i + 1);
+        }
+        const getLine = (offset) => {
+          let lo = 0, hi = lineOffsets.length - 1;
+          while (lo < hi) {
+            const mid = (lo + hi + 1) >> 1;
+            if (lineOffsets[mid] <= offset)
+              lo = mid;
+            else
+              hi = mid - 1;
+          }
+          return lo;
+        };
+        // Refs: regex over whole content with binary-search line lookup
         identRegex.lastIndex = 0;
-
+        let match;
+        while ((match = identRegex.exec(content)) !== null) {
          if (!STOP_WORDS.has(match[0].toLowerCase())) {
-
+            refsStmt.run(match[0], filePath, getLine(match.index));
          }
        }
+        // File words for conceptual search
+        const wordSet = new Set();
+        wordRegex.lastIndex = 0;
+        let wMatch;
+        while ((wMatch = wordRegex.exec(content)) !== null) {
+          const w = wMatch[0].toLowerCase();
+          if (!STOP_WORDS.has(w))
+            wordSet.add(w);
+        }
+        if (wordSet.size > 0)
+          upsertFileWords(db, filePath, [...wordSet].join(' '));
+        refsBuilt++;
+        if (refsBuilt % 500 === 0) {
+          updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
+        }
      }
-
-
-        // Build file_words (conceptual search)
-        const wordSet = new Set();
-        let wMatch;
-        wordRegex.lastIndex = 0;
-        while ((wMatch = wordRegex.exec(content)) !== null) {
-          const w = wMatch[0].toLowerCase();
-          if (!STOP_WORDS.has(w))
-            wordSet.add(w);
-        }
-        if (wordSet.size > 0)
-          upsertFileWords(db, filePath, [...wordSet].join(' '));
-        refsBuilt++;
-        if (refsBuilt % 500 === 0) {
-          updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
-        }
-      }
+    });
+    tx();
   }
   // Phase 3: FTS (85-90%)
   // FTS5 is auto-created by schema triggers — no manual index creation needed
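The refs pass added above turns each regex match offset into a line number by prebuilding a table of line-start offsets and binary-searching it. A minimal standalone sketch of that lookup, runnable on its own (the sample content is made up, and whether refs stores 0- or 1-based lines is not visible in this diff):

    // Offset-to-line lookup, mirroring the lineOffsets/getLine code added above.
    const content = 'const a = 1;\nconst b = a + 1;\nconsole.log(b);\n'; // hypothetical sample file

    // One entry per line: the offset at which that line starts.
    const lineOffsets = [0];
    for (let i = 0; i < content.length; i++) {
      if (content.charCodeAt(i) === 10) lineOffsets.push(i + 1); // 10 === '\n'
    }

    // Binary search for the last line whose start offset is <= the match offset.
    const getLine = (offset) => {
      let lo = 0, hi = lineOffsets.length - 1;
      while (lo < hi) {
        const mid = (lo + hi + 1) >> 1;
        if (lineOffsets[mid] <= offset) lo = mid;
        else hi = mid - 1;
      }
      return lo;
    };

    for (const match of content.matchAll(/\b[a-zA-Z_]\w{2,}\b/g)) {
      console.log(match[0], getLine(match.index)); // "const" -> 0, "console" -> 2 (0-based)
    }

Building the table once per file keeps each lookup at O(log n) instead of re-scanning the content for every match.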
@@ -9,20 +9,28 @@
 import { pipeline } from '@huggingface/transformers';
 const MODEL_ID = 'Xenova/bge-small-en-v1.5';
 async function main() {
-  // Load model
   const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
   process.send({ type: 'ready' });
   // Process messages from parent
   process.on('message', async (msg) => {
     if (msg.type === 'embed') {
       const results = [];
-
-
-
-
+      try {
+        const texts = msg.items.map((item) => item.text);
+        const batchResult = await extractor(texts, { pooling: 'cls', normalize: true });
+        const dims = batchResult.dims?.[1] ?? 384;
+        const flat = batchResult.data;
+        for (let i = 0; i < msg.items.length; i++) {
+          results.push({ nodeId: msg.items[i].nodeId, vec: Array.from(flat.subarray(i * dims, (i + 1) * dims)) });
        }
-
-
+      }
+      catch {
+        for (const item of msg.items) {
+          try {
+            const result = await extractor(item.text, { pooling: 'cls', normalize: true });
+            results.push({ nodeId: item.nodeId, vec: Array.from(result.data) });
+          }
+          catch { /* skip */ }
        }
      }
      process.send({ type: 'results', results, batchId: msg.batchId });
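The worker hunk above switches from embedding items one by one to a single batched extractor call, falling back to per-item calls only if the batch throws. A rough sketch of that batch-then-fallback shape with a stand-in embedder (fakeEmbed, embedItems and the 4-dim output are invented for illustration, not part of the package):

    // Stand-in for extractor(texts, { pooling: 'cls', normalize: true }):
    // returns a flat Float32Array with `dims` values per input, as transformers.js does.
    const fakeEmbed = async (input) => {
      const texts = Array.isArray(input) ? input : [input];
      const dims = 4;
      const data = new Float32Array(texts.length * dims).map((_, i) => i / 10);
      return { data, dims: [texts.length, dims] };
    };

    async function embedItems(items) {
      const results = [];
      try {
        // Fast path: one call for the whole batch, then slice the flat buffer per item.
        const out = await fakeEmbed(items.map((it) => it.text));
        const dims = out.dims?.[1] ?? 4;
        for (let i = 0; i < items.length; i++) {
          results.push({ nodeId: items[i].nodeId, vec: Array.from(out.data.subarray(i * dims, (i + 1) * dims)) });
        }
      } catch {
        // Fallback: per-item calls so one bad input doesn't lose the whole batch.
        for (const item of items) {
          try {
            const out = await fakeEmbed(item.text);
            results.push({ nodeId: item.nodeId, vec: Array.from(out.data) });
          } catch { /* skip this item */ }
        }
      }
      return results;
    }

    embedItems([{ nodeId: 'n1', text: 'hello' }, { nodeId: 'n2', text: 'world' }]).then((r) => console.log(r));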
@@ -21,9 +21,11 @@ export async function initNlEmbedder() {
     return loadPromise;
   loadPromise = (async () => {
     const { pipeline, env } = await import('@huggingface/transformers');
+    const os = await import('os');
+    const cpuCount = os.cpus().length;
     // Use all available CPU threads for ONNX inference
     if (env.backends?.onnx?.wasm) {
-      env.backends.onnx.wasm.numThreads = Math.max(1,
+      env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
    }
     extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
   })();
@@ -44,14 +46,15 @@ export async function nlEmbed(text) {
 export async function nlEmbedBatch(texts) {
   if (!extractor)
     await initNlEmbedder();
-  const BATCH =
+  const BATCH = 64;
   const results = [];
   for (let i = 0; i < texts.length; i += BATCH) {
     const batch = texts.slice(i, i + BATCH);
-
-    const
-
-
+    const batchResult = await extractor(batch, { pooling: 'cls', normalize: true });
+    const dims = batchResult.dims?.[1] ?? 384;
+    const flat = batchResult.data;
+    for (let j = 0; j < batch.length; j++) {
+      results.push(Array.from(flat.subarray(j * dims, (j + 1) * dims)));
    }
  }
  return results;
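Both the worker and nlEmbedBatch rely on the extractor returning one flat buffer of shape [batch, dims] and slice it with subarray. A tiny illustration of that slicing (the numbers and the 3-dim size are made up; bge-small-en-v1.5 actually produces 384-dim vectors):

    // Splitting a row-major [batch, dims] Float32Array into one vector per input.
    const dims = 3;
    const flat = Float32Array.from([0.1, 0.2, 0.3,   // row 0 -> vector for texts[0]
                                    0.4, 0.5, 0.6]); // row 1 -> vector for texts[1]
    const vectors = [];
    for (let j = 0; j < flat.length / dims; j++) {
      vectors.push(Array.from(flat.subarray(j * dims, (j + 1) * dims)));
    }
    console.log(vectors.length, vectors[0].length); // 2 3

subarray only creates views into the shared buffer; the copy happens when Array.from materialises each vector.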
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.6.1",
+  "version": "2.6.3",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",