@zuvia-software-solutions/code-mapper 2.6.1 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -273,59 +273,73 @@ export const analyzeCommand = async (inputPath, options) => {
273
273
  recordPhase('refs');
274
274
  updateBar(85, 'Building refs index...');
275
275
  {
276
- const { clearRefs, insertRefsBatch, clearFileWords, upsertFileWords } = await import('../core/db/adapter.js');
277
- const fsRef = await import('fs/promises');
276
+ const { clearRefs, clearFileWords, upsertFileWords } = await import('../core/db/adapter.js');
277
+ const fsSync = await import('fs');
278
278
  clearRefs(db);
279
279
  clearFileWords(db);
280
- // Scan all source files for identifier occurrences
281
280
  const STOP_WORDS = new Set(['the', 'and', 'for', 'from', 'with', 'this', 'that', 'have', 'has', 'not', 'are', 'was', 'were', 'been', 'being', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'does', 'did', 'let', 'var', 'const', 'new', 'return', 'function', 'class', 'import', 'export', 'default', 'void', 'null', 'undefined', 'true', 'false', 'else', 'case', 'break', 'continue', 'while', 'throw', 'catch', 'try', 'finally', 'async', 'await', 'yield', 'typeof', 'instanceof', 'delete', 'switch', 'interface', 'type', 'enum', 'extends', 'implements', 'static', 'private', 'public', 'protected', 'abstract', 'readonly', 'override', 'declare', 'module', 'namespace', 'require', 'string', 'number', 'boolean', 'object', 'any', 'never', 'unknown', 'symbol']);
282
281
  const SRC_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.c', '.h', '.cpp', '.hpp', '.cs', '.rb', '.php', '.kt', '.swift', '.mts', '.mjs', '.cts', '.cjs']);
283
282
  const identRegex = /\b[a-zA-Z_]\w{2,}\b/g;
284
283
  const wordRegex = /\b[a-zA-Z]\w{2,}\b/g;
285
- // Get all file paths from the nodes table
286
284
  const fileRows = db.prepare("SELECT DISTINCT filePath FROM nodes WHERE label = 'File'").all();
287
- let refsBuilt = 0;
288
- for (const { filePath } of fileRows) {
289
- const ext = path.extname(filePath).toLowerCase();
290
- if (!SRC_EXTENSIONS.has(ext))
291
- continue;
292
- let content;
293
- try {
294
- content = await fsRef.readFile(path.resolve(repoPath, filePath), 'utf-8');
295
- }
296
- catch {
297
- continue;
298
- }
299
- // Build refs (identifier occurrences — skip language keywords)
300
- const refs = [];
301
- const lines = content.split('\n');
302
- for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
303
- let match;
285
+ // Single transaction for all refs + file_words — avoids per-file transaction overhead
286
+ const refsStmt = db.prepare('INSERT INTO refs (symbol, filePath, line) VALUES (?, ?, ?)');
287
+ const tx = db.transaction(() => {
288
+ let refsBuilt = 0;
289
+ for (const { filePath } of fileRows) {
290
+ const ext = path.extname(filePath).toLowerCase();
291
+ if (!SRC_EXTENSIONS.has(ext))
292
+ continue;
293
+ let content;
294
+ try {
295
+ content = fsSync.readFileSync(path.resolve(repoPath, filePath), 'utf-8');
296
+ }
297
+ catch {
298
+ continue;
299
+ }
300
+ // Pre-build line offset table for O(1) line lookups
301
+ const lineOffsets = [0];
302
+ for (let i = 0; i < content.length; i++) {
303
+ if (content.charCodeAt(i) === 10)
304
+ lineOffsets.push(i + 1);
305
+ }
306
+ const getLine = (offset) => {
307
+ let lo = 0, hi = lineOffsets.length - 1;
308
+ while (lo < hi) {
309
+ const mid = (lo + hi + 1) >> 1;
310
+ if (lineOffsets[mid] <= offset)
311
+ lo = mid;
312
+ else
313
+ hi = mid - 1;
314
+ }
315
+ return lo;
316
+ };
317
+ // Refs: regex over whole content with binary-search line lookup
304
318
  identRegex.lastIndex = 0;
305
- while ((match = identRegex.exec(lines[lineIdx])) !== null) {
319
+ let match;
320
+ while ((match = identRegex.exec(content)) !== null) {
306
321
  if (!STOP_WORDS.has(match[0].toLowerCase())) {
307
- refs.push({ symbol: match[0], filePath, line: lineIdx });
322
+ refsStmt.run(match[0], filePath, getLine(match.index));
308
323
  }
309
324
  }
325
+ // File words for conceptual search
326
+ const wordSet = new Set();
327
+ wordRegex.lastIndex = 0;
328
+ let wMatch;
329
+ while ((wMatch = wordRegex.exec(content)) !== null) {
330
+ const w = wMatch[0].toLowerCase();
331
+ if (!STOP_WORDS.has(w))
332
+ wordSet.add(w);
333
+ }
334
+ if (wordSet.size > 0)
335
+ upsertFileWords(db, filePath, [...wordSet].join(' '));
336
+ refsBuilt++;
337
+ if (refsBuilt % 500 === 0) {
338
+ updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
339
+ }
310
340
  }
311
- if (refs.length > 0)
312
- insertRefsBatch(db, refs);
313
- // Build file_words (conceptual search)
314
- const wordSet = new Set();
315
- let wMatch;
316
- wordRegex.lastIndex = 0;
317
- while ((wMatch = wordRegex.exec(content)) !== null) {
318
- const w = wMatch[0].toLowerCase();
319
- if (!STOP_WORDS.has(w))
320
- wordSet.add(w);
321
- }
322
- if (wordSet.size > 0)
323
- upsertFileWords(db, filePath, [...wordSet].join(' '));
324
- refsBuilt++;
325
- if (refsBuilt % 500 === 0) {
326
- updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
327
- }
328
- }
341
+ });
342
+ tx();
329
343
  }
330
344
  // Phase 3: FTS (85-90%)
331
345
  // FTS5 is auto-created by schema triggers — no manual index creation needed
@@ -9,20 +9,28 @@
9
9
  import { pipeline } from '@huggingface/transformers';
10
10
  const MODEL_ID = 'Xenova/bge-small-en-v1.5';
11
11
  async function main() {
12
- // Load model
13
12
  const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
14
13
  process.send({ type: 'ready' });
15
14
  // Process messages from parent
16
15
  process.on('message', async (msg) => {
17
16
  if (msg.type === 'embed') {
18
17
  const results = [];
19
- for (const item of msg.items) {
20
- try {
21
- const result = await extractor(item.text, { pooling: 'cls', normalize: true });
22
- results.push({ nodeId: item.nodeId, vec: Array.from(result.data) });
18
+ try {
19
+ const texts = msg.items.map((item) => item.text);
20
+ const batchResult = await extractor(texts, { pooling: 'cls', normalize: true });
21
+ const dims = batchResult.dims?.[1] ?? 384;
22
+ const flat = batchResult.data;
23
+ for (let i = 0; i < msg.items.length; i++) {
24
+ results.push({ nodeId: msg.items[i].nodeId, vec: Array.from(flat.subarray(i * dims, (i + 1) * dims)) });
23
25
  }
24
- catch {
25
- // Skip failed embeddings
26
+ }
27
+ catch {
28
+ for (const item of msg.items) {
29
+ try {
30
+ const result = await extractor(item.text, { pooling: 'cls', normalize: true });
31
+ results.push({ nodeId: item.nodeId, vec: Array.from(result.data) });
32
+ }
33
+ catch { /* skip */ }
26
34
  }
27
35
  }
28
36
  process.send({ type: 'results', results, batchId: msg.batchId });
@@ -21,9 +21,11 @@ export async function initNlEmbedder() {
21
21
  return loadPromise;
22
22
  loadPromise = (async () => {
23
23
  const { pipeline, env } = await import('@huggingface/transformers');
24
+ const os = await import('os');
25
+ const cpuCount = os.cpus().length;
24
26
  // Use all available CPU threads for ONNX inference
25
27
  if (env.backends?.onnx?.wasm) {
26
- env.backends.onnx.wasm.numThreads = Math.max(1, (await import('os')).cpus().length);
28
+ env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
27
29
  }
28
30
  extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
29
31
  })();
@@ -44,14 +46,15 @@ export async function nlEmbed(text) {
44
46
  export async function nlEmbedBatch(texts) {
45
47
  if (!extractor)
46
48
  await initNlEmbedder();
47
- const BATCH = 32; // sub-batch size — balances throughput vs memory
49
+ const BATCH = 64;
48
50
  const results = [];
49
51
  for (let i = 0; i < texts.length; i += BATCH) {
50
52
  const batch = texts.slice(i, i + BATCH);
51
- // Process sub-batch transformers.js handles arrays
52
- const batchResults = await Promise.all(batch.map(text => extractor(text, { pooling: 'cls', normalize: true })));
53
- for (const result of batchResults) {
54
- results.push(Array.from(result.data));
53
+ const batchResult = await extractor(batch, { pooling: 'cls', normalize: true });
54
+ const dims = batchResult.dims?.[1] ?? 384;
55
+ const flat = batchResult.data;
56
+ for (let j = 0; j < batch.length; j++) {
57
+ results.push(Array.from(flat.subarray(j * dims, (j + 1) * dims)));
55
58
  }
56
59
  }
57
60
  return results;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.6.1",
3
+ "version": "2.6.3",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",