npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.6.1 → 2.6.3 - Mend

@zuvia-software-solutions/code-mapper 2.6.1 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/cli/analyze.js +55 -41
package/dist/core/embeddings/nl-embed-worker.js +15 -7
package/dist/core/embeddings/nl-embedder.js +9 -6
package/package.json +1 -1

package/dist/cli/analyze.js CHANGED Viewed

@@ -273,59 +273,73 @@ export const analyzeCommand = async (inputPath, options) => {
     recordPhase('refs');
     updateBar(85, 'Building refs index...');
     {
-        const { clearRefs, insertRefsBatch, clearFileWords, upsertFileWords } = await import('../core/db/adapter.js');
-        const fsRef = await import('fs/promises');
+        const { clearRefs, clearFileWords, upsertFileWords } = await import('../core/db/adapter.js');
+        const fsSync = await import('fs');
         clearRefs(db);
         clearFileWords(db);
-        // Scan all source files for identifier occurrences
         const STOP_WORDS = new Set(['the', 'and', 'for', 'from', 'with', 'this', 'that', 'have', 'has', 'not', 'are', 'was', 'were', 'been', 'being', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'does', 'did', 'let', 'var', 'const', 'new', 'return', 'function', 'class', 'import', 'export', 'default', 'void', 'null', 'undefined', 'true', 'false', 'else', 'case', 'break', 'continue', 'while', 'throw', 'catch', 'try', 'finally', 'async', 'await', 'yield', 'typeof', 'instanceof', 'delete', 'switch', 'interface', 'type', 'enum', 'extends', 'implements', 'static', 'private', 'public', 'protected', 'abstract', 'readonly', 'override', 'declare', 'module', 'namespace', 'require', 'string', 'number', 'boolean', 'object', 'any', 'never', 'unknown', 'symbol']);
         const SRC_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.c', '.h', '.cpp', '.hpp', '.cs', '.rb', '.php', '.kt', '.swift', '.mts', '.mjs', '.cts', '.cjs']);
         const identRegex = /\b[a-zA-Z_]\w{2,}\b/g;
         const wordRegex = /\b[a-zA-Z]\w{2,}\b/g;
-        // Get all file paths from the nodes table
         const fileRows = db.prepare("SELECT DISTINCT filePath FROM nodes WHERE label = 'File'").all();
-        let refsBuilt = 0;
-        for (const { filePath } of fileRows) {
-            const ext = path.extname(filePath).toLowerCase();
-            if (!SRC_EXTENSIONS.has(ext))
-                continue;
-            let content;
-            try {
-                content = await fsRef.readFile(path.resolve(repoPath, filePath), 'utf-8');
-            }
-            catch {
-                continue;
-            }
-            // Build refs (identifier occurrences — skip language keywords)
-            const refs = [];
-            const lines = content.split('\n');
-            for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
-                let match;
+        // Single transaction for all refs + file_words — avoids per-file transaction overhead
+        const refsStmt = db.prepare('INSERT INTO refs (symbol, filePath, line) VALUES (?, ?, ?)');
+        const tx = db.transaction(() => {
+            let refsBuilt = 0;
+            for (const { filePath } of fileRows) {
+                const ext = path.extname(filePath).toLowerCase();
+                if (!SRC_EXTENSIONS.has(ext))
+                    continue;
+                let content;
+                try {
+                    content = fsSync.readFileSync(path.resolve(repoPath, filePath), 'utf-8');
+                }
+                catch {
+                    continue;
+                }
+                // Pre-build line offset table for O(1) line lookups
+                const lineOffsets = [0];
+                for (let i = 0; i < content.length; i++) {
+                    if (content.charCodeAt(i) === 10)
+                        lineOffsets.push(i + 1);
+                }
+                const getLine = (offset) => {
+                    let lo = 0, hi = lineOffsets.length - 1;
+                    while (lo < hi) {
+                        const mid = (lo + hi + 1) >> 1;
+                        if (lineOffsets[mid] <= offset)
+                            lo = mid;
+                        else
+                            hi = mid - 1;
+                    }
+                    return lo;
+                };
+                // Refs: regex over whole content with binary-search line lookup
                 identRegex.lastIndex = 0;
-                while ((match = identRegex.exec(lines[lineIdx])) !== null) {
+                let match;
+                while ((match = identRegex.exec(content)) !== null) {
                     if (!STOP_WORDS.has(match[0].toLowerCase())) {
-                        refs.push({ symbol: match[0], filePath, line: lineIdx });
+                        refsStmt.run(match[0], filePath, getLine(match.index));
                     }
                 }
+                // File words for conceptual search
+                const wordSet = new Set();
+                wordRegex.lastIndex = 0;
+                let wMatch;
+                while ((wMatch = wordRegex.exec(content)) !== null) {
+                    const w = wMatch[0].toLowerCase();
+                    if (!STOP_WORDS.has(w))
+                        wordSet.add(w);
+                }
+                if (wordSet.size > 0)
+                    upsertFileWords(db, filePath, [...wordSet].join(' '));
+                refsBuilt++;
+                if (refsBuilt % 500 === 0) {
+                    updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
+                }
             }
-            if (refs.length > 0)
-                insertRefsBatch(db, refs);
-            // Build file_words (conceptual search)
-            const wordSet = new Set();
-            let wMatch;
-            wordRegex.lastIndex = 0;
-            while ((wMatch = wordRegex.exec(content)) !== null) {
-                const w = wMatch[0].toLowerCase();
-                if (!STOP_WORDS.has(w))
-                    wordSet.add(w);
-            }
-            if (wordSet.size > 0)
-                upsertFileWords(db, filePath, [...wordSet].join(' '));
-            refsBuilt++;
-            if (refsBuilt % 500 === 0) {
-                updateBar(85, `Building refs index... (${refsBuilt}/${fileRows.length})`);
-            }
-        }
+        });
+        tx();
     }
     // Phase 3: FTS (85-90%)
     // FTS5 is auto-created by schema triggers — no manual index creation needed

package/dist/core/embeddings/nl-embed-worker.js CHANGED Viewed

@@ -9,20 +9,28 @@
 import { pipeline } from '@huggingface/transformers';
 const MODEL_ID = 'Xenova/bge-small-en-v1.5';
 async function main() {
-    // Load model
     const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
     process.send({ type: 'ready' });
     // Process messages from parent
     process.on('message', async (msg) => {
         if (msg.type === 'embed') {
             const results = [];
-            for (const item of msg.items) {
-                try {
-                    const result = await extractor(item.text, { pooling: 'cls', normalize: true });
-                    results.push({ nodeId: item.nodeId, vec: Array.from(result.data) });
+            try {
+                const texts = msg.items.map((item) => item.text);
+                const batchResult = await extractor(texts, { pooling: 'cls', normalize: true });
+                const dims = batchResult.dims?.[1] ?? 384;
+                const flat = batchResult.data;
+                for (let i = 0; i < msg.items.length; i++) {
+                    results.push({ nodeId: msg.items[i].nodeId, vec: Array.from(flat.subarray(i * dims, (i + 1) * dims)) });
                 }
-                catch {
-                    // Skip failed embeddings
+            }
+            catch {
+                for (const item of msg.items) {
+                    try {
+                        const result = await extractor(item.text, { pooling: 'cls', normalize: true });
+                        results.push({ nodeId: item.nodeId, vec: Array.from(result.data) });
+                    }
+                    catch { /* skip */ }
                 }
             }
             process.send({ type: 'results', results, batchId: msg.batchId });

package/dist/core/embeddings/nl-embedder.js CHANGED Viewed

@@ -21,9 +21,11 @@ export async function initNlEmbedder() {
         return loadPromise;
     loadPromise = (async () => {
         const { pipeline, env } = await import('@huggingface/transformers');
+        const os = await import('os');
+        const cpuCount = os.cpus().length;
         // Use all available CPU threads for ONNX inference
         if (env.backends?.onnx?.wasm) {
-            env.backends.onnx.wasm.numThreads = Math.max(1, (await import('os')).cpus().length);
+            env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
         }
         extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
     })();
@@ -44,14 +46,15 @@ export async function nlEmbed(text) {
 export async function nlEmbedBatch(texts) {
     if (!extractor)
         await initNlEmbedder();
-    const BATCH = 32; // sub-batch size — balances throughput vs memory
+    const BATCH = 64;
     const results = [];
     for (let i = 0; i < texts.length; i += BATCH) {
         const batch = texts.slice(i, i + BATCH);
-        // Process sub-batch — transformers.js handles arrays
-        const batchResults = await Promise.all(batch.map(text => extractor(text, { pooling: 'cls', normalize: true })));
-        for (const result of batchResults) {
-            results.push(Array.from(result.data));
+        const batchResult = await extractor(batch, { pooling: 'cls', normalize: true });
+        const dims = batchResult.dims?.[1] ?? 384;
+        const flat = batchResult.data;
+        for (let j = 0; j < batch.length; j++) {
+            results.push(Array.from(flat.subarray(j * dims, (j + 1) * dims)));
         }
     }
     return results;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.6.1",
+  "version": "2.6.3",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",