npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.6.3 → 2.6.4 - Mend

@zuvia-software-solutions/code-mapper 2.6.3 → 2.6.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/core/embeddings/nl-embed-worker.js +1 -1
package/dist/core/embeddings/nl-embedder.d.ts +1 -1
package/dist/core/embeddings/nl-embedder.js +24 -16
package/package.json +1 -1

package/dist/core/embeddings/nl-embed-worker.js CHANGED Viewed

@@ -9,7 +9,7 @@
 import { pipeline } from '@huggingface/transformers';
 const MODEL_ID = 'Xenova/bge-small-en-v1.5';
 async function main() {
-    const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
+    const extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
     process.send({ type: 'ready' });
     // Process messages from parent
     process.on('message', async (msg) => {

package/dist/core/embeddings/nl-embedder.d.ts CHANGED Viewed

@@ -30,7 +30,7 @@ interface NlDocument {
     source: string;
     text: string;
 }
-/** Build NL documents from a node */
+/** Build NL documents from a node — keyword-dense, minimal tokens */
 export declare function extractNlTexts(node: NodeForNl): NlDocument[];
 /**
  * Build NL embeddings for all eligible nodes in the database.

package/dist/core/embeddings/nl-embedder.js CHANGED Viewed

@@ -27,7 +27,7 @@ export async function initNlEmbedder() {
         if (env.backends?.onnx?.wasm) {
             env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
         }
-        extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
+        extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
     })();
     return loadPromise;
 }
@@ -147,11 +147,19 @@ function extractParamNames(content) {
         .map(p => expandIdentifier(p))
         .join(', ');
 }
-/** Build NL documents from a node */
+/** Strip noise tokens that waste tokenizer budget without adding semantic value */
+function condense(text) {
+    return text
+        .replace(/---[^-]*---/g, '') // section headers from comments
+        .replace(/[{}[\]()'",;:]/g, '') // punctuation
+        .replace(/\. /g, ' ') // sentence separators
+        .replace(/\s{2,}/g, ' ') // collapse whitespace
+        .trim();
+}
+/** Build NL documents from a node — keyword-dense, minimal tokens */
 export function extractNlTexts(node) {
     const docs = [];
-    const name = node.name;
-    const expandedName = expandIdentifier(name);
+    const expandedName = expandIdentifier(node.name);
     const dir = node.filePath.split('/').slice(-3, -1).join('/');
     // 1. Comment-based NL text (primary)
     const comment = extractFullComment(node.content);
@@ -159,22 +167,21 @@ export function extractNlTexts(node) {
         docs.push({
             nodeId: node.id,
             source: 'comment',
-            text: `${expandedName}: ${comment}. File: ${dir}`,
+            text: condense(`${expandedName} ${comment} ${dir}`),
         });
     }
-    // 2. Name + params + return type (always available)
+    // 2. Name + params (always available)
     const params = extractParamNames(node.content);
-    const parts = [expandedName];
-    if (params)
-        parts.push(`Parameters: ${params}`);
-    if (dir)
-        parts.push(`in ${dir}`);
     if (!comment) {
-        // Only add name-based doc if no comment (avoid duplication)
+        const parts = [expandedName];
+        if (params)
+            parts.push(params);
+        if (dir)
+            parts.push(dir);
         docs.push({
             nodeId: node.id,
             source: 'name',
-            text: parts.join('. '),
+            text: condense(parts.join(' ')),
         });
     }
     // 3. Enum/const values
@@ -184,7 +191,7 @@ export function extractNlTexts(node) {
             docs.push({
                 nodeId: node.id,
                 source: 'enum',
-                text: `${expandedName}: ${values}`,
+                text: condense(`${expandedName} ${values}`),
             });
         }
     }
@@ -274,8 +281,9 @@ export async function buildNlEmbeddings(db, onProgress) {
     // Find worker script path
     const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
     const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
-    // Split work across workers
-    const ITEMS_PER_BATCH = 50;
+    // Split work across workers — larger batches reduce IPC round-trips
+    // and let the ONNX runtime amortize overhead across more items
+    const ITEMS_PER_BATCH = 256;
     let nextIdx = 0;
     let embedded = 0;
     const getNextBatch = () => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.6.3",
+  "version": "2.6.4",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",