npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.6.3 → 2.6.5 - Mend

@zuvia-software-solutions/code-mapper 2.6.3 → 2.6.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5)

package/dist/core/embeddings/nl-embed-worker.js +1 -1
package/dist/core/embeddings/nl-embedder.d.ts +1 -1
package/dist/core/embeddings/nl-embedder.js +25 -16
package/dist/core/incremental/refresh.js +93 -0
package/package.json +1 -1

package/dist/core/embeddings/nl-embed-worker.js CHANGED

@@ -9,7 +9,7 @@
 import { pipeline } from '@huggingface/transformers';
 const MODEL_ID = 'Xenova/bge-small-en-v1.5';
 async function main() {
-    const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
+    const extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
     process.send({ type: 'ready' });
     // Process messages from parent
     process.on('message', async (msg) => {

package/dist/core/embeddings/nl-embedder.d.ts CHANGED

@@ -30,7 +30,7 @@ interface NlDocument {
     source: string;
     text: string;
 }
-/** Build NL documents from a node */
+/** Build NL documents from a node — keyword-dense, minimal tokens */
 export declare function extractNlTexts(node: NodeForNl): NlDocument[];
 /**
  * Build NL embeddings for all eligible nodes in the database.

package/dist/core/embeddings/nl-embedder.js CHANGED

@@ -27,7 +27,7 @@ export async function initNlEmbedder() {
         if (env.backends?.onnx?.wasm) {
             env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
         }
-        extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
+        extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
     })();
     return loadPromise;
 }
@@ -147,11 +147,19 @@ function extractParamNames(content) {
         .map(p => expandIdentifier(p))
         .join(', ');
 }
-/** Build NL documents from a node */
+/** Strip noise tokens that waste tokenizer budget without adding semantic value */
+function condense(text) {
+    return text
+        .replace(/---[^-]*---/g, '') // section headers from comments
+        .replace(/[{}[\]()'",;:]/g, '') // punctuation
+        .replace(/\. /g, ' ') // sentence separators
+        .replace(/\s{2,}/g, ' ') // collapse whitespace
+        .trim();
+}
+/** Build NL documents from a node — keyword-dense, minimal tokens */
 export function extractNlTexts(node) {
     const docs = [];
-    const name = node.name;
-    const expandedName = expandIdentifier(name);
+    const expandedName = expandIdentifier(node.name);
     const dir = node.filePath.split('/').slice(-3, -1).join('/');
     // 1. Comment-based NL text (primary)
     const comment = extractFullComment(node.content);
@@ -159,22 +167,21 @@ export function extractNlTexts(node) {
         docs.push({
             nodeId: node.id,
             source: 'comment',
-            text: `${expandedName}: ${comment}. File: ${dir}`,
+            text: condense(`${expandedName} ${comment} ${dir}`),
         });
     }
-    // 2. Name + params + return type (always available)
+    // 2. Name + params (always available)
     const params = extractParamNames(node.content);
-    const parts = [expandedName];
-    if (params)
-        parts.push(`Parameters: ${params}`);
-    if (dir)
-        parts.push(`in ${dir}`);
     if (!comment) {
-        // Only add name-based doc if no comment (avoid duplication)
+        const parts = [expandedName];
+        if (params)
+            parts.push(params);
+        if (dir)
+            parts.push(dir);
         docs.push({
             nodeId: node.id,
             source: 'name',
-            text: parts.join('. '),
+            text: condense(parts.join(' ')),
         });
     }
     // 3. Enum/const values
@@ -184,7 +191,7 @@ export function extractNlTexts(node) {
             docs.push({
                 nodeId: node.id,
                 source: 'enum',
-                text: `${expandedName}: ${values}`,
+                text: condense(`${expandedName} ${values}`),
             });
         }
     }
@@ -274,8 +281,9 @@ export async function buildNlEmbeddings(db, onProgress) {
     // Find worker script path
     const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
     const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
-    // Split work across workers
-    const ITEMS_PER_BATCH = 50;
+    // Split work across workers — larger batches reduce IPC round-trips
+    // and let the ONNX runtime amortize overhead across more items
+    const ITEMS_PER_BATCH = 256;
     let nextIdx = 0;
     let embedded = 0;
     const getNextBatch = () => {
@@ -432,3 +440,4 @@ export async function buildNlEmbeddings(db, onProgress) {
     }
     return { embedded, skipped, durationMs: Date.now() - t0 };
 }
+// touch

package/dist/core/incremental/refresh.js CHANGED

@@ -311,6 +311,99 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
         }
     }
     // FTS5 auto-updates via triggers — no manual rebuild needed
+    // Phase 5: Rebuild graph-level analyses (communities, processes, interface dispatch)
+    // These are cheap (<300ms) but critical — stale communities/processes mislead agents.
+    // Load full graph from SQLite, re-run analyses, write results back.
+    try {
+        const { createKnowledgeGraph } = await import('../graph/graph.js');
+        const { processCommunities } = await import('../ingestion/community-processor.js');
+        const { processProcesses } = await import('../ingestion/process-processor.js');
+        const { insertNodesBatch, insertEdgesBatch } = await import('../db/adapter.js');
+        const { toNodeId, toEdgeId } = await import('../db/schema.js');
+        const graph = createKnowledgeGraph();
+        // Load all non-community/process nodes and edges into in-memory graph
+        const allNodes = db.prepare('SELECT * FROM nodes WHERE label NOT IN (\'Community\', \'Process\')').all();
+        const allEdges = db.prepare('SELECT * FROM edges WHERE type NOT IN (\'MEMBER_OF\', \'STEP_IN_PROCESS\')').all();
+        for (const row of allNodes) {
+            graph.addNode({
+                id: toNodeId(row.id),
+                label: row.label,
+                properties: {
+                    name: row.name ?? '', filePath: row.filePath ?? '',
+                    startLine: row.startLine ?? undefined, endLine: row.endLine ?? undefined,
+                    isExported: Boolean(row.isExported),
+                    description: row.description ?? undefined,
+                    parameterCount: row.parameterCount ?? undefined,
+                    returnType: row.returnType ?? undefined,
+                },
+            });
+        }
+        for (const row of allEdges) {
+            graph.addRelationship({
+                id: toEdgeId(row.id),
+                sourceId: toNodeId(row.sourceId),
+                targetId: toNodeId(row.targetId),
+                type: row.type,
+                confidence: row.confidence ?? 1.0,
+                reason: row.reason ?? '',
+            });
+        }
+        // Delete old community/process data from SQLite
+        db.exec('BEGIN');
+        db.prepare("DELETE FROM edges WHERE type IN ('MEMBER_OF', 'STEP_IN_PROCESS')").run();
+        db.prepare("DELETE FROM nodes WHERE label IN ('Community', 'Process')").run();
+        // Re-run community detection + process detection
+        const communityResult = await processCommunities(graph, () => { });
+        const processResult = await processProcesses(graph, communityResult.memberships, () => { });
+        // Write new community/process nodes + edges back to SQLite
+        const newNodes = [];
+        const newEdges = [];
+        for (const node of graph.iterNodes()) {
+            if (node.label === 'Community' || node.label === 'Process') {
+                newNodes.push({
+                    id: node.id,
+                    label: node.label,
+                    name: node.properties.name ?? '',
+                    filePath: node.properties.filePath ?? '',
+                    heuristicLabel: node.properties.heuristicLabel ?? null,
+                    cohesion: node.properties.cohesion ?? null,
+                    symbolCount: node.properties.symbolCount ?? null,
+                    keywords: Array.isArray(node.properties.keywords) ? node.properties.keywords.join(', ') : node.properties.keywords ?? null,
+                    processType: node.properties.processType ?? null,
+                    stepCount: node.properties.stepCount ?? null,
+                    communities: node.properties.communities ?? null,
+                    entryPointId: node.properties.entryPointId ?? null,
+                    terminalId: node.properties.terminalId ?? null,
+                });
+            }
+        }
+        for (const rel of graph.iterRelationships()) {
+            if (rel.type === 'MEMBER_OF' || rel.type === 'STEP_IN_PROCESS') {
+                newEdges.push({
+                    id: rel.id,
+                    sourceId: rel.sourceId,
+                    targetId: rel.targetId,
+                    type: rel.type,
+                    confidence: rel.confidence,
+                    reason: rel.reason,
+                    step: rel.step,
+                });
+            }
+        }
+        if (newNodes.length > 0)
+            insertNodesBatch(db, newNodes);
+        if (newEdges.length > 0)
+            insertEdgesBatch(db, newEdges);
+        db.exec('COMMIT');
+        console.error(`Code Mapper: refresh Phase 5 — ${communityResult.communities.length} communities, ${processResult.stats.totalProcesses} processes rebuilt`);
+    }
+    catch (err) {
+        try {
+            db.exec('ROLLBACK');
+        }
+        catch { }
+        console.error(`Code Mapper: Phase 5 graph rebuild failed: ${err instanceof Error ? err.message : err}`);
+    }
     return {
         filesProcessed: filesToProcess.length, filesSkipped,
         nodesDeleted, nodesInserted, edgesInserted,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.6.3",
+  "version": "2.6.5",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",