npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.4.1 → 2.5.0 - Mend

@zuvia-software-solutions/code-mapper 2.4.1 → 2.5.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (17)

package/dist/cli/analyze.d.ts +0 -1
package/dist/cli/analyze.js +11 -87
package/dist/cli/index.js +2 -2
package/dist/core/embeddings/index.d.ts +2 -3
package/dist/core/embeddings/index.js +2 -3
package/dist/core/embeddings/nl-embed-worker.d.ts +8 -0
package/dist/core/embeddings/nl-embed-worker.js +38 -0
package/dist/core/embeddings/nl-embedder.d.ts +1 -1
package/dist/core/embeddings/nl-embedder.js +199 -30
package/dist/core/incremental/refresh.js +18 -26
package/dist/mcp/local/local-backend.js +40 -27
package/dist/mcp/server.js +2 -2
package/dist/mcp/tools.js +1 -0
package/package.json +2 -5
package/models/jina-code-0.5b-mlx/config.json +0 -73
package/models/jina-code-0.5b-mlx/model.py +0 -127
package/models/mlx-embedder.py +0 -604

package/dist/cli/analyze.d.ts CHANGED Viewed

@@ -2,7 +2,6 @@
 export interface AnalyzeOptions {
     force?: boolean;
     embeddings?: boolean;
-    nlEmbeddings?: boolean;
     tsgo?: boolean;
     verbose?: boolean;
 }

package/dist/cli/analyze.js CHANGED Viewed

@@ -347,101 +347,25 @@ export const analyzeCommand = async (inputPath, options) => {
             catch { /* some may fail if node was removed, that's fine */ }
         }
     }
-    // Phase 4: Embeddings (90-98%)
+    // Phase 4: Embeddings — bge-small NL embeddings (CPU, Node.js, no Python)
+    // Extracts natural language from code (comments, names, enums, patterns)
+    // and embeds with bge-small-en-v1.5 (33M params, 384-dim, ~6ms/doc).
     const stats = getStats(db);
     let embeddingFailed = false;
     if (options?.embeddings) {
         recordPhase('embeddings');
-        updateBar(90, 'Generating embeddings...');
-        // Close DB so Python can write to it
-        closeDb(dbPath);
-        // Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
-        // Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
-        const { spawn: spawnChild } = await import('child_process');
-        const { fileURLToPath } = await import('url');
-        const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
-        await new Promise((resolve) => {
-            // Use spawn (not execFile) — no internal buffer limit, streams only.
-            // execFile buffers all stdout in memory which causes OOM/kill on large codebases.
-            const proc = spawnChild('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
-                stdio: ['ignore', 'pipe', 'pipe'],
-            });
-            let stderrBuf = '';
-            proc.stderr?.on('data', (chunk) => {
-                stderrBuf += chunk.toString();
-                if (stderrBuf.length > 10240)
-                    stderrBuf = stderrBuf.slice(-10240);
-            });
-            proc.on('close', (code) => {
-                if (code !== 0) {
-                    // Non-fatal: index is already saved, just embeddings failed
-                    console.error(`\n  Warning: Embedding failed (exit code ${code}). Index saved without embeddings.`);
-                    if (stderrBuf.trim())
-                        console.error(`  ${stderrBuf.trim().split('\n').slice(-3).join('\n  ')}`);
-                    embeddingFailed = true;
-                }
-                resolve();
-            });
-            proc.on('error', (err) => {
-                console.error(`\n  Warning: Embedding failed: ${err.message}. Index saved without embeddings.`);
-                embeddingFailed = true;
-                resolve();
-            });
-            // Stream progress from Python's JSON lines on stdout
-            let lineBuf = '';
-            proc.stdout?.on('data', (chunk) => {
-                lineBuf += chunk.toString();
-                const lines = lineBuf.split('\n');
-                lineBuf = lines.pop() || '';
-                for (const line of lines) {
-                    if (!line.trim())
-                        continue;
-                    try {
-                        const msg = JSON.parse(line);
-                        if (msg.phase === 'downloading' || msg.phase === 'converting') {
-                            updateBar(90, msg.message);
-                        }
-                        else if (msg.phase === 'loaded') {
-                            updateBar(91, `Model loaded (${msg.load_ms}ms)`);
-                        }
-                        else if (msg.phase === 'queried') {
-                            updateBar(92, `Found ${msg.nodes} embeddable nodes${msg.skipped_tests ? ` (${msg.skipped_tests} test files skipped)` : ''}`);
-                        }
-                        else if (msg.phase === 'prepared') {
-                            updateBar(93, `${msg.to_embed} to embed, ${msg.skipped} cached`);
-                        }
-                        else if (msg.phase === 'embedding') {
-                            const scaled = 93 + Math.round((msg.progress / 100) * 4);
-                            updateBar(scaled, `Embedding... ${msg.progress}% (${msg.embedded} written)`);
-                        }
-                        else if (msg.phase === 'embedded') {
-                            updateBar(97, `Embedded ${msg.count} nodes (${(msg.ms / 1000).toFixed(1)}s)`);
-                        }
-                        else if (msg.phase === 'done') {
-                            updateBar(98, `Embeddings complete (${msg.embedded} new, ${msg.skipped} cached)`);
-                        }
-                    }
-                    catch { }
-                }
-            });
-        });
-        // Reopen DB after Python is done
-        db = openDb(dbPath);
-    }
-    // Phase 4b: NL Embeddings (bge-small, CPU, Node.js)
-    if (options?.nlEmbeddings) {
-        recordPhase('nl-embeddings');
-        updateBar(95, 'Generating NL embeddings (bge-small)...');
+        updateBar(90, 'Generating embeddings (bge-small)...');
         const { buildNlEmbeddings } = await import('../core/embeddings/nl-embedder.js');
         try {
-            const nlResult = await buildNlEmbeddings(db, (current, total) => {
-                const pct = 95 + Math.round((current / Math.max(total, 1)) * 3);
-                updateBar(pct, `NL embeddings (${current}/${total})`, 'NL embeddings');
+            const result = await buildNlEmbeddings(db, (current, total) => {
+                const pct = 90 + Math.round((current / Math.max(total, 1)) * 8);
+                updateBar(pct, `Embeddings (${current}/${total})`, 'Embeddings');
             });
-            updateBar(98, `NL embeddings: ${nlResult.embedded} embedded, ${nlResult.skipped} cached (${(nlResult.durationMs / 1000).toFixed(1)}s)`);
+            updateBar(98, `Embeddings: ${result.embedded} embedded, ${result.skipped} cached (${(result.durationMs / 1000).toFixed(1)}s)`);
         }
         catch (err) {
-            console.error(`\n  Warning: NL embeddings failed: ${err instanceof Error ? err.message : err}`);
+            console.error(`\n  Warning: Embedding failed: ${err instanceof Error ? err.message : err}`);
+            embeddingFailed = true;
         }
     }
     // Phase 5: Finalize (98-100%)
@@ -535,7 +459,7 @@ export const analyzeCommand = async (inputPath, options) => {
         'search-text': 'Search text',
         fts: 'FTS indexing',
         'restore-embeddings': 'Restore embeddings',
-        embeddings: 'Embeddings (MLX)',
+        embeddings: 'Embeddings (bge-small)',
         finalize: 'Finalize & context',
         done: 'Done',
     };

package/dist/cli/index.js CHANGED Viewed

@@ -22,8 +22,8 @@ program
     .command('analyze [path]')
     .description('Index a repository (full analysis)')
     .option('-f, --force', 'Force full re-index even if up to date')
-    .option('--embeddings', 'Enable code embedding generation (Jina/MLX, GPU)', false)
-    .option('--nl-embeddings', 'Enable NL embedding generation (bge-small, CPU, recommended)', false)
+    .option('--embeddings', 'Generate semantic embeddings (bge-small, CPU, fast)')
+    .option('--no-embeddings', 'Skip embedding generation')
     .option('--no-tsgo', 'Skip tsgo LSP for call resolution (faster, less accurate)')
     .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
     .addHelpText('after', '\nEnvironment variables:\n  CODE_MAPPER_NO_GITIGNORE=1  Skip .gitignore parsing (still reads .code-mapperignore)')

package/dist/core/embeddings/index.d.ts CHANGED Viewed

@@ -1,5 +1,4 @@
-/** @file index.ts @description Barrel re-exports for the embedding pipeline system */
+/** @file Barrel re-exports for the embedding system (bge-small NL embedder) */
 export * from './types.js';
-export * from './embedder.js';
 export * from './text-generator.js';
-export * from './embedding-pipeline.js';
+export * from './nl-embedder.js';

package/dist/core/embeddings/index.js CHANGED Viewed

@@ -1,6 +1,5 @@
 // code-mapper/src/core/embeddings/index.ts
-/** @file index.ts @description Barrel re-exports for the embedding pipeline system */
+/** @file Barrel re-exports for the embedding system (bge-small NL embedder) */
 export * from './types.js';
-export * from './embedder.js';
 export * from './text-generator.js';
-export * from './embedding-pipeline.js';
+export * from './nl-embedder.js';

package/dist/core/embeddings/nl-embed-worker.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+/**
+ * @file Worker process for parallel NL embedding.
+ * Spawned by buildNlEmbeddings — loads bge-small independently,
+ * embeds texts received via IPC, sends vectors back.
+ *
+ * Same architecture as parallel tsgo: N processes, each with own model.
+ */
+export {};

package/dist/core/embeddings/nl-embed-worker.js ADDED Viewed

@@ -0,0 +1,38 @@
+// code-mapper/src/core/embeddings/nl-embed-worker.ts
+/**
+ * @file Worker process for parallel NL embedding.
+ * Spawned by buildNlEmbeddings — loads bge-small independently,
+ * embeds texts received via IPC, sends vectors back.
+ *
+ * Same architecture as parallel tsgo: N processes, each with own model.
+ */
+import { pipeline } from '@huggingface/transformers';
+const MODEL_ID = 'Xenova/bge-small-en-v1.5';
+async function main() {
+    // Load model
+    const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
+    process.send({ type: 'ready' });
+    // Process messages from parent
+    process.on('message', async (msg) => {
+        if (msg.type === 'embed') {
+            const results = [];
+            for (const item of msg.items) {
+                try {
+                    const result = await extractor(item.text, { pooling: 'cls', normalize: true });
+                    results.push({ nodeId: item.nodeId, vec: Array.from(result.data) });
+                }
+                catch {
+                    // Skip failed embeddings
+                }
+            }
+            process.send({ type: 'results', results, batchId: msg.batchId });
+        }
+        else if (msg.type === 'exit') {
+            process.exit(0);
+        }
+    });
+}
+main().catch(err => {
+    console.error('NL embed worker failed:', err);
+    process.exit(1);
+});

package/dist/core/embeddings/nl-embedder.d.ts CHANGED Viewed

@@ -14,7 +14,7 @@ export declare function initNlEmbedder(): Promise<void>;
 export declare function isNlEmbedderReady(): boolean;
 /** Embed a single text, returns Float32Array */
 export declare function nlEmbed(text: string): Promise<number[]>;
-/** Embed a batch of texts */
+/** Embed a batch of texts (processes in sub-batches for memory efficiency) */
 export declare function nlEmbedBatch(texts: string[]): Promise<number[][]>;
 interface NodeForNl {
     id: string;

package/dist/core/embeddings/nl-embedder.js CHANGED Viewed

@@ -20,7 +20,11 @@ export async function initNlEmbedder() {
     if (loadPromise)
         return loadPromise;
     loadPromise = (async () => {
-        const { pipeline } = await import('@huggingface/transformers');
+        const { pipeline, env } = await import('@huggingface/transformers');
+        // Use all available CPU threads for ONNX inference
+        if (env.backends?.onnx?.wasm) {
+            env.backends.onnx.wasm.numThreads = Math.max(1, (await import('os')).cpus().length);
+        }
         extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
     })();
     return loadPromise;
@@ -36,14 +40,19 @@ export async function nlEmbed(text) {
     const result = await extractor(text, { pooling: 'cls', normalize: true });
     return Array.from(result.data);
 }
-/** Embed a batch of texts */
+/** Embed a batch of texts (processes in sub-batches for memory efficiency) */
 export async function nlEmbedBatch(texts) {
     if (!extractor)
         await initNlEmbedder();
+    const BATCH = 32; // sub-batch size — balances throughput vs memory
     const results = [];
-    for (const text of texts) {
-        const result = await extractor(text, { pooling: 'cls', normalize: true });
-        results.push(Array.from(result.data));
+    for (let i = 0; i < texts.length; i += BATCH) {
+        const batch = texts.slice(i, i + BATCH);
+        // Process sub-batch — transformers.js handles arrays
+        const batchResults = await Promise.all(batch.map(text => extractor(text, { pooling: 'cls', normalize: true })));
+        for (const result of batchResults) {
+            results.push(Array.from(result.data));
+        }
     }
     return results;
 }
@@ -197,9 +206,10 @@ export async function buildNlEmbeddings(db, onProgress) {
     const labels = ['Function', 'Class', 'Method', 'Interface', 'Const', 'Enum', 'TypeAlias', 'Namespace', 'Module', 'Struct'];
     const placeholders = labels.map(() => '?').join(',');
     const rows = db.prepare(`SELECT id, name, label, filePath, content, startLine, description FROM nodes WHERE label IN (${placeholders})`).all(...labels);
-    // Skip test files
-    const testPatterns = ['/test/', '/tests/', '/spec/', '/fixtures/', '/__tests__/', '/__mocks__/', '.test.', '.spec.', '_test.', '_spec.'];
-    const filteredRows = rows.filter(r => !testPatterns.some(p => r.filePath.includes(p)));
+    // NL embeddings include ALL files (including tests) — test names describe
+    // functionality in natural language which helps conceptual search.
+    // The bge-small model is fast enough (~6ms/doc) that the cost is trivial.
+    const filteredRows = rows;
     // Extract NL documents
     const allDocs = [];
     for (const row of filteredRows) {
@@ -210,10 +220,20 @@ export async function buildNlEmbeddings(db, onProgress) {
     if (allDocs.length === 0) {
         return { embedded: 0, skipped: 0, durationMs: Date.now() - t0 };
     }
+    // Deduplicate: one embedding per nodeId — prefer 'comment' source over 'name' or 'enum'
+    const SOURCE_PRIORITY = { comment: 0, enum: 1, name: 2 };
+    const bestByNode = new Map();
+    for (const doc of allDocs) {
+        const existing = bestByNode.get(doc.nodeId);
+        if (!existing || (SOURCE_PRIORITY[doc.source] ?? 9) < (SOURCE_PRIORITY[existing.source] ?? 9)) {
+            bestByNode.set(doc.nodeId, doc);
+        }
+    }
+    const uniqueDocs = [...bestByNode.values()];
     // Check existing hashes for skip detection
     const existingHashes = new Map();
     try {
-        const hashRows = db.prepare('SELECT nodeId, textHash FROM nl_embeddings WHERE textHash IS NOT NULL').all();
+        const hashRows = db.prepare('SELECT nodeId, textHash FROM embeddings WHERE textHash IS NOT NULL').all();
         for (const r of hashRows)
             existingHashes.set(r.nodeId + ':' + r.textHash, '1');
     }
@@ -221,7 +241,7 @@ export async function buildNlEmbeddings(db, onProgress) {
     // Filter to docs that need embedding
     const toEmbed = [];
     let skipped = 0;
-    for (const doc of allDocs) {
+    for (const doc of uniqueDocs) {
         const hash = md5(doc.text);
         if (existingHashes.has(doc.nodeId + ':' + hash)) {
             skipped++;
@@ -232,31 +252,180 @@ export async function buildNlEmbeddings(db, onProgress) {
     if (toEmbed.length === 0) {
         return { embedded: 0, skipped, durationMs: Date.now() - t0 };
     }
-    // Clear existing NL embeddings and rebuild
-    db.prepare('DELETE FROM nl_embeddings').run();
-    // Embed in batches and write to DB
-    const BATCH = 100;
-    const insertStmt = db.prepare('INSERT INTO nl_embeddings (nodeId, embedding, textHash, source, text) VALUES (?, ?, ?, ?, ?)');
+    // Clear existing embeddings and rebuild
+    db.prepare('DELETE FROM embeddings').run();
+    try {
+        db.prepare('DELETE FROM nl_embeddings').run();
+    }
+    catch { /* table may not exist */ }
+    // Parallel multi-process embedding — same architecture as tsgo
+    // Each worker loads its own bge-small model, embeds independently.
+    const os = await import('os');
+    const { fork } = await import('child_process');
+    const { fileURLToPath } = await import('url');
+    const pathMod = await import('path');
+    const cpuCount = os.cpus().length;
+    const maxByCore = Math.max(1, Math.floor(cpuCount * 0.75));
+    const maxByWorkload = Math.max(1, Math.floor(toEmbed.length / 50));
+    const workerCount = Math.min(maxByCore, maxByWorkload, 8); // cap at 8 for memory
+    // Find worker script path
+    const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
+    const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
+    // Split work across workers
+    const ITEMS_PER_BATCH = 50;
+    let nextIdx = 0;
     let embedded = 0;
-    db.exec('BEGIN');
+    const getNextBatch = () => {
+        if (nextIdx >= toEmbed.length)
+            return null;
+        const batch = toEmbed.slice(nextIdx, nextIdx + ITEMS_PER_BATCH);
+        nextIdx += ITEMS_PER_BATCH;
+        return batch;
+    };
+    // Prepare DB statements
+    const insertStmt = db.prepare('INSERT OR REPLACE INTO embeddings (nodeId, embedding, textHash) VALUES (?, ?, ?)');
+    let nlInsertStmt = null;
     try {
-        for (let i = 0; i < toEmbed.length; i += BATCH) {
-            const batch = toEmbed.slice(i, i + BATCH);
-            const vecs = await nlEmbedBatch(batch.map(d => d.text));
-            for (let j = 0; j < batch.length; j++) {
-                const doc = batch[j];
-                const vec = vecs[j];
-                const blob = Buffer.from(new Float32Array(vec).buffer);
-                insertStmt.run(doc.nodeId, blob, doc.hash, doc.source, doc.text);
-                embedded++;
+        nlInsertStmt = db.prepare('INSERT INTO nl_embeddings (nodeId, embedding, textHash, source, text) VALUES (?, ?, ?, ?, ?)');
+    }
+    catch { /* nl_embeddings table may not exist */ }
+    // Track doc metadata for nl_embeddings text lookup
+    const docMap = new Map();
+    for (const doc of toEmbed)
+        docMap.set(doc.nodeId, { source: doc.source, text: doc.text, hash: doc.hash });
+    if (workerCount <= 1) {
+        // Single process — use in-process embedding (small workloads)
+        await initNlEmbedder();
+        db.exec('BEGIN');
+        try {
+            for (let i = 0; i < toEmbed.length; i += ITEMS_PER_BATCH) {
+                const batch = toEmbed.slice(i, i + ITEMS_PER_BATCH);
+                const vecs = await nlEmbedBatch(batch.map(d => d.text));
+                for (let j = 0; j < batch.length; j++) {
+                    const doc = batch[j];
+                    const vec = vecs[j];
+                    const blob = Buffer.from(new Float32Array(vec).buffer);
+                    insertStmt.run(doc.nodeId, blob, doc.hash);
+                    if (nlInsertStmt) {
+                        try {
+                            nlInsertStmt.run(doc.nodeId, blob, doc.hash, doc.source, doc.text);
+                        }
+                        catch { }
+                    }
+                    embedded++;
+                }
+                onProgress?.(Math.min(i + ITEMS_PER_BATCH, toEmbed.length), toEmbed.length);
             }
-            onProgress?.(Math.min(i + BATCH, toEmbed.length), toEmbed.length);
+            db.exec('COMMIT');
+        }
+        catch (err) {
+            db.exec('ROLLBACK');
+            throw err;
         }
-        db.exec('COMMIT');
     }
-    catch (err) {
-        db.exec('ROLLBACK');
-        throw err;
+    else {
+        // Multi-process: spawn N workers, dynamic dispatch
+        const workers = [];
+        const workerReady = [];
+        for (let i = 0; i < workerCount; i++) {
+            const worker = fork(workerScript, [], { stdio: ['pipe', 'pipe', 'pipe', 'ipc'] });
+            workers.push(worker);
+            workerReady.push(new Promise((resolve) => {
+                const handler = (msg) => {
+                    if (msg?.type === 'ready') {
+                        worker.removeListener('message', handler);
+                        resolve();
+                    }
+                };
+                worker.on('message', handler);
+                // Timeout: if worker doesn't ready in 30s, skip it
+                setTimeout(() => resolve(), 30000);
+            }));
+        }
+        // Wait for all workers to load model
+        await Promise.all(workerReady);
+        const activeWorkers = workers.filter(w => w.connected);
+        if (activeWorkers.length === 0) {
+            // Fallback to single process
+            await initNlEmbedder();
+            db.exec('BEGIN');
+            try {
+                for (let i = 0; i < toEmbed.length; i += ITEMS_PER_BATCH) {
+                    const batch = toEmbed.slice(i, i + ITEMS_PER_BATCH);
+                    const vecs = await nlEmbedBatch(batch.map(d => d.text));
+                    for (let j = 0; j < batch.length; j++) {
+                        const doc = batch[j];
+                        const blob = Buffer.from(new Float32Array(vecs[j]).buffer);
+                        insertStmt.run(doc.nodeId, blob, doc.hash);
+                        embedded++;
+                    }
+                    onProgress?.(Math.min(i + ITEMS_PER_BATCH, toEmbed.length), toEmbed.length);
+                }
+                db.exec('COMMIT');
+            }
+            catch (err) {
+                db.exec('ROLLBACK');
+                throw err;
+            }
+        }
+        else {
+            // Dynamic dispatch: each worker requests next batch when done
+            db.exec('BEGIN');
+            let batchId = 0;
+            const runWorker = (worker) => {
+                return new Promise((resolve) => {
+                    const sendNext = () => {
+                        const batch = getNextBatch();
+                        if (!batch) {
+                            worker.send({ type: 'exit' });
+                            resolve();
+                            return;
+                        }
+                        worker.send({
+                            type: 'embed',
+                            batchId: batchId++,
+                            items: batch.map(d => ({ nodeId: d.nodeId, text: d.text })),
+                        });
+                    };
+                    worker.on('message', (msg) => {
+                        if (msg?.type === 'results') {
+                            // Write results to DB
+                            for (const r of msg.results) {
+                                const blob = Buffer.from(new Float32Array(r.vec).buffer);
+                                const meta = docMap.get(r.nodeId);
+                                insertStmt.run(r.nodeId, blob, meta?.hash ?? '');
+                                if (nlInsertStmt && meta) {
+                                    try {
+                                        nlInsertStmt.run(r.nodeId, blob, meta.hash, meta.source, meta.text);
+                                    }
+                                    catch { }
+                                }
+                                embedded++;
+                            }
+                            onProgress?.(embedded, toEmbed.length);
+                            sendNext(); // request next batch
+                        }
+                    });
+                    worker.on('exit', () => resolve());
+                    sendNext(); // start first batch
+                });
+            };
+            try {
+                await Promise.all(activeWorkers.map(w => runWorker(w)));
+                db.exec('COMMIT');
+            }
+            catch (err) {
+                db.exec('ROLLBACK');
+                throw err;
+            }
+        }
+        // Cleanup workers
+        for (const w of workers) {
+            try {
+                w.kill();
+            }
+            catch { }
+        }
     }
     return { embedded, skipped, durationMs: Date.now() - t0 };
 }

package/dist/core/incremental/refresh.js CHANGED Viewed

@@ -492,46 +492,38 @@ export async function refreshEmbeddings(db, dirtyFiles, hasEmbeddings) {
         }
         if (newNodes.length === 0)
             return;
-        // Step 3: Enrich with graph context — same as the full analyze pipeline
-        // Lazy import to avoid circular dependency at module load time
-        const { fetchGraphContext, enrichTextWithGraphContext } = await import('../embeddings/embedding-pipeline.js');
-        const { generateEmbeddingText } = await import('../embeddings/text-generator.js');
-        const { initEmbedder, embedBatch, embeddingToArray } = await import('../embeddings/embedder.js');
-        const graphContext = fetchGraphContext(db, newNodes);
-        // Step 4: Generate enriched text + hash for skip detection
+        // Step 3: Extract NL text and embed with bge-small (same model as full analyze)
+        const { extractNlTexts, initNlEmbedder, nlEmbed } = await import('../embeddings/nl-embedder.js');
         const { createHash } = await import('crypto');
         const { getEmbeddingHashes } = await import('../db/adapter.js');
         const existingHashes = getEmbeddingHashes(db);
+        await initNlEmbedder();
         const toEmbed = [];
         for (const node of newNodes) {
-            let text = generateEmbeddingText(node);
-            const ctx = graphContext.get(node.id);
-            if (ctx) {
-                text = enrichTextWithGraphContext(text, ctx);
-            }
-            const hash = createHash('md5').update(text).digest('hex');
-            // Skip if hash unchanged (content + graph context identical)
+            const nlDocs = extractNlTexts({
+                id: node.id, name: node.name, label: node.label,
+                filePath: node.filePath, content: node.content || '',
+                startLine: node.startLine ?? null, description: node.description || '',
+            });
+            // Pick best doc (prefer comment over name)
+            const best = nlDocs.find(d => d.source === 'comment') || nlDocs[0];
+            if (!best)
+                continue;
+            const hash = createHash('md5').update(best.text).digest('hex');
             if (existingHashes.get(node.id) === hash)
                 continue;
-            toEmbed.push({ node, text, hash });
+            toEmbed.push({ nodeId: node.id, text: best.text, hash, source: best.source });
         }
         if (toEmbed.length === 0) {
             console.error(`Code Mapper: All ${newNodes.length} node(s) unchanged (hash skip)`);
             return;
         }
         console.error(`Code Mapper: Embedding ${toEmbed.length}/${newNodes.length} node(s) (${newNodes.length - toEmbed.length} unchanged)`);
-        // Step 5: Ensure embedder is ready
-        await initEmbedder();
-        // Step 6: Batch embed only changed nodes
-        const embeddings = await embedBatch(toEmbed.map(e => e.text));
-        // Step 7: Insert with hashes
+        // Step 4: Embed and insert
         const items = [];
-        for (let i = 0; i < toEmbed.length; i++) {
-            const entry = toEmbed[i];
-            const emb = embeddings[i];
-            if (entry?.node && emb) {
-                items.push({ nodeId: toNodeId(entry.node.id), embedding: embeddingToArray(emb), textHash: entry.hash });
-            }
+        for (const entry of toEmbed) {
+            const vec = await nlEmbed(entry.text);
+            items.push({ nodeId: toNodeId(entry.nodeId), embedding: vec, textHash: entry.hash });
         }
         insertEmbeddingsBatch(db, items);
         console.error(`Code Mapper: Embedded ${items.length} node(s) incrementally`);