npm - @zuvia-software-solutions/code-mapper - Versions diffs - 1.4.0 - Mend

@zuvia-software-solutions/code-mapper 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (213) hide show

package/README.md +215 -0
package/dist/cli/ai-context.d.ts +19 -0
package/dist/cli/ai-context.js +168 -0
package/dist/cli/analyze.d.ts +7 -0
package/dist/cli/analyze.js +325 -0
package/dist/cli/augment.d.ts +7 -0
package/dist/cli/augment.js +27 -0
package/dist/cli/clean.d.ts +5 -0
package/dist/cli/clean.js +56 -0
package/dist/cli/eval-server.d.ts +25 -0
package/dist/cli/eval-server.js +365 -0
package/dist/cli/index.d.ts +6 -0
package/dist/cli/index.js +102 -0
package/dist/cli/lazy-action.d.ts +6 -0
package/dist/cli/lazy-action.js +19 -0
package/dist/cli/list.d.ts +2 -0
package/dist/cli/list.js +27 -0
package/dist/cli/mcp.d.ts +8 -0
package/dist/cli/mcp.js +35 -0
package/dist/cli/refresh.d.ts +12 -0
package/dist/cli/refresh.js +165 -0
package/dist/cli/serve.d.ts +5 -0
package/dist/cli/serve.js +8 -0
package/dist/cli/setup.d.ts +6 -0
package/dist/cli/setup.js +218 -0
package/dist/cli/status.d.ts +2 -0
package/dist/cli/status.js +33 -0
package/dist/cli/tool.d.ts +28 -0
package/dist/cli/tool.js +87 -0
package/dist/config/ignore-service.d.ts +32 -0
package/dist/config/ignore-service.js +282 -0
package/dist/config/supported-languages.d.ts +23 -0
package/dist/config/supported-languages.js +52 -0
package/dist/core/augmentation/engine.d.ts +22 -0
package/dist/core/augmentation/engine.js +232 -0
package/dist/core/embeddings/embedder.d.ts +35 -0
package/dist/core/embeddings/embedder.js +171 -0
package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
package/dist/core/embeddings/embedding-pipeline.js +402 -0
package/dist/core/embeddings/index.d.ts +5 -0
package/dist/core/embeddings/index.js +6 -0
package/dist/core/embeddings/text-generator.d.ts +20 -0
package/dist/core/embeddings/text-generator.js +159 -0
package/dist/core/embeddings/types.d.ts +60 -0
package/dist/core/embeddings/types.js +23 -0
package/dist/core/graph/graph.d.ts +4 -0
package/dist/core/graph/graph.js +65 -0
package/dist/core/graph/types.d.ts +69 -0
package/dist/core/graph/types.js +3 -0
package/dist/core/incremental/child-process.d.ts +8 -0
package/dist/core/incremental/child-process.js +649 -0
package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
package/dist/core/incremental/refresh-coordinator.js +147 -0
package/dist/core/incremental/types.d.ts +78 -0
package/dist/core/incremental/types.js +153 -0
package/dist/core/incremental/watcher.d.ts +63 -0
package/dist/core/incremental/watcher.js +338 -0
package/dist/core/ingestion/ast-cache.d.ts +12 -0
package/dist/core/ingestion/ast-cache.js +34 -0
package/dist/core/ingestion/call-processor.d.ts +34 -0
package/dist/core/ingestion/call-processor.js +937 -0
package/dist/core/ingestion/call-routing.d.ts +40 -0
package/dist/core/ingestion/call-routing.js +97 -0
package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
package/dist/core/ingestion/cluster-enricher.js +151 -0
package/dist/core/ingestion/community-processor.d.ts +26 -0
package/dist/core/ingestion/community-processor.js +272 -0
package/dist/core/ingestion/constants.d.ts +5 -0
package/dist/core/ingestion/constants.js +8 -0
package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
package/dist/core/ingestion/entry-point-scoring.js +317 -0
package/dist/core/ingestion/export-detection.d.ts +11 -0
package/dist/core/ingestion/export-detection.js +203 -0
package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
package/dist/core/ingestion/filesystem-walker.js +64 -0
package/dist/core/ingestion/framework-detection.d.ts +42 -0
package/dist/core/ingestion/framework-detection.js +405 -0
package/dist/core/ingestion/heritage-processor.d.ts +15 -0
package/dist/core/ingestion/heritage-processor.js +237 -0
package/dist/core/ingestion/import-processor.d.ts +31 -0
package/dist/core/ingestion/import-processor.js +416 -0
package/dist/core/ingestion/language-config.d.ts +32 -0
package/dist/core/ingestion/language-config.js +161 -0
package/dist/core/ingestion/mro-processor.d.ts +32 -0
package/dist/core/ingestion/mro-processor.js +343 -0
package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
package/dist/core/ingestion/named-binding-extraction.js +343 -0
package/dist/core/ingestion/parsing-processor.d.ts +20 -0
package/dist/core/ingestion/parsing-processor.js +282 -0
package/dist/core/ingestion/pipeline.d.ts +3 -0
package/dist/core/ingestion/pipeline.js +416 -0
package/dist/core/ingestion/process-processor.d.ts +42 -0
package/dist/core/ingestion/process-processor.js +357 -0
package/dist/core/ingestion/resolution-context.d.ts +40 -0
package/dist/core/ingestion/resolution-context.js +171 -0
package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
package/dist/core/ingestion/resolvers/csharp.js +101 -0
package/dist/core/ingestion/resolvers/go.d.ts +8 -0
package/dist/core/ingestion/resolvers/go.js +33 -0
package/dist/core/ingestion/resolvers/index.d.ts +14 -0
package/dist/core/ingestion/resolvers/index.js +10 -0
package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
package/dist/core/ingestion/resolvers/jvm.js +74 -0
package/dist/core/ingestion/resolvers/php.d.ts +7 -0
package/dist/core/ingestion/resolvers/php.js +30 -0
package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
package/dist/core/ingestion/resolvers/ruby.js +13 -0
package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
package/dist/core/ingestion/resolvers/rust.js +62 -0
package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
package/dist/core/ingestion/resolvers/standard.js +144 -0
package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
package/dist/core/ingestion/resolvers/utils.js +113 -0
package/dist/core/ingestion/structure-processor.d.ts +4 -0
package/dist/core/ingestion/structure-processor.js +39 -0
package/dist/core/ingestion/symbol-table.d.ts +34 -0
package/dist/core/ingestion/symbol-table.js +48 -0
package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
package/dist/core/ingestion/tree-sitter-queries.js +691 -0
package/dist/core/ingestion/type-env.d.ts +52 -0
package/dist/core/ingestion/type-env.js +349 -0
package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
package/dist/core/ingestion/type-extractors/csharp.js +224 -0
package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
package/dist/core/ingestion/type-extractors/go.js +261 -0
package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
package/dist/core/ingestion/type-extractors/index.js +30 -0
package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
package/dist/core/ingestion/type-extractors/jvm.js +386 -0
package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
package/dist/core/ingestion/type-extractors/php.js +280 -0
package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
package/dist/core/ingestion/type-extractors/python.js +175 -0
package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
package/dist/core/ingestion/type-extractors/ruby.js +218 -0
package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
package/dist/core/ingestion/type-extractors/rust.js +290 -0
package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
package/dist/core/ingestion/type-extractors/shared.js +322 -0
package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
package/dist/core/ingestion/type-extractors/swift.js +140 -0
package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
package/dist/core/ingestion/type-extractors/types.js +4 -0
package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
package/dist/core/ingestion/type-extractors/typescript.js +227 -0
package/dist/core/ingestion/utils.d.ts +73 -0
package/dist/core/ingestion/utils.js +992 -0
package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
package/dist/core/ingestion/workers/parse-worker.js +1055 -0
package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
package/dist/core/ingestion/workers/worker-pool.js +123 -0
package/dist/core/lbug/csv-generator.d.ts +28 -0
package/dist/core/lbug/csv-generator.js +355 -0
package/dist/core/lbug/lbug-adapter.d.ts +96 -0
package/dist/core/lbug/lbug-adapter.js +753 -0
package/dist/core/lbug/schema.d.ts +46 -0
package/dist/core/lbug/schema.js +402 -0
package/dist/core/search/bm25-index.d.ts +20 -0
package/dist/core/search/bm25-index.js +123 -0
package/dist/core/search/hybrid-search.d.ts +32 -0
package/dist/core/search/hybrid-search.js +131 -0
package/dist/core/search/query-cache.d.ts +18 -0
package/dist/core/search/query-cache.js +47 -0
package/dist/core/search/query-expansion.d.ts +19 -0
package/dist/core/search/query-expansion.js +75 -0
package/dist/core/search/reranker.d.ts +29 -0
package/dist/core/search/reranker.js +122 -0
package/dist/core/search/types.d.ts +154 -0
package/dist/core/search/types.js +51 -0
package/dist/core/semantic/tsgo-service.d.ts +67 -0
package/dist/core/semantic/tsgo-service.js +355 -0
package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
package/dist/core/tree-sitter/parser-loader.js +71 -0
package/dist/lib/memory-guard.d.ts +35 -0
package/dist/lib/memory-guard.js +70 -0
package/dist/lib/utils.d.ts +3 -0
package/dist/lib/utils.js +6 -0
package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
package/dist/mcp/compatible-stdio-transport.js +209 -0
package/dist/mcp/core/embedder.d.ts +24 -0
package/dist/mcp/core/embedder.js +168 -0
package/dist/mcp/core/lbug-adapter.d.ts +29 -0
package/dist/mcp/core/lbug-adapter.js +330 -0
package/dist/mcp/local/local-backend.d.ts +188 -0
package/dist/mcp/local/local-backend.js +2759 -0
package/dist/mcp/resources.d.ts +22 -0
package/dist/mcp/resources.js +379 -0
package/dist/mcp/server.d.ts +10 -0
package/dist/mcp/server.js +217 -0
package/dist/mcp/staleness.d.ts +10 -0
package/dist/mcp/staleness.js +25 -0
package/dist/mcp/tools.d.ts +21 -0
package/dist/mcp/tools.js +202 -0
package/dist/server/api.d.ts +5 -0
package/dist/server/api.js +340 -0
package/dist/server/mcp-http.d.ts +7 -0
package/dist/server/mcp-http.js +95 -0
package/dist/storage/git.d.ts +6 -0
package/dist/storage/git.js +35 -0
package/dist/storage/repo-manager.d.ts +87 -0
package/dist/storage/repo-manager.js +249 -0
package/dist/types/pipeline.d.ts +35 -0
package/dist/types/pipeline.js +20 -0
package/hooks/claude/code-mapper-hook.cjs +238 -0
package/hooks/claude/pre-tool-use.sh +79 -0
package/hooks/claude/session-start.sh +42 -0
package/models/mlx-embedder.py +185 -0
package/package.json +100 -0
package/scripts/patch-tree-sitter-swift.cjs +74 -0
package/vendor/leiden/index.cjs +355 -0
package/vendor/leiden/utils.cjs +392 -0

package/dist/core/embeddings/embedder.js ADDED Viewed

@@ -0,0 +1,171 @@
+// code-mapper/src/core/embeddings/embedder.ts
+/**
+ * @file embedder.ts
+ * @description MLX-accelerated code embedder via Python subprocess
+ *
+ * Replaces the previous ONNX/transformers.js embedder with Jina Code 1.5B
+ * running on Apple Silicon Metal via MLX. Fail-fast — no fallback.
+ *
+ * Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 1536 dims, 32K context)
+ * Matryoshka truncation to 256 dims for optimal speed/quality tradeoff
+ */
+import { spawn, execFileSync } from 'child_process';
+import path from 'path';
+import { fileURLToPath } from 'url';
+import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
+// ESM has no built-in __filename/__dirname; derive them from import.meta.url
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+// Path to MLX embedder script (relative to compiled dist/):
+// three directories above this module, then models/mlx-embedder.py
+const MLX_SCRIPT = path.resolve(__dirname, '..', '..', '..', 'models', 'mlx-embedder.py');
+// Singleton subprocess — handle of the spawned python3 process, or null when none is running
+let mlxProcess = null;
+// Settle callbacks of the request currently awaiting a reply.
+// There is a single slot: only one request can be tracked at a time.
+let pendingResolve = null;
+let pendingReject = null;
+// Trailing partial line of subprocess stdout, kept until its newline arrives
+let lineBuffer = '';
+// Set to true once a stdout message with status 'ready' has been parsed;
+// reset to false when the subprocess exits or is disposed
+let ready = false;
+/** Get the current inference device */
+export const getCurrentDevice = () => ready ? 'mlx-metal' : null;
+function ensureProcess() {
+    if (mlxProcess && !mlxProcess.killed)
+        return mlxProcess;
+    // Check prerequisites
+    try {
+        execFileSync('python3', ['-c', 'import mlx; import tokenizers'], {
+            timeout: 5000,
+            stdio: ['pipe', 'pipe', 'pipe'],
+        });
+    }
+    catch {
+        throw new Error('MLX embedder requires Python 3 + MLX on Apple Silicon.\n' +
+            'Install: pip3 install mlx tokenizers huggingface_hub\n' +
+            'The embedding model will download automatically on first use (~3GB).');
+    }
+    mlxProcess = spawn('python3', [MLX_SCRIPT], {
+        stdio: ['pipe', 'pipe', 'pipe'],
+        env: { ...process.env, TOKENIZERS_PARALLELISM: 'false' },
+    });
+    lineBuffer = '';
+    mlxProcess.stdout.on('data', (chunk) => {
+        lineBuffer += chunk.toString();
+        const lines = lineBuffer.split('\n');
+        lineBuffer = lines.pop() || '';
+        for (const line of lines) {
+            if (!line.trim())
+                continue;
+            try {
+                const msg = JSON.parse(line);
+                if (msg.status === 'ready' && !ready) {
+                    ready = true;
+                    console.error(`Code Mapper: MLX embedder ready (${msg.device}, loaded in ${msg.load_ms}ms)`);
+                }
+                if (pendingResolve) {
+                    const resolve = pendingResolve;
+                    pendingResolve = null;
+                    pendingReject = null;
+                    resolve(msg);
+                }
+            }
+            catch {
+                // Non-JSON output — ignore
+            }
+        }
+    });
+    mlxProcess.stderr.on('data', (chunk) => {
+        // Forward stderr for debugging
+        const msg = chunk.toString().trim();
+        if (msg)
+            console.error(`[mlx-embedder] ${msg}`);
+    });
+    mlxProcess.on('exit', (code) => {
+        ready = false;
+        mlxProcess = null;
+        if (pendingReject) {
+            const reject = pendingReject;
+            pendingResolve = null;
+            pendingReject = null;
+            reject(new Error(`MLX embedder exited with code ${code}`));
+        }
+    });
+    return mlxProcess;
+}
+function sendAndReceive(request) {
+    return new Promise((resolve, reject) => {
+        const proc = ensureProcess();
+        pendingResolve = resolve;
+        pendingReject = reject;
+        proc.stdin.write(JSON.stringify(request) + '\n');
+    });
+}
+/**
+ * Initialize the MLX embedder (spawns Python subprocess, loads model)
+ */
+export const initEmbedder = async (_onProgress, _config = {}) => {
+    if (ready)
+        return;
+    ensureProcess();
+    // Wait for the "ready" message from the Python process
+    const msg = await sendAndReceive({ cmd: 'ping' });
+    if (msg.error) {
+        throw new Error(`MLX embedder failed: ${msg.error}`);
+    }
+    return msg;
+};
+/** Check if the embedder is initialized and ready */
+export const isEmbedderReady = () => ready;
+/** Get the embedder instance — not applicable for MLX, returns null */
+export const getEmbedder = () => {
+    if (!ready)
+        throw new Error('MLX embedder not initialized. Call initEmbedder() first.');
+    return null; // No JS-side instance — inference happens in Python
+};
+/**
+ * Embed a single text string
+ */
+export const embedText = async (text) => {
+    const result = await sendAndReceive({
+        texts: [text],
+        task: 'nl2code',
+        type: 'passage',
+        dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
+    });
+    if (result.error)
+        throw new Error(`Embedding failed: ${result.error}`);
+    return new Float32Array(result.embeddings[0]);
+};
+/**
+ * Embed multiple texts in a single batch
+ */
+export const embedBatch = async (texts) => {
+    if (texts.length === 0)
+        return [];
+    const result = await sendAndReceive({
+        texts,
+        task: 'nl2code',
+        type: 'passage',
+        dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
+    });
+    if (result.error)
+        throw new Error(`Batch embedding failed: ${result.error}`);
+    return result.embeddings.map((e) => new Float32Array(e));
+};
+/** Convert Float32Array to number[] for LadybugDB storage */
+export const embeddingToArray = (embedding) => {
+    return Array.from(embedding);
+};
+/** Dispose the embedder subprocess */
+export const disposeEmbedder = async () => {
+    if (mlxProcess && !mlxProcess.killed) {
+        try {
+            mlxProcess.stdin.write(JSON.stringify({ cmd: 'quit' }) + '\n');
+            // Give it a moment to exit gracefully
+            await new Promise(resolve => setTimeout(resolve, 500));
+        }
+        catch { }
+        try {
+            mlxProcess.kill();
+        }
+        catch { }
+        mlxProcess = null;
+    }
+    ready = false;
+};

package/dist/core/embeddings/embedding-pipeline.d.ts ADDED Viewed

@@ -0,0 +1,41 @@
+/**
+ * @file embedding-pipeline.ts
+ * @description Orchestrates the background embedding process:
+ * 1) Query embeddable nodes from LadybugDB
+ * 2) Generate text representations
+ * 3) Batch embed via the MLX embedder subprocess (embedder.js)
+ * 4) Store embeddings in LadybugDB
+ * 5) Create vector index for semantic search
+ */
+import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
+/** Progress callback type */
+export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
+/**
+ * Run the full embedding pipeline (load model, embed nodes, create index)
+ * @param executeQuery - Execute Cypher queries against LadybugDB
+ * @param executeWithReusedStatement - Execute with reused prepared statement
+ * @param onProgress - Progress callback
+ * @param config - Configuration override
+ * @param skipNodeIds - Node IDs that already have embeddings (incremental mode)
+ */
+export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
+/**
+ * Perform semantic search via the CodeEmbedding vector index
+ * @param executeQuery - Execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of results (default: 10)
+ * @param maxDistance - Maximum cosine distance threshold (default: 0.5)
+ * @returns Search results ordered by relevance
+ */
+export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
+/**
+ * Semantic search with flattened results (graph expansion placeholder)
+ *
+ * For full graph traversal, use the execute_vector_cypher tool directly
+ *
+ * @param executeQuery - Execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of semantic matches (default: 5)
+ * @param _hops - Unused, kept for API compatibility
+ */
+export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;

package/dist/core/embeddings/embedding-pipeline.js ADDED Viewed

@@ -0,0 +1,402 @@
+// code-mapper/src/core/embeddings/embedding-pipeline.ts
+/**
+ * @file embedding-pipeline.ts
+ * @description Orchestrates the background embedding process:
+ * 1) Query embeddable nodes from LadybugDB
+ * 2) Generate text representations
+ * 3) Batch embed via the MLX embedder subprocess (embedder.js)
+ * 4) Store embeddings in LadybugDB
+ * 5) Create vector index for semantic search
+ */
+import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
+import { generateEmbeddingText } from './text-generator.js';
+import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
+const isDev = process.env.NODE_ENV === 'development';
+/** Query all embeddable nodes from LadybugDB (File has different schema than code elements) */
+const queryEmbeddableNodes = async (executeQuery) => {
+    const allNodes = [];
+    for (const label of EMBEDDABLE_LABELS) {
+        try {
+            // All embeddable labels are code elements with startLine/endLine
+            const query = `
+        MATCH (n:${label})
+        RETURN n.id AS id, n.name AS name, '${label}' AS label,
+               n.filePath AS filePath, n.content AS content,
+               n.startLine AS startLine, n.endLine AS endLine
+      `;
+            const rows = await executeQuery(query);
+            for (const row of rows) {
+                allNodes.push({
+                    id: row.id ?? row[0],
+                    name: row.name ?? row[1],
+                    label: row.label ?? row[2],
+                    filePath: row.filePath ?? row[3],
+                    content: row.content ?? row[4] ?? '',
+                    startLine: row.startLine ?? row[5],
+                    endLine: row.endLine ?? row[6],
+                });
+            }
+        }
+        catch (error) {
+            // Table might not exist or be empty — continue
+            if (isDev) {
+                console.warn(`Query for ${label} nodes failed:`, error);
+            }
+        }
+    }
+    return allNodes;
+};
+/**
+ * Batch INSERT embeddings into the CodeEmbedding table
+ *
+ * Separate lightweight table avoids copy-on-write overhead from
+ * UPDATEing nodes with large content fields
+ */
+const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
+    // INSERT into separate embedding table — avoids large-row COW overhead
+    const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
+    const paramsList = updates.map(u => ({ nodeId: u.id, embedding: u.embedding }));
+    await executeWithReusedStatement(cypher, paramsList);
+};
+/** Create the HNSW vector index on the CodeEmbedding table */
+let vectorExtensionLoaded = false;
+const createVectorIndex = async (executeQuery) => {
+    // LadybugDB v0.15+ requires explicit VECTOR extension load (once per session)
+    if (!vectorExtensionLoaded) {
+        try {
+            await executeQuery('INSTALL VECTOR');
+            await executeQuery('LOAD EXTENSION VECTOR');
+            vectorExtensionLoaded = true;
+        }
+        catch {
+            // Extension may already be loaded — index creation will fail clearly if not
+            vectorExtensionLoaded = true;
+        }
+    }
+    const cypher = `
+    CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
+  `;
+    try {
+        await executeQuery(cypher);
+    }
+    catch (error) {
+        // Index might already exist
+        if (isDev) {
+            console.warn('Vector index creation warning:', error);
+        }
+    }
+};
+/**
+ * Run the full embedding pipeline (load model, embed nodes, create index)
+ * @param executeQuery - Execute Cypher queries against LadybugDB
+ * @param executeWithReusedStatement - Execute with reused prepared statement
+ * @param onProgress - Progress callback
+ * @param config - Configuration override
+ * @param skipNodeIds - Node IDs that already have embeddings (incremental mode)
+ */
+export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
+    const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
+    try {
+        // Phase 1: Load model
+        onProgress({
+            phase: 'loading-model',
+            percent: 0,
+            modelDownloadPercent: 0,
+        });
+        await initEmbedder((modelProgress) => {
+            const downloadPercent = modelProgress.progress ?? 0;
+            onProgress({
+                phase: 'loading-model',
+                percent: Math.round(downloadPercent * 0.2),
+                modelDownloadPercent: downloadPercent,
+            });
+        }, finalConfig);
+        onProgress({
+            phase: 'loading-model',
+            percent: 20,
+            modelDownloadPercent: 100,
+        });
+        if (isDev) {
+            console.log('🔍 Querying embeddable nodes...');
+        }
+        // Phase 2: Query nodes
+        let nodes = await queryEmbeddableNodes(executeQuery);
+        // Incremental mode: skip already-embedded nodes
+        if (skipNodeIds && skipNodeIds.size > 0) {
+            const beforeCount = nodes.length;
+            nodes = nodes.filter(n => !skipNodeIds.has(n.id));
+            if (isDev) {
+                console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
+            }
+        }
+        const totalNodes = nodes.length;
+        // Enrich nodes with graph context (callers, callees, module) for better embeddings
+        // This adds relationship context so "import resolution pipeline" matches processImports
+        // because its caller "runPipelineFromRepo" contains "pipeline"
+        const graphContext = new Map();
+        if (totalNodes > 0) {
+            try {
+                const nodeIds = nodes.map(n => `'${String(n.id).replace(/'/g, "''")}'`).join(', ');
+                // Batch fetch callers
+                const callerRows = await executeQuery(`
+          MATCH (caller)-[r:CodeRelation {type: 'CALLS'}]->(n) WHERE n.id IN [${nodeIds}] AND r.confidence >= 0.7
+          RETURN n.id AS nid, caller.name AS name LIMIT ${totalNodes * 3}
+        `);
+                const callerMap = new Map();
+                for (const r of callerRows) {
+                    const nid = String(r.nid ?? r[0]);
+                    if (!callerMap.has(nid))
+                        callerMap.set(nid, []);
+                    callerMap.get(nid).push(String(r.name ?? r[1]));
+                }
+                // Batch fetch callees
+                const calleeRows = await executeQuery(`
+          MATCH (n)-[r:CodeRelation {type: 'CALLS'}]->(callee) WHERE n.id IN [${nodeIds}] AND r.confidence >= 0.7
+          RETURN n.id AS nid, callee.name AS name LIMIT ${totalNodes * 3}
+        `);
+                const calleeMap = new Map();
+                for (const r of calleeRows) {
+                    const nid = String(r.nid ?? r[0]);
+                    if (!calleeMap.has(nid))
+                        calleeMap.set(nid, []);
+                    calleeMap.get(nid).push(String(r.name ?? r[1]));
+                }
+                // Batch fetch module
+                const moduleRows = await executeQuery(`
+          MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community) WHERE n.id IN [${nodeIds}]
+          RETURN n.id AS nid, c.heuristicLabel AS module LIMIT ${totalNodes}
+        `);
+                const moduleMap = new Map();
+                for (const r of moduleRows) {
+                    moduleMap.set(String(r.nid ?? r[0]), String(r.module ?? r[1] ?? ''));
+                }
+                // Assemble
+                for (const node of nodes) {
+                    graphContext.set(node.id, {
+                        callers: (callerMap.get(node.id) || []).slice(0, 3),
+                        callees: (calleeMap.get(node.id) || []).slice(0, 3),
+                        module: moduleMap.get(node.id) || '',
+                    });
+                }
+            }
+            catch { } // Non-fatal — embeddings work without graph context
+        }
+        if (isDev) {
+            console.log(`📊 Found ${totalNodes} embeddable nodes (${graphContext.size} with graph context)`);
+        }
+        if (totalNodes === 0) {
+            onProgress({
+                phase: 'ready',
+                percent: 100,
+                nodesProcessed: 0,
+                totalNodes: 0,
+            });
+            return;
+        }
+        // Phase 3: Batch embed
+        const batchSize = finalConfig.batchSize;
+        const totalBatches = Math.ceil(totalNodes / batchSize);
+        let processedNodes = 0;
+        onProgress({
+            phase: 'embedding',
+            percent: 20,
+            nodesProcessed: 0,
+            totalNodes,
+        });
+        // Generate ALL text representations with graph context enrichment
+        const allTexts = nodes.map(node => {
+            const ctx = graphContext.get(node.id);
+            let text = generateEmbeddingText(node, finalConfig);
+            if (ctx) {
+                const parts = [];
+                if (ctx.module)
+                    parts.push(`Module: ${ctx.module}`);
+                if (ctx.callers.length > 0)
+                    parts.push(`Called by: ${ctx.callers.join(', ')}`);
+                if (ctx.callees.length > 0)
+                    parts.push(`Calls: ${ctx.callees.join(', ')}`);
+                if (parts.length > 0) {
+                    const lines = text.split('\n');
+                    const insertIdx = lines.findIndex(l => l === '') || 2;
+                    lines.splice(insertIdx, 0, ...parts);
+                    text = lines.join('\n');
+                }
+            }
+            return text;
+        });
+        // Send ALL texts to the MLX embedder in one call — it does length-tiered
+        // batching internally for optimal Metal GPU utilization
+        const allEmbeddings = await embedBatch(allTexts);
+        onProgress({
+            phase: 'embedding',
+            percent: 85,
+            nodesProcessed: totalNodes,
+            totalNodes,
+        });
+        // Insert all embeddings into LadybugDB in batches
+        const DB_BATCH = 200;
+        for (let i = 0; i < nodes.length; i += DB_BATCH) {
+            const batchNodes = nodes.slice(i, i + DB_BATCH);
+            const batchEmbeddings = allEmbeddings.slice(i, i + DB_BATCH);
+            const updates = batchNodes.map((node, j) => ({
+                id: node.id,
+                embedding: embeddingToArray(batchEmbeddings[j]),
+            }));
+            await batchInsertEmbeddings(executeWithReusedStatement, updates);
+            processedNodes = Math.min(i + DB_BATCH, nodes.length);
+            onProgress({
+                phase: 'embedding',
+                percent: Math.round(85 + ((processedNodes / totalNodes) * 5)),
+                nodesProcessed: processedNodes,
+                totalNodes,
+            });
+        }
+        // Phase 4: Create HNSW vector index
+        onProgress({
+            phase: 'indexing',
+            percent: 90,
+            nodesProcessed: totalNodes,
+            totalNodes,
+        });
+        if (isDev) {
+            console.log('📇 Creating vector index...');
+        }
+        await createVectorIndex(executeQuery);
+        // Done
+        onProgress({
+            phase: 'ready',
+            percent: 100,
+            nodesProcessed: totalNodes,
+            totalNodes,
+        });
+        if (isDev) {
+            console.log('✅ Embedding pipeline complete!');
+        }
+    }
+    catch (error) {
+        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+        if (isDev) {
+            console.error('❌ Embedding pipeline error:', error);
+        }
+        onProgress({
+            phase: 'error',
+            percent: 0,
+            error: errorMessage,
+        });
+        throw error;
+    }
+};
+/**
+ * Perform semantic search via the CodeEmbedding vector index
+ * @param executeQuery - Execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of results (default: 10)
+ * @param maxDistance - Maximum cosine distance threshold (default: 0.5)
+ * @returns Search results ordered by relevance
+ */
+export const semanticSearch = async (executeQuery, query, k = 10, maxDistance = 0.5) => {
+    if (!isEmbedderReady()) {
+        throw new Error('Embedding model not initialized. Run embedding pipeline first.');
+    }
+    // Embed query text
+    const queryEmbedding = await embedText(query);
+    const queryVec = embeddingToArray(queryEmbedding);
+    const queryVecStr = `[${queryVec.join(',')}]`;
+    // Query vector index for nearest neighbors
+    const vectorQuery = `
+    CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
+      CAST(${queryVecStr} AS FLOAT[${DEFAULT_EMBEDDING_CONFIG.dimensions}]), ${k})
+    YIELD node AS emb, distance
+    WITH emb, distance
+    WHERE distance < ${maxDistance}
+    RETURN emb.nodeId AS nodeId, distance
+    ORDER BY distance
+  `;
+    const embResults = await executeQuery(vectorQuery);
+    if (embResults.length === 0) {
+        return [];
+    }
+    // Group by label for batched metadata queries
+    const byLabel = new Map();
+    for (const embRow of embResults) {
+        const nodeId = embRow.nodeId ?? embRow[0];
+        const distance = embRow.distance ?? embRow[1];
+        const labelEndIdx = nodeId.indexOf(':');
+        const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
+        if (!byLabel.has(label))
+            byLabel.set(label, []);
+        byLabel.get(label).push({ nodeId, distance });
+    }
+    // Batch-fetch node metadata per label
+    const results = [];
+    for (const [label, items] of byLabel) {
+        const idList = items.map(i => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
+        try {
+            let nodeQuery;
+            if (label === 'File') {
+                nodeQuery = `
+          MATCH (n:File) WHERE n.id IN [${idList}]
+          RETURN n.id AS id, n.name AS name, n.filePath AS filePath
+        `;
+            }
+            else {
+                nodeQuery = `
+          MATCH (n:${label}) WHERE n.id IN [${idList}]
+          RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
+                 n.startLine AS startLine, n.endLine AS endLine
+        `;
+            }
+            const nodeRows = await executeQuery(nodeQuery);
+            const rowMap = new Map();
+            for (const row of nodeRows) {
+                const id = row.id ?? row[0];
+                rowMap.set(id, row);
+            }
+            for (const item of items) {
+                const nodeRow = rowMap.get(item.nodeId);
+                if (nodeRow) {
+                    results.push({
+                        nodeId: item.nodeId,
+                        name: nodeRow.name ?? nodeRow[1] ?? '',
+                        label,
+                        filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
+                        distance: item.distance,
+                        startLine: label !== 'File' ? (nodeRow.startLine ?? nodeRow[3]) : undefined,
+                        endLine: label !== 'File' ? (nodeRow.endLine ?? nodeRow[4]) : undefined,
+                    });
+                }
+            }
+        }
+        catch {
+            // Table might not exist — skip
+        }
+    }
+    // Re-sort by distance (batch queries may have mixed order)
+    results.sort((a, b) => a.distance - b.distance);
+    return results;
+};
+/**
+ * Semantic search with flattened results (graph expansion placeholder)
+ *
+ * For full graph traversal, use the execute_vector_cypher tool directly
+ *
+ * @param executeQuery - Execute Cypher queries
+ * @param query - Search query text
+ * @param k - Number of semantic matches (default: 5)
+ * @param _hops - Unused, kept for API compatibility
+ */
+export const semanticSearchWithContext = async (executeQuery, query, k = 5, _hops = 1) => {
+    // Return semantic results directly — use execute_vector_cypher for graph traversal
+    const results = await semanticSearch(executeQuery, query, k, 0.5);
+    return results.map(r => ({
+        matchId: r.nodeId,
+        matchName: r.name,
+        matchLabel: r.label,
+        matchPath: r.filePath,
+        distance: r.distance,
+        connectedId: null,
+        connectedName: null,
+        connectedLabel: null,
+        relationType: null,
+    }));
+};

package/dist/core/embeddings/index.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+/** @file index.ts @description Barrel re-exports for the embedding pipeline system */
+export * from './types.js';
+export * from './embedder.js';
+export * from './text-generator.js';
+export * from './embedding-pipeline.js';

package/dist/core/embeddings/index.js ADDED Viewed

@@ -0,0 +1,6 @@
+// code-mapper/src/core/embeddings/index.ts
+/** @file index.ts @description Barrel re-exports for the embedding pipeline system */
+export * from './types.js';
+export * from './embedder.js';
+export * from './text-generator.js';
+export * from './embedding-pipeline.js';

package/dist/core/embeddings/text-generator.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * @file text-generator.ts
+ * @description Pure functions to generate embedding text from code nodes,
+ * combining node metadata with code snippets for semantic matching
+ */
+import type { EmbeddableNode, EmbeddingConfig } from './types.js';
+/**
+ * Generate embedding text for any embeddable node (dispatches by label)
+ * @param node - The node to generate text for
+ * @param config - Optional configuration for max snippet length; typed as a
+ *   partial, so any subset of EmbeddingConfig fields may be supplied
+ * @returns Text suitable for embedding
+ */
+export declare const generateEmbeddingText: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
+/**
+ * Generate embedding texts for a batch of nodes
+ * @param nodes - Nodes to generate text for
+ * @param config - Optional configuration; typed as a partial, so any subset
+ *   of EmbeddingConfig fields may be supplied
+ * @returns Texts in the same order as input nodes
+ */
+export declare const generateBatchEmbeddingTexts: (nodes: EmbeddableNode[], config?: Partial<EmbeddingConfig>) => string[];