npm - @zuvia-software-solutions/code-mapper - Versions diffs - 1.4.0 → 2.0.1 - Mend

@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

package/dist/cli/ai-context.js +1 -1
package/dist/cli/analyze.d.ts +1 -0
package/dist/cli/analyze.js +73 -82
package/dist/cli/augment.js +0 -2
package/dist/cli/eval-server.d.ts +2 -2
package/dist/cli/eval-server.js +6 -6
package/dist/cli/index.js +6 -10
package/dist/cli/mcp.d.ts +1 -3
package/dist/cli/mcp.js +3 -3
package/dist/cli/refresh.d.ts +2 -2
package/dist/cli/refresh.js +24 -29
package/dist/cli/status.js +4 -13
package/dist/cli/tool.d.ts +5 -4
package/dist/cli/tool.js +8 -10
package/dist/config/ignore-service.js +14 -34
package/dist/core/augmentation/engine.js +53 -83
package/dist/core/db/adapter.d.ts +99 -0
package/dist/core/db/adapter.js +402 -0
package/dist/core/db/graph-loader.d.ts +27 -0
package/dist/core/db/graph-loader.js +148 -0
package/dist/core/db/queries.d.ts +160 -0
package/dist/core/db/queries.js +441 -0
package/dist/core/db/schema.d.ts +108 -0
package/dist/core/db/schema.js +136 -0
package/dist/core/embeddings/embedder.d.ts +21 -12
package/dist/core/embeddings/embedder.js +104 -50
package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
package/dist/core/embeddings/embedding-pipeline.js +220 -262
package/dist/core/embeddings/text-generator.js +4 -19
package/dist/core/embeddings/types.d.ts +1 -1
package/dist/core/graph/graph.d.ts +1 -1
package/dist/core/graph/graph.js +1 -0
package/dist/core/graph/types.d.ts +11 -9
package/dist/core/graph/types.js +4 -1
package/dist/core/incremental/refresh.d.ts +46 -0
package/dist/core/incremental/refresh.js +503 -0
package/dist/core/incremental/types.d.ts +2 -1
package/dist/core/incremental/types.js +42 -44
package/dist/core/ingestion/ast-cache.js +1 -0
package/dist/core/ingestion/call-processor.d.ts +15 -3
package/dist/core/ingestion/call-processor.js +448 -60
package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
package/dist/core/ingestion/cluster-enricher.js +2 -0
package/dist/core/ingestion/community-processor.d.ts +1 -1
package/dist/core/ingestion/community-processor.js +8 -3
package/dist/core/ingestion/export-detection.d.ts +1 -1
package/dist/core/ingestion/export-detection.js +1 -1
package/dist/core/ingestion/filesystem-walker.js +1 -1
package/dist/core/ingestion/heritage-processor.d.ts +2 -2
package/dist/core/ingestion/heritage-processor.js +22 -11
package/dist/core/ingestion/import-processor.d.ts +2 -2
package/dist/core/ingestion/import-processor.js +24 -9
package/dist/core/ingestion/language-config.js +7 -4
package/dist/core/ingestion/mro-processor.d.ts +1 -1
package/dist/core/ingestion/mro-processor.js +23 -11
package/dist/core/ingestion/named-binding-extraction.js +5 -5
package/dist/core/ingestion/parsing-processor.d.ts +4 -4
package/dist/core/ingestion/parsing-processor.js +26 -18
package/dist/core/ingestion/pipeline.d.ts +4 -2
package/dist/core/ingestion/pipeline.js +50 -20
package/dist/core/ingestion/process-processor.d.ts +2 -2
package/dist/core/ingestion/process-processor.js +28 -14
package/dist/core/ingestion/resolution-context.d.ts +1 -1
package/dist/core/ingestion/resolution-context.js +14 -4
package/dist/core/ingestion/resolvers/csharp.js +4 -3
package/dist/core/ingestion/resolvers/go.js +3 -1
package/dist/core/ingestion/resolvers/jvm.js +13 -4
package/dist/core/ingestion/resolvers/standard.js +2 -2
package/dist/core/ingestion/resolvers/utils.js +6 -2
package/dist/core/ingestion/route-stitcher.d.ts +15 -0
package/dist/core/ingestion/route-stitcher.js +92 -0
package/dist/core/ingestion/structure-processor.d.ts +1 -1
package/dist/core/ingestion/structure-processor.js +3 -2
package/dist/core/ingestion/symbol-table.d.ts +2 -0
package/dist/core/ingestion/symbol-table.js +5 -1
package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
package/dist/core/ingestion/tree-sitter-queries.js +177 -0
package/dist/core/ingestion/type-env.js +20 -0
package/dist/core/ingestion/type-extractors/csharp.js +4 -3
package/dist/core/ingestion/type-extractors/go.js +23 -12
package/dist/core/ingestion/type-extractors/php.js +18 -10
package/dist/core/ingestion/type-extractors/ruby.js +15 -3
package/dist/core/ingestion/type-extractors/rust.js +3 -2
package/dist/core/ingestion/type-extractors/shared.js +3 -2
package/dist/core/ingestion/type-extractors/typescript.js +11 -5
package/dist/core/ingestion/utils.d.ts +27 -4
package/dist/core/ingestion/utils.js +145 -100
package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
package/dist/core/ingestion/workers/parse-worker.js +97 -29
package/dist/core/ingestion/workers/worker-pool.js +3 -0
package/dist/core/search/bm25-index.d.ts +15 -8
package/dist/core/search/bm25-index.js +48 -98
package/dist/core/search/hybrid-search.d.ts +9 -3
package/dist/core/search/hybrid-search.js +30 -25
package/dist/core/search/reranker.js +9 -7
package/dist/core/search/types.d.ts +0 -4
package/dist/core/semantic/tsgo-service.d.ts +7 -1
package/dist/core/semantic/tsgo-service.js +165 -66
package/dist/lib/tsgo-test.d.ts +2 -0
package/dist/lib/tsgo-test.js +6 -0
package/dist/lib/type-utils.d.ts +25 -0
package/dist/lib/type-utils.js +22 -0
package/dist/lib/utils.d.ts +3 -2
package/dist/lib/utils.js +3 -2
package/dist/mcp/compatible-stdio-transport.js +1 -1
package/dist/mcp/local/local-backend.d.ts +29 -56
package/dist/mcp/local/local-backend.js +808 -1118
package/dist/mcp/resources.js +35 -25
package/dist/mcp/server.d.ts +1 -1
package/dist/mcp/server.js +5 -5
package/dist/mcp/tools.js +24 -25
package/dist/storage/repo-manager.d.ts +2 -12
package/dist/storage/repo-manager.js +1 -47
package/dist/types/pipeline.d.ts +8 -5
package/dist/types/pipeline.js +5 -0
package/package.json +18 -11
package/dist/cli/serve.d.ts +0 -5
package/dist/cli/serve.js +0 -8
package/dist/core/incremental/child-process.d.ts +0 -8
package/dist/core/incremental/child-process.js +0 -649
package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
package/dist/core/incremental/refresh-coordinator.js +0 -147
package/dist/core/lbug/csv-generator.d.ts +0 -28
package/dist/core/lbug/csv-generator.js +0 -355
package/dist/core/lbug/lbug-adapter.d.ts +0 -96
package/dist/core/lbug/lbug-adapter.js +0 -753
package/dist/core/lbug/schema.d.ts +0 -46
package/dist/core/lbug/schema.js +0 -402
package/dist/mcp/core/embedder.d.ts +0 -24
package/dist/mcp/core/embedder.js +0 -168
package/dist/mcp/core/lbug-adapter.d.ts +0 -29
package/dist/mcp/core/lbug-adapter.js +0 -330
package/dist/server/api.d.ts +0 -5
package/dist/server/api.js +0 -340
package/dist/server/mcp-http.d.ts +0 -7
package/dist/server/mcp-http.js +0 -95
package/models/mlx-embedder.py +0 -185

package/dist/core/db/schema.d.ts ADDED Viewed

@@ -0,0 +1,108 @@
+/**
+ * @file Single source of truth for the code knowledge graph schema.
+ *
+ * ALL types in the system are derived from these const declarations.
+ * The compiler enforces exhaustiveness — adding a new node label or edge
+ * type requires updating every switch/map that handles them.
+ */
+import { type Brand } from '../../lib/type-utils.js';
+export { assertNever } from '../../lib/type-utils.js';
+export declare const NODE_LABELS: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module"];
+/** Union of all valid node labels — derived from the const tuple */
+export type NodeLabel = typeof NODE_LABELS[number];
+/** Runtime assertion: throws a TypeError unless `value` is one of NODE_LABELS; on return, `value` is narrowed to NodeLabel */
+export declare function assertNodeLabel(value: string): asserts value is NodeLabel;
+export declare const EDGE_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "HAS_METHOD", "OVERRIDES", "MEMBER_OF", "STEP_IN_PROCESS", "DEPENDS_ON", "PROVIDES"];
+/** Union of all valid edge types — derived from the const tuple */
+export type EdgeType = typeof EDGE_TYPES[number];
+/** Runtime assertion: throws a TypeError unless `value` is one of EDGE_TYPES; on return, `value` is narrowed to EdgeType */
+export declare function assertEdgeType(value: string): asserts value is EdgeType;
+/** A node ID (format: "Label:filePath:name") */
+export type NodeId = Brand<string, 'NodeId'>;
+/** An edge ID (format: "sourceId_type_targetId") */
+export type EdgeId = Brand<string, 'EdgeId'>;
+/** Brand a raw string as a NodeId. The only runtime check is that `raw` is non-empty (TypeError otherwise); the "Label:filePath:name" format is not verified */
+export declare function toNodeId(raw: string): NodeId;
+/** Brand a raw string as an EdgeId. The only runtime check is that `raw` is non-empty (TypeError otherwise) */
+export declare function toEdgeId(raw: string): EdgeId;
+/** A node row as stored in the `nodes` table (field names mirror the column names in SCHEMA_SQL) */
+export interface NodeRow {
+    readonly id: NodeId;
+    readonly label: NodeLabel;
+    readonly name: string;
+    readonly filePath: string;
+    readonly startLine: number | null;
+    readonly endLine: number | null;
+    readonly isExported: number | null; // stored in a nullable INTEGER column, hence number rather than boolean
+    readonly content: string;
+    readonly description: string;
+    readonly heuristicLabel: string | null;
+    readonly cohesion: number | null;
+    readonly symbolCount: number | null;
+    readonly keywords: string | null;
+    readonly enrichedBy: 'heuristic' | 'llm' | null;
+    readonly processType: 'intra_community' | 'cross_community' | null;
+    readonly stepCount: number | null;
+    readonly communities: string | null;
+    readonly entryPointId: string | null;
+    readonly terminalId: string | null;
+    readonly parameterCount: number | null;
+    readonly returnType: string | null;
+    readonly nameExpanded: string; // NOTE(review): the column is `TEXT DEFAULT ''` without NOT NULL in SCHEMA_SQL, so a NULL could reach this field — confirm
+}
+/** An edge row as stored in the `edges` table */
+export interface EdgeRow {
+    readonly id: EdgeId;
+    readonly sourceId: NodeId;
+    readonly targetId: NodeId;
+    readonly type: EdgeType;
+    readonly confidence: number;
+    readonly reason: string;
+    readonly step: number;
+    readonly callLine: number | null;
+}
+/** An embedding row as stored in the `embeddings` table */
+export interface EmbeddingRow {
+    readonly nodeId: NodeId;
+    readonly embedding: Buffer;
+    readonly textHash: string | null;
+}
+/** Fields accepted when inserting a node — only `id` and `label` are required; every other field is optional */
+export interface NodeInsert {
+    readonly id: NodeId;
+    readonly label: NodeLabel;
+    readonly name?: string;
+    readonly filePath?: string;
+    readonly startLine?: number | null;
+    readonly endLine?: number | null;
+    readonly isExported?: number | null;
+    readonly content?: string;
+    readonly description?: string;
+    readonly heuristicLabel?: string | null;
+    readonly cohesion?: number | null;
+    readonly symbolCount?: number | null;
+    readonly keywords?: string | null;
+    readonly enrichedBy?: 'heuristic' | 'llm' | null;
+    readonly processType?: 'intra_community' | 'cross_community' | null;
+    readonly stepCount?: number | null;
+    readonly communities?: string | null;
+    readonly entryPointId?: string | null;
+    readonly terminalId?: string | null;
+    readonly parameterCount?: number | null;
+    readonly returnType?: string | null;
+    readonly nameExpanded?: string;
+}
+/** Fields accepted when inserting an edge — `id`, `sourceId`, `targetId` and `type` are required; the rest are optional */
+export interface EdgeInsert {
+    readonly id: EdgeId;
+    readonly sourceId: NodeId;
+    readonly targetId: NodeId;
+    readonly type: EdgeType;
+    readonly confidence?: number;
+    readonly reason?: string;
+    readonly step?: number;
+    readonly callLine?: number | null;
+}
+/** Legacy edge table name constant (kept for compatibility) */
+export declare const REL_TABLE_NAME = "CodeRelation";
+/** DDL string for the `nodes`, `edges` and `embeddings` tables, their indexes, the `nodes_fts` FTS5 index and the triggers that keep it in sync */
+export declare const SCHEMA_SQL = "\n-- Nodes: unified table for all code elements\nCREATE TABLE IF NOT EXISTS nodes (\n  id TEXT PRIMARY KEY,\n  label TEXT NOT NULL,\n  name TEXT NOT NULL DEFAULT '',\n  filePath TEXT NOT NULL DEFAULT '',\n  startLine INTEGER,\n  endLine INTEGER,\n  isExported INTEGER,\n  content TEXT NOT NULL DEFAULT '',\n  description TEXT NOT NULL DEFAULT '',\n  heuristicLabel TEXT,\n  cohesion REAL,\n  symbolCount INTEGER,\n  keywords TEXT,\n  enrichedBy TEXT,\n  processType TEXT,\n  stepCount INTEGER,\n  communities TEXT,\n  entryPointId TEXT,\n  terminalId TEXT,\n  parameterCount INTEGER,\n  returnType TEXT,\n  nameExpanded TEXT DEFAULT ''\n);\n\nCREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(label);\nCREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath ON nodes(filePath);\nCREATE INDEX IF NOT EXISTS idx_nodes_label_name ON nodes(label, name);\nCREATE INDEX IF NOT EXISTS idx_nodes_filePath_lines ON nodes(filePath, startLine, endLine);\n\n-- Edges: single table for all relationships\nCREATE TABLE IF NOT EXISTS edges (\n  id TEXT PRIMARY KEY,\n  sourceId TEXT NOT NULL,\n  targetId TEXT NOT NULL,\n  type TEXT NOT NULL,\n  confidence REAL NOT NULL DEFAULT 1.0,\n  reason TEXT NOT NULL DEFAULT '',\n  step INTEGER NOT NULL DEFAULT 0,\n  callLine INTEGER\n);\n\nCREATE INDEX IF NOT EXISTS idx_edges_sourceId ON edges(sourceId);\nCREATE INDEX IF NOT EXISTS idx_edges_targetId ON edges(targetId);\nCREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type);\nCREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(sourceId, type);\nCREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(targetId, type);\n\n-- Embeddings: vector storage\nCREATE TABLE IF NOT EXISTS embeddings (\n  nodeId TEXT PRIMARY KEY,\n  embedding BLOB NOT NULL,\n  textHash TEXT\n);\n\n-- FTS5 virtual table (auto-updated via triggers)\nCREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(\n  name,\n  nameExpanded,\n  filePath,\n  content,\n  content='nodes',\n  content_rowid='rowid'\n);\n\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN\n  INSERT INTO nodes_fts(rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN\n  INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);\nEND;\nCREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN\n  INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);\n  INSERT INTO nodes_fts(rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);\nEND;\n";

package/dist/core/db/schema.js ADDED Viewed

@@ -0,0 +1,136 @@
+// code-mapper/src/core/db/schema.ts
+/**
+ * @file Single source of truth for the code knowledge graph schema.
+ *
+ * ALL types in the system are derived from these const declarations.
+ * The compiler enforces exhaustiveness — adding a new node label or edge
+ * type requires updating every switch/map that handles them.
+ */
+import {} from '../../lib/type-utils.js';
+export { assertNever } from '../../lib/type-utils.js';
+// ---------------------------------------------------------------------------
+// Node labels — const tuple is the single source of truth
+// ---------------------------------------------------------------------------
+export const NODE_LABELS = [
+    'File', 'Folder', 'Function', 'Class', 'Interface', 'Method', 'CodeElement',
+    'Community', 'Process',
+    'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
+    'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation',
+    'Constructor', 'Template', 'Module',
+];
+/**
+ * Runtime check that `value` is a valid NodeLabel.
+ * Throws a TypeError when `value` is not an element of NODE_LABELS; returns nothing otherwise.
+ */
+export function assertNodeLabel(value) {
+    if (!NODE_LABELS.includes(value)) {
+        throw new TypeError(`Invalid node label: ${value}`);
+    }
+}
+// ---------------------------------------------------------------------------
+// Edge types — const tuple is the single source of truth
+// ---------------------------------------------------------------------------
+export const EDGE_TYPES = [
+    'CONTAINS', 'DEFINES', 'IMPORTS', 'CALLS', 'EXTENDS', 'IMPLEMENTS',
+    'HAS_METHOD', 'OVERRIDES', 'MEMBER_OF', 'STEP_IN_PROCESS',
+    'DEPENDS_ON', 'PROVIDES',
+];
+/**
+ * Runtime check that `value` is a valid EdgeType.
+ * Throws a TypeError when `value` is not an element of EDGE_TYPES; returns nothing otherwise.
+ */
+export function assertEdgeType(value) {
+    if (!EDGE_TYPES.includes(value)) {
+        throw new TypeError(`Invalid edge type: ${value}`);
+    }
+}
+/**
+ * Construct a NodeId from a raw string.
+ * The only runtime validation is a truthiness check (a falsy input such as '' throws a TypeError);
+ * the value is returned unchanged, so the branding exists only at the type level.
+ */
+export function toNodeId(raw) {
+    if (!raw)
+        throw new TypeError('NodeId cannot be empty');
+    return raw;
+}
+/**
+ * Construct an EdgeId from a raw string.
+ * Throws a TypeError for a falsy input such as ''; otherwise returns the value unchanged.
+ */
+export function toEdgeId(raw) {
+    if (!raw)
+        throw new TypeError('EdgeId cannot be empty');
+    return raw;
+}
+/** Legacy edge table name constant (kept for compatibility) */
+export const REL_TABLE_NAME = 'CodeRelation';
+// ---------------------------------------------------------------------------
+// SQL schema — the DDL statements that create the SQLite tables
+// ---------------------------------------------------------------------------
+export const SCHEMA_SQL = `
+-- Nodes: unified table for all code elements
+CREATE TABLE IF NOT EXISTS nodes (
+  id TEXT PRIMARY KEY,
+  label TEXT NOT NULL,
+  name TEXT NOT NULL DEFAULT '',
+  filePath TEXT NOT NULL DEFAULT '',
+  startLine INTEGER,
+  endLine INTEGER,
+  isExported INTEGER,
+  content TEXT NOT NULL DEFAULT '',
+  description TEXT NOT NULL DEFAULT '',
+  heuristicLabel TEXT,
+  cohesion REAL,
+  symbolCount INTEGER,
+  keywords TEXT,
+  enrichedBy TEXT,
+  processType TEXT,
+  stepCount INTEGER,
+  communities TEXT,
+  entryPointId TEXT,
+  terminalId TEXT,
+  parameterCount INTEGER,
+  returnType TEXT,
+  nameExpanded TEXT DEFAULT ''
+);
+CREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes(label);
+CREATE INDEX IF NOT EXISTS idx_nodes_name ON nodes(name);
+CREATE INDEX IF NOT EXISTS idx_nodes_filePath ON nodes(filePath);
+CREATE INDEX IF NOT EXISTS idx_nodes_label_name ON nodes(label, name);
+CREATE INDEX IF NOT EXISTS idx_nodes_filePath_lines ON nodes(filePath, startLine, endLine);
+-- Edges: single table for all relationships
+CREATE TABLE IF NOT EXISTS edges (
+  id TEXT PRIMARY KEY,
+  sourceId TEXT NOT NULL,
+  targetId TEXT NOT NULL,
+  type TEXT NOT NULL,
+  confidence REAL NOT NULL DEFAULT 1.0,
+  reason TEXT NOT NULL DEFAULT '',
+  step INTEGER NOT NULL DEFAULT 0,
+  callLine INTEGER
+);
+CREATE INDEX IF NOT EXISTS idx_edges_sourceId ON edges(sourceId);
+CREATE INDEX IF NOT EXISTS idx_edges_targetId ON edges(targetId);
+CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type);
+CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(sourceId, type);
+CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(targetId, type);
+-- Embeddings: vector storage
+CREATE TABLE IF NOT EXISTS embeddings (
+  nodeId TEXT PRIMARY KEY,
+  embedding BLOB NOT NULL,
+  textHash TEXT
+);
+-- FTS5 virtual table (auto-updated via triggers)
+CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
+  name,
+  nameExpanded,
+  filePath,
+  content,
+  content='nodes',
+  content_rowid='rowid'
+);
+CREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN
+  INSERT INTO nodes_fts(rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);
+END;
+CREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN
+  INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);
+END;
+CREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN
+  INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES ('delete', old.rowid, old.name, old.nameExpanded, old.filePath, old.content);
+  INSERT INTO nodes_fts(nodes_fts, rowid, name, nameExpanded, filePath, content) VALUES (new.rowid, new.name, new.nameExpanded, new.filePath, new.content);
+END;
+`;

package/dist/core/embeddings/embedder.d.ts CHANGED Viewed

@@ -2,23 +2,24 @@
  * @file embedder.ts
  * @description MLX-accelerated code embedder via Python subprocess
  *
- * Replaces the previous ONNX/transformers.js embedder with Jina Code 1.5B
- * running on Apple Silicon Metal via MLX. Fail-fast — no fallback.
+ * Spawns a persistent Python process running Jina Code 1.5B on Apple Silicon
+ * Metal via MLX. Communicates via newline-delimited JSON over stdio.
  *
- * Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 1536 dims, 32K context)
- * Matryoshka truncation to 256 dims for optimal speed/quality tradeoff
+ * Architecture: request queue with sequential processing. Each sendAndReceive()
+ * waits for its specific response — no global resolver that can be stolen by
+ * out-of-order messages.
+ *
+ * Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 256-dim Matryoshka)
  */
 import { type EmbeddingConfig, type ModelProgress } from './types.js';
-/** Progress callback for model loading */
 export type ModelProgressCallback = (progress: ModelProgress) => void;
-/** Get the current inference device */
 export declare const getCurrentDevice: () => string | null;
+export declare const isEmbedderReady: () => boolean;
+export declare const getEmbeddingDims: () => number;
 /**
- * Initialize the MLX embedder (spawns Python subprocess, loads model)
+ * Initialize the MLX embedder (spawns Python subprocess, waits for model load)
  */
-export declare const initEmbedder: (_onProgress?: ModelProgressCallback, _config?: Partial<EmbeddingConfig>) => Promise<any>;
-/** Check if the embedder is initialized and ready */
-export declare const isEmbedderReady: () => boolean;
+export declare const initEmbedder: (_onProgress?: ModelProgressCallback, _config?: Partial<EmbeddingConfig>) => Promise<void>;
 /** Get the embedder instance — not applicable for MLX, returns null */
 export declare const getEmbedder: () => any;
 /**
@@ -26,10 +27,18 @@ export declare const getEmbedder: () => any;
  */
 export declare const embedText: (text: string) => Promise<Float32Array>;
 /**
- * Embed multiple texts in a single batch
+ * Embed multiple texts with one request to the Python process.
+ *
+ * The whole array is sent in a single call (no chunking on the Node side);
+ * batching for the GPU is left to the Python embedder. Resolves to one
+ * Float32Array per returned embedding, or [] for an empty input.
  */
 export declare const embedBatch: (texts: string[]) => Promise<Float32Array[]>;
-/** Convert Float32Array to number[] for LadybugDB storage */
+/**
+ * Embed a query text for semantic search (cached, uses "query" prompt type)
+ */
+export declare const embedQuery: (query: string) => Promise<number[]>;
+/** Convert Float32Array to number[] for database storage */
 export declare const embeddingToArray: (embedding: Float32Array) => number[];
 /** Dispose the embedder subprocess */
 export declare const disposeEmbedder: () => Promise<void>;

package/dist/core/embeddings/embedder.js CHANGED Viewed

@@ -3,43 +3,42 @@
  * @file embedder.ts
  * @description MLX-accelerated code embedder via Python subprocess
  *
- * Replaces the previous ONNX/transformers.js embedder with Jina Code 1.5B
- * running on Apple Silicon Metal via MLX. Fail-fast — no fallback.
+ * Spawns a persistent Python process running Jina Code 1.5B on Apple Silicon
+ * Metal via MLX. Communicates via newline-delimited JSON over stdio.
  *
- * Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 1536 dims, 32K context)
- * Matryoshka truncation to 256 dims for optimal speed/quality tradeoff
+ * Architecture: request queue with sequential processing. Each sendAndReceive()
+ * waits for its specific response — no global resolver that can be stolen by
+ * out-of-order messages.
+ *
+ * Model: jinaai/jina-code-embeddings-1.5b-mlx (1.54B params, 256-dim Matryoshka)
  */
-import { spawn, execFileSync } from 'child_process';
+import { spawn } from 'child_process';
 import path from 'path';
 import { fileURLToPath } from 'url';
+import { queryEmbeddingCache } from '../search/query-cache.js';
 import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-// Path to MLX embedder script (relative to compiled dist/)
 const MLX_SCRIPT = path.resolve(__dirname, '..', '..', '..', 'models', 'mlx-embedder.py');
-// Singleton subprocess
+// ---------------------------------------------------------------------------
+// Singleton state
+// ---------------------------------------------------------------------------
 let mlxProcess = null;
-let pendingResolve = null;
-let pendingReject = null;
-let lineBuffer = '';
 let ready = false;
-/** Get the current inference device */
+let lineBuffer = '';
+/** Queued requests waiting for responses — FIFO order matches Python's processing */
+const responseQueue = [];
+/** Promise that resolves when the process is ready (model loaded) */
+let readyPromise = null;
+let readyResolve = null;
 export const getCurrentDevice = () => ready ? 'mlx-metal' : null;
+export const isEmbedderReady = () => ready;
+export const getEmbeddingDims = () => DEFAULT_EMBEDDING_CONFIG.dimensions;
 function ensureProcess() {
     if (mlxProcess && !mlxProcess.killed)
         return mlxProcess;
-    // Check prerequisites
-    try {
-        execFileSync('python3', ['-c', 'import mlx; import tokenizers'], {
-            timeout: 5000,
-            stdio: ['pipe', 'pipe', 'pipe'],
-        });
-    }
-    catch {
-        throw new Error('MLX embedder requires Python 3 + MLX on Apple Silicon.\n' +
-            'Install: pip3 install mlx tokenizers huggingface_hub\n' +
-            'The embedding model will download automatically on first use (~3GB).');
-    }
+    // Create ready promise before spawning so we don't miss the message
+    readyPromise = new Promise(resolve => { readyResolve = resolve; });
     mlxProcess = spawn('python3', [MLX_SCRIPT], {
         stdio: ['pipe', 'pipe', 'pipe'],
         env: { ...process.env, TOKENIZERS_PARALLELISM: 'false' },
@@ -54,24 +53,29 @@ function ensureProcess() {
                 continue;
             try {
                 const msg = JSON.parse(line);
+                // Startup ready message — NOT a response to any request
                 if (msg.status === 'ready' && !ready) {
                     ready = true;
-                    console.error(`Code Mapper: MLX embedder ready (${msg.device}, loaded in ${msg.load_ms}ms)`);
+                    console.error(`Code Mapper: MLX embedder ready (${msg.device ?? 'unknown'}, loaded in ${msg.load_ms ?? '?'}ms)`);
+                    readyResolve?.();
+                    readyResolve = null;
+                    continue; // Don't dispatch to response queue
                 }
-                if (pendingResolve) {
-                    const resolve = pendingResolve;
-                    pendingResolve = null;
-                    pendingReject = null;
-                    resolve(msg);
+                // Response to a queued request — dispatch FIFO
+                const pending = responseQueue.shift();
+                if (pending) {
+                    pending.resolve(msg);
+                }
+                else {
+                    console.error(`Code Mapper: MLX embedder unexpected message (no pending request): ${line.slice(0, 100)}`);
                 }
             }
             catch {
-                // Non-JSON output — ignore
+                // Non-JSON output — ignore (Python progress bars, etc.)
             }
         }
     });
     mlxProcess.stderr.on('data', (chunk) => {
-        // Forward stderr for debugging
         const msg = chunk.toString().trim();
         if (msg)
             console.error(`[mlx-embedder] ${msg}`);
@@ -79,49 +83,60 @@ function ensureProcess() {
     mlxProcess.on('exit', (code) => {
         ready = false;
         mlxProcess = null;
-        if (pendingReject) {
-            const reject = pendingReject;
-            pendingResolve = null;
-            pendingReject = null;
-            reject(new Error(`MLX embedder exited with code ${code}`));
+        // Reject all pending requests
+        const err = new Error(`MLX embedder exited with code ${code}`);
+        for (const pending of responseQueue) {
+            pending.reject(err);
         }
+        responseQueue.length = 0;
+        // Also resolve readyPromise so init doesn't hang
+        readyResolve?.();
+        readyResolve = null;
     });
     return mlxProcess;
 }
+/**
+ * Send a request and wait for its response.
+ *
+ * Requests are queued FIFO — Python processes them in order and sends
+ * responses in the same order. Each caller gets exactly its own response.
+ */
 function sendAndReceive(request) {
     return new Promise((resolve, reject) => {
         const proc = ensureProcess();
-        pendingResolve = resolve;
-        pendingReject = reject;
+        responseQueue.push({ resolve, reject });
         proc.stdin.write(JSON.stringify(request) + '\n');
     });
 }
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
 /**
- * Initialize the MLX embedder (spawns Python subprocess, loads model)
+ * Initialize the MLX embedder (spawns Python subprocess, waits for model load)
  */
 export const initEmbedder = async (_onProgress, _config = {}) => {
     if (ready)
         return;
     ensureProcess();
-    // Wait for the "ready" message from the Python process
-    const msg = await sendAndReceive({ cmd: 'ping' });
-    if (msg.error) {
-        throw new Error(`MLX embedder failed: ${msg.error}`);
+    // Wait for the automatic "ready" message from Python (model loaded)
+    // No ping needed — Python sends ready on its own after loading the model
+    await readyPromise;
+    if (!ready) {
+        throw new Error('MLX embedder failed to start — process exited before ready');
     }
-    return msg;
 };
-/** Check if the embedder is initialized and ready */
-export const isEmbedderReady = () => ready;
 /** Get the embedder instance — not applicable for MLX, returns null */
 export const getEmbedder = () => {
     if (!ready)
         throw new Error('MLX embedder not initialized. Call initEmbedder() first.');
-    return null; // No JS-side instance — inference happens in Python
+    return null;
 };
 /**
  * Embed a single text string
  */
 export const embedText = async (text) => {
+    if (!ready)
+        await initEmbedder();
     const result = await sendAndReceive({
         texts: [text],
         task: 'nl2code',
@@ -133,11 +148,21 @@ export const embedText = async (text) => {
     return new Float32Array(result.embeddings[0]);
 };
 /**
- * Embed multiple texts in a single batch
+ * Embed multiple texts with one request to the Python process.
+ *
+ * The whole array is sent in a single call (no chunking on the Node side);
+ * per the inline note below, Python's internal length-tiered batching
+ * handles GPU utilization. Resolves to [] for an empty input.
  */
 export const embedBatch = async (texts) => {
     if (texts.length === 0)
         return [];
+    if (!ready)
+        await initEmbedder();
+    // Send all texts to Python in one call — Python does optimal length-tiered
+    // batching internally for Metal GPU. No need to double-batch at the Node level.
+    console.error(`Code Mapper: embedBatch sending ${texts.length} texts to MLX...`);
+    const t0 = Date.now();
     const result = await sendAndReceive({
         texts,
         task: 'nl2code',
@@ -146,9 +171,35 @@ export const embedBatch = async (texts) => {
     });
     if (result.error)
         throw new Error(`Batch embedding failed: ${result.error}`);
+    if (!result.embeddings || !Array.isArray(result.embeddings)) {
+        throw new Error(`Batch embedding returned invalid response: ${JSON.stringify(result).slice(0, 200)}`);
+    }
+    const elapsed = Date.now() - t0;
+    console.error(`Code Mapper: embedBatch complete — ${result.embeddings.length} embeddings in ${elapsed}ms (${result.ms ?? '?'}ms inference)`);
     return result.embeddings.map((e) => new Float32Array(e));
 };
-/** Convert Float32Array to number[] for LadybugDB storage */
+/**
+ * Embed a query text for semantic search (cached, uses "query" prompt type)
+ */
+export const embedQuery = async (query) => {
+    const cached = queryEmbeddingCache.get(query);
+    if (cached)
+        return cached;
+    if (!ready)
+        await initEmbedder();
+    const result = await sendAndReceive({
+        texts: [query],
+        task: 'nl2code',
+        type: 'query',
+        dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
+    });
+    if (result.error)
+        throw new Error(`Query embedding failed: ${result.error}`);
+    const embedding = result.embeddings[0];
+    queryEmbeddingCache.set(query, embedding);
+    return embedding;
+};
+/** Convert Float32Array to number[] for database storage */
 export const embeddingToArray = (embedding) => (
     // Array.from copies the elements into a fresh plain array
     Array.from(embedding)
 );
@@ -157,7 +208,6 @@ export const disposeEmbedder = async () => {
     if (mlxProcess && !mlxProcess.killed) {
         try {
             mlxProcess.stdin.write(JSON.stringify({ cmd: 'quit' }) + '\n');
-            // Give it a moment to exit gracefully
             await new Promise(resolve => setTimeout(resolve, 500));
         }
         catch { }
@@ -168,4 +218,8 @@ export const disposeEmbedder = async () => {
         mlxProcess = null;
     }
     ready = false;
+    readyPromise = null;
+    readyResolve = null;
+    responseQueue.length = 0;
+    queryEmbeddingCache.clear();
 };

package/dist/core/embeddings/embedding-pipeline.d.ts CHANGED Viewed

@@ -1,41 +1,67 @@
 /**
  * @file embedding-pipeline.ts
  * @description Orchestrates the background embedding process:
- * 1) Query embeddable nodes from LadybugDB
+ * 1) Query embeddable nodes from SQLite
  * 2) Generate text representations
  * 3) Batch embed using transformers.js
- * 4) Store embeddings in LadybugDB
- * 5) Create vector index for semantic search
+ * 4) Store embeddings in SQLite
+ * 5) Vector search via brute-force cosine similarity in adapter.ts
  */
 import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
+import type Database from 'better-sqlite3';
 /** Progress callback type */
 export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
+/** Graph context for a node: callers, callees, and community module */
+export interface GraphContext {
+    callers: string[];
+    callees: string[];
+    module: string;
+}
 /**
- * Run the full embedding pipeline (load model, embed nodes, create index)
- * @param executeQuery - Execute Cypher queries against LadybugDB
- * @param executeWithReusedStatement - Execute with reused prepared statement
+ * Fetch graph context (callers, callees, community module) for a set of nodes.
+ *
+ * This enrichment adds relationship context so that embedding text like
+ * "import resolution pipeline" matches `processImports` because its caller
+ * "runPipelineFromRepo" contains "pipeline".
+ *
+ * Reusable by both the full analyze pipeline and incremental refresh.
+ *
+ * @param db - Open SQLite database instance
+ * @param nodes - Nodes to fetch context for (must have `id` field)
+ * @returns Map from node ID to graph context
+ */
+export declare function fetchGraphContext(db: Database.Database, nodes: ReadonlyArray<{
+    id: string;
+}>): Map<string, GraphContext>;
+/**
+ * Enrich embedding text with graph context (callers, callees, module).
+ *
+ * Inserts context lines (Module, Called by, Calls) after the header
+ * section of the generated text, before the code snippet.
+ *
+ * @param text - Base embedding text from generateEmbeddingText
+ * @param ctx - Graph context for this node
+ * @returns Enriched text
+ */
+export declare function enrichTextWithGraphContext(text: string, ctx: GraphContext): string;
+/**
+ * Run the full embedding pipeline (load model, embed nodes, store in SQLite)
+ * @param db - Open SQLite database instance
  * @param onProgress - Progress callback
  * @param config - Configuration override
  * @param skipNodeIds - Node IDs that already have embeddings (incremental mode)
  */
-export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
+export declare function runEmbeddingPipeline(db: Database.Database, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>): Promise<void>;
 /**
- * Perform semantic search via the CodeEmbedding vector index
- * @param executeQuery - Execute Cypher queries
- * @param query - Search query text
- * @param k - Number of results (default: 10)
- * @param maxDistance - Maximum cosine distance threshold (default: 0.5)
- * @returns Search results ordered by relevance
- */
-export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
-/**
- * Semantic search with flattened results (graph expansion placeholder)
+ * Semantic vector search against a SQLite database.
  *
- * For full graph traversal, use the execute_vector_cypher tool directly
+ * Uses brute-force cosine similarity via adapter.searchVector, then
+ * enriches results with node metadata. This mirrors the pattern in
+ * local-backend.ts but as a standalone function for hybrid search.
  *
- * @param executeQuery - Execute Cypher queries
+ * @param db - Open SQLite database instance
  * @param query - Search query text
- * @param k - Number of semantic matches (default: 5)
- * @param _hops - Unused, kept for API compatibility
+ * @param k - Number of results (default: 10)
+ * @remarks The declared signature takes no maxDistance argument; presumably the default cosine distance threshold from types.ts is applied — confirm against the implementation.
  */
-export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;
+export declare function semanticSearchSqlite(db: Database.Database, query: string, k?: number): Promise<SemanticSearchResult[]>;