@zuvia-software-solutions/code-mapper 2.2.2 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ {
2
+ "model_type": "qwen3",
3
+ "hidden_size": 1024,
4
+ "num_hidden_layers": 28,
5
+ "intermediate_size": 3072,
6
+ "num_attention_heads": 16,
7
+ "num_key_value_heads": 8,
8
+ "rms_norm_eps": 1e-06,
9
+ "vocab_size": 151936,
10
+ "max_position_embeddings": 32768,
11
+ "rope_theta": 3500000,
12
+ "rope_parameters": {
13
+ "rope_theta": 3500000,
14
+ "rope_type": "default"
15
+ },
16
+ "head_dim": 128,
17
+ "tie_word_embeddings": true,
18
+ "rope_scaling": null
19
+ }
@@ -0,0 +1,260 @@
1
+ """
2
+ Jina Embeddings v5 Text Small - MLX Implementation
3
+
4
+ Pure MLX port of jina-embeddings-v5-text-small (Qwen3-0.6B backbone).
5
+ Zero dependency on PyTorch or transformers.
6
+
7
+ Features:
8
+ - Causal attention (decoder architecture)
9
+ - QKNorm (q_norm/k_norm per head)
10
+ - Last-token pooling
11
+ - L2 normalization
12
+ - Matryoshka embedding dimensions: [32, 64, 128, 256, 512, 768, 1024]
13
+ - Max sequence length: 32768 tokens
14
+ - Embedding dimension: 1024
15
+
16
+ Architecture:
17
+ - RoPE (rope_theta from config)
18
+ - SwiGLU MLP
19
+ - RMSNorm
20
+ - QKNorm (RMSNorm on Q/K per head)
21
+ - No attention bias
22
+ """
23
+
24
+ from dataclasses import dataclass
25
+ from typing import Any, Dict, Optional, Union
26
+
27
+ import mlx.core as mx
28
+ import mlx.nn as nn
29
+
30
+
31
@dataclass
class ModelArgs:
    """Model hyperparameters; field names mirror the keys in config.json so the
    instance can be built directly via ModelArgs(**config)."""
    model_type: str
    hidden_size: int
    num_hidden_layers: int
    intermediate_size: int
    num_attention_heads: int
    rms_norm_eps: float
    vocab_size: int
    num_key_value_heads: int
    max_position_embeddings: int
    head_dim: int
    tie_word_embeddings: bool
    # Newer configs nest rope settings under "rope_parameters"; older ones use
    # a flat "rope_theta". Attention.__init__ resolves whichever is present.
    rope_parameters: Optional[Dict[str, Union[float, str]]] = None
    rope_theta: Optional[float] = None
    # Present (as null) in config.json; accepted here only so **config unpacking
    # does not raise on the extra key.
    rope_scaling: Optional[Dict[str, Union[float, str]]] = None
47
+
48
+
49
class Attention(nn.Module):
    """Multi-head attention with grouped-query KV heads, per-head QKNorm, and
    RoPE applied via mx.fast kernels (Qwen3-style, no attention bias)."""

    def __init__(self, args: ModelArgs):
        super().__init__()

        dim = args.hidden_size
        self.n_heads = n_heads = args.num_attention_heads
        # GQA: key/value heads can be fewer than query heads (e.g. 8 vs 16).
        self.n_kv_heads = n_kv_heads = args.num_key_value_heads

        head_dim = args.head_dim
        # Standard 1/sqrt(head_dim) attention scaling.
        self.scale = head_dim**-0.5
        self.head_dim = head_dim

        # Projections are bias-free (Qwen3 convention, see module docstring).
        self.q_proj = nn.Linear(dim, n_heads * head_dim, bias=False)
        self.k_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
        self.v_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
        self.o_proj = nn.Linear(n_heads * head_dim, dim, bias=False)

        # Qwen3 has QKNorm: RMSNorm applied to Q and K per head (over head_dim).
        self.q_norm = nn.RMSNorm(head_dim, eps=args.rms_norm_eps)
        self.k_norm = nn.RMSNorm(head_dim, eps=args.rms_norm_eps)

        # Resolve rope_theta from config: prefer nested rope_parameters, then
        # the flat rope_theta field, then the classic 10000.0 default.
        if args.rope_parameters and 'rope_theta' in args.rope_parameters:
            rope_theta = float(args.rope_parameters['rope_theta'])
        elif args.rope_theta:
            rope_theta = float(args.rope_theta)
        else:
            rope_theta = 10000.0
        self.rope_theta = rope_theta

    def __call__(
        self,
        x: mx.array,
        mask: Optional[mx.array] = None,
    ) -> mx.array:
        # x: (B, L, hidden); mask (if given) is an additive attention mask.
        B, L, D = x.shape

        queries, keys, values = self.q_proj(x), self.k_proj(x), self.v_proj(x)

        # Reshape to (B, heads, L, head_dim); QKNorm is applied on the last
        # axis (per head) BEFORE the transpose.
        queries = self.q_norm(queries.reshape(B, L, self.n_heads, -1)).transpose(0, 2, 1, 3)
        keys = self.k_norm(keys.reshape(B, L, self.n_kv_heads, -1)).transpose(0, 2, 1, 3)
        values = values.reshape(B, L, self.n_kv_heads, -1).transpose(0, 2, 1, 3)

        # RoPE via mx.fast; offset=0 because there is no KV cache (full-sequence
        # encoding only).
        queries = mx.fast.rope(queries, self.head_dim, traditional=False, base=self.rope_theta, scale=1.0, offset=0)
        keys = mx.fast.rope(keys, self.head_dim, traditional=False, base=self.rope_theta, scale=1.0, offset=0)

        # Scaled dot-product attention (handles GQA natively). Mask is cast to
        # the query dtype so the additive -1e4 entries survive fp16/bf16.
        output = mx.fast.scaled_dot_product_attention(
            queries, keys, values,
            mask=mask.astype(queries.dtype) if mask is not None else None,
            scale=self.scale,
        )

        # Back to (B, L, heads*head_dim) and project to hidden size.
        output = output.transpose(0, 2, 1, 3).reshape(B, L, -1)
        return self.o_proj(output)
106
+
107
+
108
class MLP(nn.Module):
    """SwiGLU feed-forward block: down_proj(silu(gate_proj(x)) * up_proj(x))."""

    def __init__(self, dim, hidden_dim):
        super().__init__()
        self.gate_proj = nn.Linear(dim, hidden_dim, bias=False)
        self.down_proj = nn.Linear(hidden_dim, dim, bias=False)
        self.up_proj = nn.Linear(dim, hidden_dim, bias=False)

    def __call__(self, x) -> mx.array:
        # Gated activation: silu on the gate branch, elementwise product with
        # the linear up branch, then project back down to the model dim.
        activated = nn.silu(self.gate_proj(x)) * self.up_proj(x)
        return self.down_proj(activated)
118
+
119
+
120
class TransformerBlock(nn.Module):
    """Pre-norm decoder layer: attention and MLP sub-layers, each wrapped in a
    residual connection."""

    def __init__(self, args: ModelArgs):
        super().__init__()
        self.self_attn = Attention(args)
        self.mlp = MLP(args.hidden_size, args.intermediate_size)
        self.input_layernorm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
        self.post_attention_layernorm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)

    def __call__(
        self,
        x: mx.array,
        mask: Optional[mx.array] = None,
    ) -> mx.array:
        # Attention sub-layer: normalize, attend, add residual.
        h = x + self.self_attn(self.input_layernorm(x), mask)
        # MLP sub-layer: normalize, transform, add residual.
        return h + self.mlp(self.post_attention_layernorm(h))
138
+
139
+
140
class Qwen3Model(nn.Module):
    """Token embedding table, a stack of decoder layers, and a final RMSNorm."""

    def __init__(self, args: ModelArgs):
        super().__init__()
        self.embed_tokens = nn.Embedding(args.vocab_size, args.hidden_size)
        self.layers = [TransformerBlock(args=args) for _ in range(args.num_hidden_layers)]
        self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)

    def __call__(self, inputs: mx.array, mask: Optional[mx.array] = None):
        # Embed token ids, run every decoder layer in order, normalize last.
        hidden = self.embed_tokens(inputs)
        for block in self.layers:
            hidden = block(hidden, mask)
        return self.norm(hidden)
152
+
153
+
154
class JinaEmbeddingModel(nn.Module):
    """Jina v5-text-small embedding model with last-token pooling."""

    def __init__(self, config: dict):
        super().__init__()
        # config must contain exactly the keys declared on ModelArgs.
        args = ModelArgs(**config)
        self.model = Qwen3Model(args)
        self.config = config

    def __call__(
        self,
        input_ids: mx.array,
        attention_mask: Optional[mx.array] = None,
    ):
        """Run the backbone and return L2-normalized (batch, hidden) embeddings.

        attention_mask, if given, is 1 for real tokens and 0 for padding.
        """
        batch_size, seq_len = input_ids.shape

        # Causal mask (Qwen3 is a decoder model): additive mask with -1e4 on
        # future positions, 0 elsewhere, broadcast to (1, 1, L, L).
        causal_mask = mx.tril(mx.ones((seq_len, seq_len)))
        causal_mask = mx.where(causal_mask == 0, -1e4, 0.0)
        causal_mask = causal_mask[None, None, :, :]

        # Combine with padding mask: padded key positions also get -1e4, shaped
        # (B, 1, 1, L) so it broadcasts over query positions.
        if attention_mask is not None:
            padding_mask = mx.where(attention_mask == 0, -1e4, 0.0)
            padding_mask = padding_mask[:, None, None, :]
            mask = causal_mask + padding_mask
        else:
            mask = causal_mask

        hidden_states = self.model(input_ids, mask)

        # Last token pooling: pick the hidden state of the final *real* token
        # per sequence (sum of mask - 1 gives its index); without a mask, just
        # take position -1.
        if attention_mask is not None:
            sequence_lengths = mx.sum(attention_mask, axis=1) - 1
            batch_indices = mx.arange(hidden_states.shape[0])
            embeddings = hidden_states[batch_indices, sequence_lengths]
        else:
            embeddings = hidden_states[:, -1, :]

        # L2 normalization so downstream cosine similarity is a dot product.
        norms = mx.linalg.norm(embeddings, axis=1, keepdims=True)
        embeddings = embeddings / norms

        return embeddings

    def encode(
        self,
        texts: list[str],
        tokenizer,
        max_length: int = 8192,
        truncate_dim: Optional[int] = None,
        task_type: str = "retrieval.query",
    ):
        """
        Encode texts to embeddings.

        Args:
            texts: List of input texts
            tokenizer: Tokenizer instance (from tokenizers library)
            max_length: Maximum sequence length
            truncate_dim: Optional Matryoshka dimension [32, 64, 128, 256, 512, 768, 1024]
            task_type: Task prefix ("retrieval.query", "retrieval.passage", etc.)

        Returns:
            Embeddings array [batch, dim]

        NOTE(review): an empty texts list makes the max() below raise
        ValueError — callers are expected to pass at least one text.
        """
        # Task-specific instruction prefixes; unknown task types get none.
        prefix_map = {
            "retrieval.query": "Query: ",
            "retrieval.passage": "Document: ",
            "classification": "Document: ",
            "text-matching": "Document: ",
            "clustering": "Document: ",
        }
        prefix = prefix_map.get(task_type, "")

        if prefix:
            texts = [prefix + text for text in texts]

        encodings = tokenizer.encode_batch(texts)

        # Pad only to the longest sequence in this batch (capped at max_length),
        # not to the full max_length.
        max_len = min(max_length, max(len(enc.ids) for enc in encodings))
        input_ids = []
        attention_mask = []

        for encoding in encodings:
            ids = encoding.ids[:max_len]
            mask = encoding.attention_mask[:max_len]

            # Right-pad with token id 0 and mask 0.
            pad_len = max_len - len(ids)
            if pad_len > 0:
                ids = ids + [0] * pad_len
                mask = mask + [0] * pad_len

            input_ids.append(ids)
            attention_mask.append(mask)

        input_ids = mx.array(input_ids)
        attention_mask = mx.array(attention_mask)

        embeddings = self(input_ids, attention_mask)

        # Matryoshka truncation: slice to the requested dim, then re-normalize
        # since truncation changes the vector norm.
        if truncate_dim is not None:
            embeddings = embeddings[:, :truncate_dim]
            norms = mx.linalg.norm(embeddings, axis=1, keepdims=True)
            embeddings = embeddings / norms

        return embeddings
@@ -0,0 +1,483 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MLX-accelerated code embedder for Apple Silicon.
4
+
5
+ TWO MODES:
6
+ 1. Batch mode (main use): reads nodes directly from SQLite, embeds, writes back.
7
+ No IPC overhead — everything happens in one process.
8
+ Usage: python3 mlx-embedder.py batch <db_path> [--dims 256] [--max-tokens 2048]
9
+
10
+ 2. Interactive mode (for MCP query embedding): reads JSON from stdin.
11
+ Usage: python3 mlx-embedder.py [interactive]
12
+
13
+ Model: Jina Embeddings v5 Text Small Retrieval (677M params, Qwen3-0.6B backbone)
14
+ Optimized with int4 quantization (Linear) + int6 quantization (Embedding).
15
+ """
16
+
17
+ import sys
18
+ import os
19
+ import json
20
+ import time
21
+ import struct
22
+ import hashlib
23
+
24
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
+
26
+ import mlx.core as mx
27
+ import mlx.nn as nn
28
+ from tokenizers import Tokenizer
29
+
30
+ MODEL_DIR = os.path.dirname(os.path.abspath(__file__)) + "/jina-v5-small-mlx"
31
+
32
+
33
+
34
+
35
+
36
+
37
def ensure_model_downloaded():
    """Download model weights from HuggingFace if not present.

    Emits JSON progress lines on stdout (the parent process parses them).

    Raises:
        RuntimeError: weights are missing and huggingface_hub is not installed.
    """
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    if os.path.exists(weights_path):
        return

    print(json.dumps({"phase": "downloading", "message": "Downloading embedding model (~1.1GB, first time only)..."}), flush=True)
    try:
        from huggingface_hub import hf_hub_download
        import shutil
        # Fix: on a fresh install MODEL_DIR may not exist yet, and
        # shutil.copy below would fail with FileNotFoundError without it.
        os.makedirs(MODEL_DIR, exist_ok=True)
        repo = "jinaai/jina-embeddings-v5-text-small-retrieval-mlx"
        for fname in ["model.safetensors", "tokenizer.json", "vocab.json", "merges.txt", "tokenizer_config.json"]:
            dest = os.path.join(MODEL_DIR, fname)
            # Skip files already present so retries only fetch what's missing.
            if not os.path.exists(dest):
                path = hf_hub_download(repo, fname)
                shutil.copy(path, dest)
        print(json.dumps({"phase": "downloaded", "message": "Model downloaded successfully"}), flush=True)
    except ImportError:
        raise RuntimeError(
            "Model weights not found. Install huggingface_hub to auto-download:\n"
            "  pip3 install huggingface_hub\n"
            "Or manually download from: https://huggingface.co/jinaai/jina-embeddings-v5-text-small-retrieval-mlx"
        )
+ )
60
+
61
+
62
def load_model():
    """Load model, quantize for speed. Auto-downloads weights on first use.

    Returns:
        (model, tokenizer): the quantized JinaEmbeddingModel and the Rust
        HuggingFace Tokenizer loaded from tokenizer.json.
    """
    ensure_model_downloaded()

    # model.py lives inside MODEL_DIR; prepend it so `from model import ...`
    # resolves to the bundled implementation.
    sys.path.insert(0, MODEL_DIR)
    from model import JinaEmbeddingModel

    with open(os.path.join(MODEL_DIR, "config.json")) as f:
        config = json.load(f)

    model = JinaEmbeddingModel(config)
    weights = mx.load(os.path.join(MODEL_DIR, "model.safetensors"))
    model.load_weights(list(weights.items()))

    # Quantize AFTER loading full-precision weights: int4 for Linear layers,
    # int6 for Embedding (the module docstring calls out this split).
    nn.quantize(model.model, bits=4, group_size=64,
                class_predicate=lambda _, m: isinstance(m, nn.Linear))
    nn.quantize(model.model, bits=6, group_size=64,
                class_predicate=lambda _, m: isinstance(m, nn.Embedding))
    # Force lazy MLX graphs to materialize now, not on the first encode call.
    mx.eval(model.parameters())

    tokenizer = Tokenizer.from_file(os.path.join(MODEL_DIR, "tokenizer.json"))
    return model, tokenizer
84
+
85
+
86
def get_batch_size_for_tokens(token_count):
    """Optimal batch size based on actual token count.

    Longer sequences get smaller batches so peak memory stays roughly flat.
    """
    # (sequence-length ceiling, batch size) tiers, checked in ascending order.
    tiers = ((64, 256), (128, 128), (256, 64), (512, 32), (1024, 16))
    for ceiling, size in tiers:
        if token_count <= ceiling:
            return size
    # Anything longer than 1024 tokens runs in batches of 8.
    return 8
94
+
95
+
96
def embed_tiered(model, tokenizer, texts, task_type="retrieval.passage", truncate_dim=256, max_tokens=2048):
    """Embed texts with token-aware batching. Tokenizes first, batches by token count.
    Returns embeddings in the ORIGINAL input order.

    Args:
        model: callable taking (input_ids, attention_mask) mx.arrays.
        tokenizer: Rust HF tokenizer with encode_batch().
        texts: list of raw strings to embed.
        task_type: selects the instruction prefix ("Query: " / "Document: ").
        truncate_dim: Matryoshka dimension; truncated vectors are re-normalized.
        max_tokens: hard cap on sequence length (longer inputs are truncated).
    """
    if not texts:
        return []

    # Add task prefix
    prefix_map = {"retrieval.query": "Query: ", "retrieval.passage": "Document: "}
    prefix = prefix_map.get(task_type, "")
    prefixed = [prefix + t for t in texts] if prefix else texts

    # Tokenize everything in one call (fast — Rust HF tokenizer)
    encodings = tokenizer.encode_batch(prefixed)

    # Sort by token length for minimal padding: neighbors in `indexed` have
    # similar lengths, so per-batch padding waste is small.
    indexed = sorted(range(len(texts)), key=lambda i: len(encodings[i].ids))

    all_embeddings = [None] * len(texts)
    i = 0

    while i < len(indexed):
        # Peek one position ahead to estimate this batch's token length and
        # pick a batch size. NOTE(review): since `indexed` is sorted ascending,
        # i+1 slightly underestimates the batch's max length — looks like a
        # deliberate cheap heuristic; confirm before changing.
        peek_idx = indexed[min(i + 1, len(indexed) - 1)]
        tok_count = min(len(encodings[peek_idx].ids), max_tokens)
        batch_size = get_batch_size_for_tokens(tok_count)

        # Collect up to batch_size encodings, remembering original positions.
        batch_indices = []
        batch_encs = []
        while len(batch_encs) < batch_size and i < len(indexed):
            orig_idx = indexed[i]
            batch_indices.append(orig_idx)
            batch_encs.append(encodings[orig_idx])
            i += 1

        # Pad only to this batch's longest sequence (capped at max_tokens).
        max_len = min(max_tokens, max(len(e.ids) for e in batch_encs))
        input_ids = []
        attention_mask = []
        for enc in batch_encs:
            ids = enc.ids[:max_len]
            mask = enc.attention_mask[:max_len]
            pad = max_len - len(ids)
            if pad > 0:
                ids = ids + [0] * pad
                mask = mask + [0] * pad
            input_ids.append(ids)
            attention_mask.append(mask)

        embs = model(mx.array(input_ids), mx.array(attention_mask))
        # Matryoshka truncation requires re-normalizing the shortened vectors.
        if truncate_dim and truncate_dim < embs.shape[1]:
            embs = embs[:, :truncate_dim]
            norms = mx.linalg.norm(embs, axis=1, keepdims=True)
            embs = embs / norms
        # Force evaluation of the lazy MLX graph before reading values out.
        mx.eval(embs)

        # Scatter results back to their ORIGINAL input positions.
        emb_list = embs.tolist()
        for j, orig_idx in enumerate(batch_indices):
            all_embeddings[orig_idx] = emb_list[j]

    return all_embeddings
154
+
155
+
156
def float_list_to_blob(floats):
    """Convert list of floats to a binary blob (Float32Array compatible)."""
    # One float32 per value, native byte order — matches JS Float32Array
    # reading on the same machine.
    fmt = str(len(floats)) + "f"
    return struct.pack(fmt, *floats)
159
+
160
+
161
def md5(text):
    """Hex MD5 digest of *text* (UTF-8 encoded); used as a change-detection
    hash, not for security."""
    digest = hashlib.md5(text.encode())
    return digest.hexdigest()
163
+
164
+
165
+ # =========================================================================
166
+ # BATCH MODE — read from SQLite, embed, write back. Zero IPC.
167
+ # =========================================================================
168
+
169
def batch_mode(db_path, dims=256, max_tokens=2048):
    """Embed all embeddable graph nodes from the SQLite DB and write vectors back.

    Reads Function/Class/Method/Interface nodes, builds a semantic text per
    node (name + comment + signature + graph context), skips unchanged nodes
    via an MD5 text hash, dedupes identical texts, embeds with MLX, and writes
    float32 blobs into the `embeddings` table. Progress is reported as JSON
    lines on stdout.

    Args:
        db_path: path to the code-mapper SQLite database.
        dims: Matryoshka truncation dimension for stored vectors.
        max_tokens: per-text token cap passed to embed_tiered.
    """
    import sqlite3

    t0_total = time.time()

    # Load model
    print(json.dumps({"phase": "loading", "message": "Loading MLX model..."}), flush=True)
    model, tokenizer = load_model()
    load_ms = int((time.time() - t0_total) * 1000)
    print(json.dumps({"phase": "loaded", "load_ms": load_ms, "device": str(mx.default_device())}), flush=True)

    # Open database
    db = sqlite3.connect(db_path)
    db.execute("PRAGMA journal_mode=WAL")
    db.execute("PRAGMA synchronous=NORMAL")

    # Ensure textHash column exists (migration) — EAFP: probe with LIMIT 0 and
    # add the column only if the probe fails.
    try:
        db.execute("SELECT textHash FROM embeddings LIMIT 0")
    except sqlite3.OperationalError:
        db.execute("ALTER TABLE embeddings ADD COLUMN textHash TEXT")

    # Query embeddable nodes — skip test/fixture files (BM25 covers them)
    labels = ('Function', 'Class', 'Method', 'Interface')
    placeholders = ','.join('?' * len(labels))
    all_rows = db.execute(
        f"SELECT id, name, label, filePath, content, startLine, endLine, nameExpanded FROM nodes WHERE label IN ({placeholders})",
        labels
    ).fetchall()

    # Filter out test files — they're searchable via BM25 keyword matching
    test_patterns = ('/test/', '/tests/', '/spec/', '/fixtures/', '/__tests__/', '/__mocks__/',
                     '.test.', '.spec.', '_test.', '_spec.')
    rows = [r for r in all_rows if not any(p in (r[3] or '') for p in test_patterns)]
    skipped_tests = len(all_rows) - len(rows)

    print(json.dumps({"phase": "queried", "nodes": len(rows), "skipped_tests": skipped_tests}), flush=True)

    if not rows:
        print(json.dumps({"phase": "done", "embedded": 0, "skipped": 0, "ms": 0}), flush=True)
        db.close()
        return

    # Fetch graph context (callers, callees, module) for richer embedding text
    node_ids = [r[0] for r in rows]
    id_set = set(node_ids)

    # Batch fetch callers
    caller_map = {}   # node id -> names of functions that call it
    callee_map = {}   # node id -> names of functions it calls
    module_map = {}   # node id -> community (module) label

    # Chunk the IN clause to avoid SQLite variable limits
    CHUNK = 500
    for ci in range(0, len(node_ids), CHUNK):
        chunk_ids = node_ids[ci:ci+CHUNK]
        ph = ','.join('?' * len(chunk_ids))

        # Callers: CALLS edges pointing AT these nodes, high-confidence only.
        for row in db.execute(f"SELECT e.targetId, n.name FROM edges e JOIN nodes n ON n.id = e.sourceId WHERE e.targetId IN ({ph}) AND e.type = 'CALLS' AND e.confidence >= 0.7 LIMIT {len(chunk_ids)*3}", chunk_ids):
            caller_map.setdefault(row[0], []).append(row[1])

        # Callees: CALLS edges originating FROM these nodes.
        for row in db.execute(f"SELECT e.sourceId, n.name FROM edges e JOIN nodes n ON n.id = e.targetId WHERE e.sourceId IN ({ph}) AND e.type = 'CALLS' AND e.confidence >= 0.7 LIMIT {len(chunk_ids)*3}", chunk_ids):
            callee_map.setdefault(row[0], []).append(row[1])

        # Module membership via MEMBER_OF edges into Community nodes.
        for row in db.execute(f"SELECT e.sourceId, c.heuristicLabel FROM edges e JOIN nodes c ON c.id = e.targetId WHERE e.sourceId IN ({ph}) AND e.type = 'MEMBER_OF' AND c.label = 'Community' LIMIT {len(chunk_ids)}", chunk_ids):
            module_map[row[0]] = row[1]

    print(json.dumps({"phase": "context", "with_callers": len(caller_map), "with_module": len(module_map)}), flush=True)

    # Get existing text hashes for skip detection
    existing_hashes = {}
    for row in db.execute("SELECT nodeId, textHash FROM embeddings WHERE textHash IS NOT NULL"):
        existing_hashes[row[0]] = row[1]

    # Generate embedding texts + hashes
    # Optimized: semantic summary (name + comment + signature + context)
    # instead of raw code dump. 55% fewer tokens, equal search quality.
    to_embed = []  # (node_id, text, hash)
    skipped = 0

    def extract_first_comment(content):
        """Extract JSDoc/comment as natural language description (max 3 lines)."""
        if not content:
            return ""
        lines = content.split("\n")
        comment_lines = []
        in_block = False
        for l in lines:
            t = l.strip()
            # Start of a /* or /** block comment; also handle single-line blocks.
            if t.startswith("/**") or t.startswith("/*"):
                in_block = True
                inner = t.lstrip("/").lstrip("*").strip().rstrip("*/").strip()
                # Skip @param/@returns-style tags — keep prose only.
                if inner and not inner.startswith("@"):
                    comment_lines.append(inner)
                if "*/" in t:
                    in_block = False
                continue
            # Inside a block comment: collect prose lines until */.
            if in_block:
                if "*/" in t:
                    in_block = False
                    continue
                inner = t.lstrip("*").strip()
                if inner and not inner.startswith("@"):
                    comment_lines.append(inner)
                if len(comment_lines) >= 3:
                    break
                continue
            # Leading // line comments.
            if t.startswith("//"):
                inner = t[2:].strip()
                if inner:
                    comment_lines.append(inner)
                if len(comment_lines) >= 3:
                    break
                continue
            # Leading # comments (but not shebangs).
            if t.startswith("#") and not t.startswith("#!"):
                inner = t[1:].strip()
                if inner:
                    comment_lines.append(inner)
                if len(comment_lines) >= 3:
                    break
                continue
            # First non-comment line ends the leading comment block.
            if comment_lines:
                break
        return " ".join(comment_lines)

    def extract_signature(content, label):
        """Extract code signature without full body."""
        if not content:
            return ""
        lines = content.split("\n")
        # Interfaces: keep up to 30 lines verbatim (they ARE the signature).
        if label == "Interface":
            return "\n".join(lines[:30]).strip() if len(lines) <= 30 else "\n".join(lines[:30]) + "\n  // ..."
        # Classes: collect member/declaration lines only, capped at 20.
        if label == "Class":
            sigs = []
            for l in lines[:60]:
                t = l.strip()
                if not t or t.startswith("//") or t.startswith("*") or t.startswith("/*"):
                    continue
                if any(kw in t for kw in ("class ", "private ", "public ", "protected ", "readonly ", "static ", "abstract ")):
                    sigs.append(t)
                if len(sigs) >= 20:
                    break
            return "\n".join(sigs)
        # Functions/Methods: first 8 lines are enough to show the signature.
        return "\n".join(lines[:min(8, len(lines))]).strip()

    for row in rows:
        nid, name, label, filePath, content, startLine, endLine, nameExpanded = row
        content = content or ""
        file_name = filePath.rsplit('/', 1)[-1] if filePath else ""

        # Build semantic embedding text
        parts = [f"{label}: {name}"]

        # nameExpanded: natural language bridge (e.g. "checkStaleness" → "check staleness")
        if nameExpanded and nameExpanded != name.lower():
            parts.append(nameExpanded)

        # First comment as natural language description
        comment = extract_first_comment(content)
        if comment:
            parts.append(comment)

        # File + module location
        loc = f"File: {file_name}"
        module = module_map.get(nid, "")
        if module:
            loc += f" | Module: {module}"
        parts.append(loc)

        # Graph context — cap at 5 names each to bound token count.
        callers = caller_map.get(nid, [])[:5]
        callees = callee_map.get(nid, [])[:5]
        if callers:
            parts.append(f"Called by: {', '.join(callers)}")
        if callees:
            parts.append(f"Calls: {', '.join(callees)}")

        # Code signature (not full body)
        sig = extract_signature(content, label)
        if sig:
            parts.extend(["", sig])

        text = '\n'.join(parts)
        text_hash = md5(text)

        # Skip if hash unchanged — the node's embedding text is identical to
        # what was embedded last run.
        if existing_hashes.get(nid) == text_hash:
            skipped += 1
            continue

        to_embed.append((nid, text, text_hash))

    print(json.dumps({"phase": "prepared", "to_embed": len(to_embed), "skipped": skipped}), flush=True)

    if not to_embed:
        print(json.dumps({"phase": "done", "embedded": 0, "skipped": skipped, "ms": int((time.time() - t0_total) * 1000)}), flush=True)
        db.close()
        return

    # Deduplicate — embed unique texts only, copy vectors to duplicates.
    # Identical embedding texts produce identical vectors; no quality loss.
    unique_by_hash = {}  # text_hash -> { text, node_ids: [(nid, text_hash)] }
    for nid, text, text_hash in to_embed:
        if text_hash in unique_by_hash:
            unique_by_hash[text_hash]["node_ids"].append((nid, text_hash))
        else:
            unique_by_hash[text_hash] = {"text": text, "node_ids": [(nid, text_hash)]}
    unique_texts = [v["text"] for v in unique_by_hash.values()]
    deduped = len(to_embed) - len(unique_texts)

    # Embed only unique texts
    t0_embed = time.time()
    embeddings = embed_tiered(model, tokenizer, unique_texts, "retrieval.passage", dims, max_tokens)
    embed_ms = int((time.time() - t0_embed) * 1000)

    print(json.dumps({"phase": "embedded", "count": len(unique_texts), "deduped": deduped, "ms": embed_ms}), flush=True)

    # Write to database — copy embedding to all nodes sharing the same hash.
    # unique_by_hash preserves insertion order, so embeddings[i] lines up with
    # the i-th entry.
    t0_write = time.time()
    db.execute("BEGIN")
    for i, (text_hash, entry) in enumerate(unique_by_hash.items()):
        emb = embeddings[i]
        if emb is None:
            continue
        blob = float_list_to_blob(emb)
        for nid, th in entry["node_ids"]:
            db.execute("INSERT OR REPLACE INTO embeddings (nodeId, embedding, textHash) VALUES (?, ?, ?)",
                       (nid, blob, th))
    db.execute("COMMIT")
    write_ms = int((time.time() - t0_write) * 1000)

    total_ms = int((time.time() - t0_total) * 1000)
    print(json.dumps({
        "phase": "done",
        "embedded": len(to_embed),
        "skipped": skipped,
        "embed_ms": embed_ms,
        "write_ms": write_ms,
        "total_ms": total_ms,
    }), flush=True)

    db.close()
+
412
+
413
+ # =========================================================================
414
+ # INTERACTIVE MODE — stdin/stdout JSON for MCP query embedding
415
+ # =========================================================================
416
+
417
def interactive_mode():
    """Serve embedding requests over stdin/stdout as JSON lines (MCP mode).

    Protocol: one JSON object per line. {"cmd": "ping"} → {"status": "ready"};
    {"cmd": "quit"} exits; otherwise {"texts": [...], "type": "query"|"passage",
    "dims": N} → {"embeddings": [...], "count", "dims", "ms"} or {"error": ...}.
    """
    t0 = time.time()
    model, tokenizer = load_model()
    load_ms = int((time.time() - t0) * 1000)

    # Readiness handshake — the parent process waits for this line.
    print(json.dumps({
        "status": "ready",
        "model": "jina-v5-text-small-retrieval",
        "device": str(mx.default_device()),
        "load_ms": load_ms,
        "precision": "int4-g64",
    }), flush=True)

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            print(json.dumps({"error": "Invalid JSON"}), flush=True)
            continue

        # Control commands take precedence over embedding requests.
        if "cmd" in req:
            if req["cmd"] == "ping":
                print(json.dumps({"status": "ready"}), flush=True)
            elif req["cmd"] == "quit":
                break
            continue

        texts = req.get("texts", [])
        prompt_type = req.get("type", "passage")
        dims = req.get("dims", 256)
        # Anything other than "query" embeds with the passage prefix.
        task_type = "retrieval.query" if prompt_type == "query" else "retrieval.passage"

        t0 = time.time()
        try:
            embeddings = embed_tiered(model, tokenizer, texts, task_type, dims)
            elapsed_ms = int((time.time() - t0) * 1000)
            print(json.dumps({
                "embeddings": embeddings,
                "count": len(embeddings),
                "dims": dims,
                "ms": elapsed_ms,
            }), flush=True)
        except Exception as e:
            # Report the failure to the client but keep the loop alive.
            print(json.dumps({"error": str(e)}), flush=True)
+
466
+
467
+ # =========================================================================
468
+ # MAIN
469
+ # =========================================================================
470
+
471
+ if __name__ == "__main__":
472
+ if len(sys.argv) >= 3 and sys.argv[1] == "batch":
473
+ db_path = sys.argv[2]
474
+ dims = 256
475
+ max_tokens = 2048
476
+ for i, arg in enumerate(sys.argv[3:], 3):
477
+ if arg == "--dims" and i + 1 < len(sys.argv):
478
+ dims = int(sys.argv[i + 1])
479
+ if arg == "--max-tokens" and i + 1 < len(sys.argv):
480
+ max_tokens = int(sys.argv[i + 1])
481
+ batch_mode(db_path, dims, max_tokens)
482
+ else:
483
+ interactive_mode()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.2.2",
3
+ "version": "2.2.3",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",
@@ -34,7 +34,10 @@
34
34
  "hooks",
35
35
  "scripts",
36
36
  "skills",
37
- "vendor"
37
+ "vendor",
38
+ "models/mlx-embedder.py",
39
+ "models/jina-v5-small-mlx/model.py",
40
+ "models/jina-v5-small-mlx/config.json"
38
41
  ],
39
42
  "scripts": {
40
43
  "build": "tsc",