@zuvia-software-solutions/code-mapper 2.2.2 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ {
2
+ "model_type": "qwen3",
3
+ "hidden_size": 1024,
4
+ "num_hidden_layers": 28,
5
+ "intermediate_size": 3072,
6
+ "num_attention_heads": 16,
7
+ "num_key_value_heads": 8,
8
+ "rms_norm_eps": 1e-06,
9
+ "vocab_size": 151936,
10
+ "max_position_embeddings": 32768,
11
+ "rope_theta": 3500000,
12
+ "rope_parameters": {
13
+ "rope_theta": 3500000,
14
+ "rope_type": "default"
15
+ },
16
+ "head_dim": 128,
17
+ "tie_word_embeddings": true,
18
+ "rope_scaling": null
19
+ }
@@ -0,0 +1,260 @@
1
+ """
2
+ Jina Embeddings v5 Text Small - MLX Implementation
3
+
4
+ Pure MLX port of jina-embeddings-v5-text-small (Qwen3-0.6B backbone).
5
+ Zero dependency on PyTorch or transformers.
6
+
7
+ Features:
8
+ - Causal attention (decoder architecture)
9
+ - QKNorm (q_norm/k_norm per head)
10
+ - Last-token pooling
11
+ - L2 normalization
12
+ - Matryoshka embedding dimensions: [32, 64, 128, 256, 512, 768, 1024]
13
+ - Max sequence length: 32768 tokens
14
+ - Embedding dimension: 1024
15
+
16
+ Architecture:
17
+ - RoPE (rope_theta from config)
18
+ - SwiGLU MLP
19
+ - RMSNorm
20
+ - QKNorm (RMSNorm on Q/K per head)
21
+ - No attention bias
22
+ """
23
+
24
+ from dataclasses import dataclass
25
+ from typing import Any, Dict, Optional, Union
26
+
27
+ import mlx.core as mx
28
+ import mlx.nn as nn
29
+
30
+
31
@dataclass
class ModelArgs:
    """Model hyperparameters; field names mirror the keys in config.json so the
    instance can be built directly via ModelArgs(**config)."""
    model_type: str
    hidden_size: int
    num_hidden_layers: int
    intermediate_size: int
    num_attention_heads: int
    rms_norm_eps: float
    vocab_size: int
    num_key_value_heads: int
    max_position_embeddings: int
    head_dim: int
    tie_word_embeddings: bool
    # Newer configs nest rope settings under "rope_parameters"; older ones use
    # a flat "rope_theta". Attention.__init__ resolves whichever is present.
    rope_parameters: Optional[Dict[str, Union[float, str]]] = None
    rope_theta: Optional[float] = None
    # Present (as null) in config.json; accepted here only so **config unpacking
    # does not raise on the extra key.
    rope_scaling: Optional[Dict[str, Union[float, str]]] = None
47
+
48
+
49
class Attention(nn.Module):
    """Multi-head attention with grouped-query KV heads, per-head QKNorm, and
    RoPE applied via mx.fast kernels (Qwen3-style, no attention bias)."""

    def __init__(self, args: ModelArgs):
        super().__init__()

        dim = args.hidden_size
        self.n_heads = n_heads = args.num_attention_heads
        # GQA: key/value heads can be fewer than query heads (e.g. 8 vs 16).
        self.n_kv_heads = n_kv_heads = args.num_key_value_heads

        head_dim = args.head_dim
        # Standard 1/sqrt(head_dim) attention scaling.
        self.scale = head_dim**-0.5
        self.head_dim = head_dim

        # Projections are bias-free (Qwen3 convention, see module docstring).
        self.q_proj = nn.Linear(dim, n_heads * head_dim, bias=False)
        self.k_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
        self.v_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
        self.o_proj = nn.Linear(n_heads * head_dim, dim, bias=False)

        # Qwen3 has QKNorm: RMSNorm applied to Q and K per head (over head_dim).
        self.q_norm = nn.RMSNorm(head_dim, eps=args.rms_norm_eps)
        self.k_norm = nn.RMSNorm(head_dim, eps=args.rms_norm_eps)

        # Resolve rope_theta from config: prefer nested rope_parameters, then
        # the flat rope_theta field, then the classic 10000.0 default.
        if args.rope_parameters and 'rope_theta' in args.rope_parameters:
            rope_theta = float(args.rope_parameters['rope_theta'])
        elif args.rope_theta:
            rope_theta = float(args.rope_theta)
        else:
            rope_theta = 10000.0
        self.rope_theta = rope_theta

    def __call__(
        self,
        x: mx.array,
        mask: Optional[mx.array] = None,
    ) -> mx.array:
        # x: (B, L, hidden); mask (if given) is an additive attention mask.
        B, L, D = x.shape

        queries, keys, values = self.q_proj(x), self.k_proj(x), self.v_proj(x)

        # Reshape to (B, heads, L, head_dim); QKNorm is applied on the last
        # axis (per head) BEFORE the transpose.
        queries = self.q_norm(queries.reshape(B, L, self.n_heads, -1)).transpose(0, 2, 1, 3)
        keys = self.k_norm(keys.reshape(B, L, self.n_kv_heads, -1)).transpose(0, 2, 1, 3)
        values = values.reshape(B, L, self.n_kv_heads, -1).transpose(0, 2, 1, 3)

        # RoPE via mx.fast; offset=0 because there is no KV cache (full-sequence
        # encoding only).
        queries = mx.fast.rope(queries, self.head_dim, traditional=False, base=self.rope_theta, scale=1.0, offset=0)
        keys = mx.fast.rope(keys, self.head_dim, traditional=False, base=self.rope_theta, scale=1.0, offset=0)

        # Scaled dot-product attention (handles GQA natively). Mask is cast to
        # the query dtype so the additive -1e4 entries survive fp16/bf16.
        output = mx.fast.scaled_dot_product_attention(
            queries, keys, values,
            mask=mask.astype(queries.dtype) if mask is not None else None,
            scale=self.scale,
        )

        # Back to (B, L, heads*head_dim) and project to hidden size.
        output = output.transpose(0, 2, 1, 3).reshape(B, L, -1)
        return self.o_proj(output)
106
+
107
+
108
class MLP(nn.Module):
    """SwiGLU feed-forward block: down_proj(silu(gate_proj(x)) * up_proj(x))."""

    def __init__(self, dim, hidden_dim):
        super().__init__()
        self.gate_proj = nn.Linear(dim, hidden_dim, bias=False)
        self.down_proj = nn.Linear(hidden_dim, dim, bias=False)
        self.up_proj = nn.Linear(dim, hidden_dim, bias=False)

    def __call__(self, x) -> mx.array:
        # Gated activation: silu on the gate branch, elementwise product with
        # the linear up branch, then project back down to the model dim.
        activated = nn.silu(self.gate_proj(x)) * self.up_proj(x)
        return self.down_proj(activated)
118
+
119
+
120
class TransformerBlock(nn.Module):
    """Pre-norm decoder layer: attention and MLP sub-layers, each wrapped in a
    residual connection."""

    def __init__(self, args: ModelArgs):
        super().__init__()
        self.self_attn = Attention(args)
        self.mlp = MLP(args.hidden_size, args.intermediate_size)
        self.input_layernorm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
        self.post_attention_layernorm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)

    def __call__(
        self,
        x: mx.array,
        mask: Optional[mx.array] = None,
    ) -> mx.array:
        # Attention sub-layer: normalize, attend, add residual.
        h = x + self.self_attn(self.input_layernorm(x), mask)
        # MLP sub-layer: normalize, transform, add residual.
        return h + self.mlp(self.post_attention_layernorm(h))
138
+
139
+
140
class Qwen3Model(nn.Module):
    """Token embedding table, a stack of decoder layers, and a final RMSNorm."""

    def __init__(self, args: ModelArgs):
        super().__init__()
        self.embed_tokens = nn.Embedding(args.vocab_size, args.hidden_size)
        self.layers = [TransformerBlock(args=args) for _ in range(args.num_hidden_layers)]
        self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)

    def __call__(self, inputs: mx.array, mask: Optional[mx.array] = None):
        # Embed token ids, run every decoder layer in order, normalize last.
        hidden = self.embed_tokens(inputs)
        for block in self.layers:
            hidden = block(hidden, mask)
        return self.norm(hidden)
152
+
153
+
154
class JinaEmbeddingModel(nn.Module):
    """Jina v5-text-small embedding model with last-token pooling."""

    def __init__(self, config: dict):
        super().__init__()
        # config must contain exactly the keys declared on ModelArgs.
        args = ModelArgs(**config)
        self.model = Qwen3Model(args)
        self.config = config

    def __call__(
        self,
        input_ids: mx.array,
        attention_mask: Optional[mx.array] = None,
    ):
        """Run the backbone and return L2-normalized (batch, hidden) embeddings.

        attention_mask, if given, is 1 for real tokens and 0 for padding.
        """
        batch_size, seq_len = input_ids.shape

        # Causal mask (Qwen3 is a decoder model): additive mask with -1e4 on
        # future positions, 0 elsewhere, broadcast to (1, 1, L, L).
        causal_mask = mx.tril(mx.ones((seq_len, seq_len)))
        causal_mask = mx.where(causal_mask == 0, -1e4, 0.0)
        causal_mask = causal_mask[None, None, :, :]

        # Combine with padding mask: padded key positions also get -1e4, shaped
        # (B, 1, 1, L) so it broadcasts over query positions.
        if attention_mask is not None:
            padding_mask = mx.where(attention_mask == 0, -1e4, 0.0)
            padding_mask = padding_mask[:, None, None, :]
            mask = causal_mask + padding_mask
        else:
            mask = causal_mask

        hidden_states = self.model(input_ids, mask)

        # Last token pooling: pick the hidden state of the final *real* token
        # per sequence (sum of mask - 1 gives its index); without a mask, just
        # take position -1.
        if attention_mask is not None:
            sequence_lengths = mx.sum(attention_mask, axis=1) - 1
            batch_indices = mx.arange(hidden_states.shape[0])
            embeddings = hidden_states[batch_indices, sequence_lengths]
        else:
            embeddings = hidden_states[:, -1, :]

        # L2 normalization so downstream cosine similarity is a dot product.
        norms = mx.linalg.norm(embeddings, axis=1, keepdims=True)
        embeddings = embeddings / norms

        return embeddings

    def encode(
        self,
        texts: list[str],
        tokenizer,
        max_length: int = 8192,
        truncate_dim: Optional[int] = None,
        task_type: str = "retrieval.query",
    ):
        """
        Encode texts to embeddings.

        Args:
            texts: List of input texts
            tokenizer: Tokenizer instance (from tokenizers library)
            max_length: Maximum sequence length
            truncate_dim: Optional Matryoshka dimension [32, 64, 128, 256, 512, 768, 1024]
            task_type: Task prefix ("retrieval.query", "retrieval.passage", etc.)

        Returns:
            Embeddings array [batch, dim]

        NOTE(review): an empty texts list makes the max() below raise
        ValueError — callers are expected to pass at least one text.
        """
        # Task-specific instruction prefixes; unknown task types get none.
        prefix_map = {
            "retrieval.query": "Query: ",
            "retrieval.passage": "Document: ",
            "classification": "Document: ",
            "text-matching": "Document: ",
            "clustering": "Document: ",
        }
        prefix = prefix_map.get(task_type, "")

        if prefix:
            texts = [prefix + text for text in texts]

        encodings = tokenizer.encode_batch(texts)

        # Pad only to the longest sequence in this batch (capped at max_length),
        # not to the full max_length.
        max_len = min(max_length, max(len(enc.ids) for enc in encodings))
        input_ids = []
        attention_mask = []

        for encoding in encodings:
            ids = encoding.ids[:max_len]
            mask = encoding.attention_mask[:max_len]

            # Right-pad with token id 0 and mask 0.
            pad_len = max_len - len(ids)
            if pad_len > 0:
                ids = ids + [0] * pad_len
                mask = mask + [0] * pad_len

            input_ids.append(ids)
            attention_mask.append(mask)

        input_ids = mx.array(input_ids)
        attention_mask = mx.array(attention_mask)

        embeddings = self(input_ids, attention_mask)

        # Matryoshka truncation: slice to the requested dim, then re-normalize
        # since truncation changes the vector norm.
        if truncate_dim is not None:
            embeddings = embeddings[:, :truncate_dim]
            norms = mx.linalg.norm(embeddings, axis=1, keepdims=True)
            embeddings = embeddings / norms

        return embeddings
@@ -0,0 +1,483 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MLX-accelerated code embedder for Apple Silicon.
4
+
5
+ TWO MODES:
6
+ 1. Batch mode (main use): reads nodes directly from SQLite, embeds, writes back.
7
+ No IPC overhead — everything happens in one process.
8
+ Usage: python3 mlx-embedder.py batch <db_path> [--dims 256] [--max-tokens 2048]
9
+
10
+ 2. Interactive mode (for MCP query embedding): reads JSON from stdin.
11
+ Usage: python3 mlx-embedder.py [interactive]
12
+
13
+ Model: Jina Embeddings v5 Text Small Retrieval (677M params, Qwen3-0.6B backbone)
14
+ Optimized with int4 quantization (Linear) + int6 quantization (Embedding).
15
+ """
16
+
17
+ import sys
18
+ import os
19
+ import json
20
+ import time
21
+ import struct
22
+ import hashlib
23
+
24
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
+
26
+ import mlx.core as mx
27
+ import mlx.nn as nn
28
+ from tokenizers import Tokenizer
29
+
30
+ MODEL_DIR = os.path.dirname(os.path.abspath(__file__)) + "/jina-v5-small-mlx"
31
+
32
+
33
+
34
+
35
+
36
+
37
def ensure_model_downloaded():
    """Download model weights from HuggingFace if not present.

    Emits JSON progress lines on stdout (the parent process parses them).

    Raises:
        RuntimeError: weights are missing and huggingface_hub is not installed.
    """
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    if os.path.exists(weights_path):
        return

    print(json.dumps({"phase": "downloading", "message": "Downloading embedding model (~1.1GB, first time only)..."}), flush=True)
    try:
        from huggingface_hub import hf_hub_download
        import shutil
        # Fix: on a fresh install MODEL_DIR may not exist yet, and
        # shutil.copy below would fail with FileNotFoundError without it.
        os.makedirs(MODEL_DIR, exist_ok=True)
        repo = "jinaai/jina-embeddings-v5-text-small-retrieval-mlx"
        for fname in ["model.safetensors", "tokenizer.json", "vocab.json", "merges.txt", "tokenizer_config.json"]:
            dest = os.path.join(MODEL_DIR, fname)
            # Skip files already present so retries only fetch what's missing.
            if not os.path.exists(dest):
                path = hf_hub_download(repo, fname)
                shutil.copy(path, dest)
        print(json.dumps({"phase": "downloaded", "message": "Model downloaded successfully"}), flush=True)
    except ImportError:
        raise RuntimeError(
            "Model weights not found. Install huggingface_hub to auto-download:\n"
            "  pip3 install huggingface_hub\n"
            "Or manually download from: https://huggingface.co/jinaai/jina-embeddings-v5-text-small-retrieval-mlx"
        )
+ )
60
+
61
+
62
def load_model():
    """Load model, quantize for speed. Auto-downloads weights on first use.

    Returns:
        (model, tokenizer): the quantized JinaEmbeddingModel and the Rust
        HuggingFace Tokenizer loaded from tokenizer.json.
    """
    ensure_model_downloaded()

    # model.py lives inside MODEL_DIR; prepend it so `from model import ...`
    # resolves to the bundled implementation.
    sys.path.insert(0, MODEL_DIR)
    from model import JinaEmbeddingModel

    with open(os.path.join(MODEL_DIR, "config.json")) as f:
        config = json.load(f)

    model = JinaEmbeddingModel(config)
    weights = mx.load(os.path.join(MODEL_DIR, "model.safetensors"))
    model.load_weights(list(weights.items()))

    # Quantize AFTER loading full-precision weights: int4 for Linear layers,
    # int6 for Embedding (the module docstring calls out this split).
    nn.quantize(model.model, bits=4, group_size=64,
                class_predicate=lambda _, m: isinstance(m, nn.Linear))
    nn.quantize(model.model, bits=6, group_size=64,
                class_predicate=lambda _, m: isinstance(m, nn.Embedding))
    # Force lazy MLX graphs to materialize now, not on the first encode call.
    mx.eval(model.parameters())

    tokenizer = Tokenizer.from_file(os.path.join(MODEL_DIR, "tokenizer.json"))
    return model, tokenizer
84
+
85
+
86
def get_batch_size_for_tokens(token_count):
    """Optimal batch size based on actual token count.

    Longer sequences get smaller batches so peak memory stays roughly flat.
    """
    # (sequence-length ceiling, batch size) tiers, checked in ascending order.
    tiers = ((64, 256), (128, 128), (256, 64), (512, 32), (1024, 16))
    for ceiling, size in tiers:
        if token_count <= ceiling:
            return size
    # Anything longer than 1024 tokens runs in batches of 8.
    return 8
94
+
95
+
96
def embed_tiered(model, tokenizer, texts, task_type="retrieval.passage", truncate_dim=256, max_tokens=2048):
    """Embed texts with token-aware batching. Tokenizes first, batches by token count.
    Returns embeddings in the ORIGINAL input order.

    Args:
        model: callable taking (input_ids, attention_mask) mx.arrays.
        tokenizer: Rust HF tokenizer with encode_batch().
        texts: list of raw strings to embed.
        task_type: selects the instruction prefix ("Query: " / "Document: ").
        truncate_dim: Matryoshka dimension; truncated vectors are re-normalized.
        max_tokens: hard cap on sequence length (longer inputs are truncated).
    """
    if not texts:
        return []

    # Add task prefix
    prefix_map = {"retrieval.query": "Query: ", "retrieval.passage": "Document: "}
    prefix = prefix_map.get(task_type, "")
    prefixed = [prefix + t for t in texts] if prefix else texts

    # Tokenize everything in one call (fast — Rust HF tokenizer)
    encodings = tokenizer.encode_batch(prefixed)

    # Sort by token length for minimal padding: neighbors in `indexed` have
    # similar lengths, so per-batch padding waste is small.
    indexed = sorted(range(len(texts)), key=lambda i: len(encodings[i].ids))

    all_embeddings = [None] * len(texts)
    i = 0

    while i < len(indexed):
        # Peek one position ahead to estimate this batch's token length and
        # pick a batch size. NOTE(review): since `indexed` is sorted ascending,
        # i+1 slightly underestimates the batch's max length — looks like a
        # deliberate cheap heuristic; confirm before changing.
        peek_idx = indexed[min(i + 1, len(indexed) - 1)]
        tok_count = min(len(encodings[peek_idx].ids), max_tokens)
        batch_size = get_batch_size_for_tokens(tok_count)

        # Collect up to batch_size encodings, remembering original positions.
        batch_indices = []
        batch_encs = []
        while len(batch_encs) < batch_size and i < len(indexed):
            orig_idx = indexed[i]
            batch_indices.append(orig_idx)
            batch_encs.append(encodings[orig_idx])
            i += 1

        # Pad only to this batch's longest sequence (capped at max_tokens).
        max_len = min(max_tokens, max(len(e.ids) for e in batch_encs))
        input_ids = []
        attention_mask = []
        for enc in batch_encs:
            ids = enc.ids[:max_len]
            mask = enc.attention_mask[:max_len]
            pad = max_len - len(ids)
            if pad > 0:
                ids = ids + [0] * pad
                mask = mask + [0] * pad
            input_ids.append(ids)
            attention_mask.append(mask)

        embs = model(mx.array(input_ids), mx.array(attention_mask))
        # Matryoshka truncation requires re-normalizing the shortened vectors.
        if truncate_dim and truncate_dim < embs.shape[1]:
            embs = embs[:, :truncate_dim]
            norms = mx.linalg.norm(embs, axis=1, keepdims=True)
            embs = embs / norms
        # Force evaluation of the lazy MLX graph before reading values out.
        mx.eval(embs)

        # Scatter results back to their ORIGINAL input positions.
        emb_list = embs.tolist()
        for j, orig_idx in enumerate(batch_indices):
            all_embeddings[orig_idx] = emb_list[j]

    return all_embeddings
154
+
155
+
156
def float_list_to_blob(floats):
    """Convert list of floats to a binary blob (Float32Array compatible)."""
    # One float32 per value, native byte order — matches JS Float32Array
    # reading on the same machine.
    fmt = str(len(floats)) + "f"
    return struct.pack(fmt, *floats)
159
+
160
+
161
def md5(text):
    """Hex MD5 digest of *text* (UTF-8 encoded); used as a change-detection
    hash, not for security."""
    digest = hashlib.md5(text.encode())
    return digest.hexdigest()
163
+
164
+
165
+ # =========================================================================
166
+ # BATCH MODE — read from SQLite, embed, write back. Zero IPC.
167
+ # =========================================================================
168
+
169
def batch_mode(db_path, dims=256, max_tokens=2048):
    """Embed all embeddable graph nodes from the SQLite DB and write vectors back.

    Reads Function/Class/Method/Interface nodes, builds a semantic text per
    node (name + comment + signature + graph context), skips unchanged nodes
    via an MD5 text hash, dedupes identical texts, embeds with MLX, and writes
    float32 blobs into the `embeddings` table. Progress is reported as JSON
    lines on stdout.

    Args:
        db_path: path to the code-mapper SQLite database.
        dims: Matryoshka truncation dimension for stored vectors.
        max_tokens: per-text token cap passed to embed_tiered.
    """
    import sqlite3

    t0_total = time.time()

    # Load model
    print(json.dumps({"phase": "loading", "message": "Loading MLX model..."}), flush=True)
    model, tokenizer = load_model()
    load_ms = int((time.time() - t0_total) * 1000)
    print(json.dumps({"phase": "loaded", "load_ms": load_ms, "device": str(mx.default_device())}), flush=True)

    # Open database
    db = sqlite3.connect(db_path)
    db.execute("PRAGMA journal_mode=WAL")
    db.execute("PRAGMA synchronous=NORMAL")

    # Ensure textHash column exists (migration) — EAFP: probe with LIMIT 0 and
    # add the column only if the probe fails.
    try:
        db.execute("SELECT textHash FROM embeddings LIMIT 0")
    except sqlite3.OperationalError:
        db.execute("ALTER TABLE embeddings ADD COLUMN textHash TEXT")

    # Query embeddable nodes — skip test/fixture files (BM25 covers them)
    labels = ('Function', 'Class', 'Method', 'Interface')
    placeholders = ','.join('?' * len(labels))
    all_rows = db.execute(
        f"SELECT id, name, label, filePath, content, startLine, endLine, nameExpanded FROM nodes WHERE label IN ({placeholders})",
        labels
    ).fetchall()

    # Filter out test files — they're searchable via BM25 keyword matching
    test_patterns = ('/test/', '/tests/', '/spec/', '/fixtures/', '/__tests__/', '/__mocks__/',
                     '.test.', '.spec.', '_test.', '_spec.')
    rows = [r for r in all_rows if not any(p in (r[3] or '') for p in test_patterns)]
    skipped_tests = len(all_rows) - len(rows)

    print(json.dumps({"phase": "queried", "nodes": len(rows), "skipped_tests": skipped_tests}), flush=True)

    if not rows:
        print(json.dumps({"phase": "done", "embedded": 0, "skipped": 0, "ms": 0}), flush=True)
        db.close()
        return

    # Fetch graph context (callers, callees, module) for richer embedding text
    node_ids = [r[0] for r in rows]
    id_set = set(node_ids)

    # Batch fetch callers
    caller_map = {}   # node id -> names of functions that call it
    callee_map = {}   # node id -> names of functions it calls
    module_map = {}   # node id -> community (module) label

    # Chunk the IN clause to avoid SQLite variable limits
    CHUNK = 500
    for ci in range(0, len(node_ids), CHUNK):
        chunk_ids = node_ids[ci:ci+CHUNK]
        ph = ','.join('?' * len(chunk_ids))

        # Callers: CALLS edges pointing AT these nodes, high-confidence only.
        for row in db.execute(f"SELECT e.targetId, n.name FROM edges e JOIN nodes n ON n.id = e.sourceId WHERE e.targetId IN ({ph}) AND e.type = 'CALLS' AND e.confidence >= 0.7 LIMIT {len(chunk_ids)*3}", chunk_ids):
            caller_map.setdefault(row[0], []).append(row[1])

        # Callees: CALLS edges originating FROM these nodes.
        for row in db.execute(f"SELECT e.sourceId, n.name FROM edges e JOIN nodes n ON n.id = e.targetId WHERE e.sourceId IN ({ph}) AND e.type = 'CALLS' AND e.confidence >= 0.7 LIMIT {len(chunk_ids)*3}", chunk_ids):
            callee_map.setdefault(row[0], []).append(row[1])

        # Module membership via MEMBER_OF edges into Community nodes.
        for row in db.execute(f"SELECT e.sourceId, c.heuristicLabel FROM edges e JOIN nodes c ON c.id = e.targetId WHERE e.sourceId IN ({ph}) AND e.type = 'MEMBER_OF' AND c.label = 'Community' LIMIT {len(chunk_ids)}", chunk_ids):
            module_map[row[0]] = row[1]

    print(json.dumps({"phase": "context", "with_callers": len(caller_map), "with_module": len(module_map)}), flush=True)

    # Get existing text hashes for skip detection
    existing_hashes = {}
    for row in db.execute("SELECT nodeId, textHash FROM embeddings WHERE textHash IS NOT NULL"):
        existing_hashes[row[0]] = row[1]

    # Generate embedding texts + hashes
    # Optimized: semantic summary (name + comment + signature + context)
    # instead of raw code dump. 55% fewer tokens, equal search quality.
    to_embed = []  # (node_id, text, hash)
    skipped = 0

    def extract_first_comment(content):
        """Extract JSDoc/comment as natural language description (max 3 lines)."""
        if not content:
            return ""
        lines = content.split("\n")
        comment_lines = []
        in_block = False
        for l in lines:
            t = l.strip()
            # Start of a /* or /** block comment; also handle single-line blocks.
            if t.startswith("/**") or t.startswith("/*"):
                in_block = True
                inner = t.lstrip("/").lstrip("*").strip().rstrip("*/").strip()
                # Skip @param/@returns-style tags — keep prose only.
                if inner and not inner.startswith("@"):
                    comment_lines.append(inner)
                if "*/" in t:
                    in_block = False
                continue
            # Inside a block comment: collect prose lines until */.
            if in_block:
                if "*/" in t:
                    in_block = False
                    continue
                inner = t.lstrip("*").strip()
                if inner and not inner.startswith("@"):
                    comment_lines.append(inner)
                if len(comment_lines) >= 3:
                    break
                continue
            # Leading // line comments.
            if t.startswith("//"):
                inner = t[2:].strip()
                if inner:
                    comment_lines.append(inner)
                if len(comment_lines) >= 3:
                    break
                continue
            # Leading # comments (but not shebangs).
            if t.startswith("#") and not t.startswith("#!"):
                inner = t[1:].strip()
                if inner:
                    comment_lines.append(inner)
                if len(comment_lines) >= 3:
                    break
                continue
            # First non-comment line ends the leading comment block.
            if comment_lines:
                break
        return " ".join(comment_lines)

    def extract_signature(content, label):
        """Extract code signature without full body."""
        if not content:
            return ""
        lines = content.split("\n")
        # Interfaces: keep up to 30 lines verbatim (they ARE the signature).
        if label == "Interface":
            return "\n".join(lines[:30]).strip() if len(lines) <= 30 else "\n".join(lines[:30]) + "\n  // ..."
        # Classes: collect member/declaration lines only, capped at 20.
        if label == "Class":
            sigs = []
            for l in lines[:60]:
                t = l.strip()
                if not t or t.startswith("//") or t.startswith("*") or t.startswith("/*"):
                    continue
                if any(kw in t for kw in ("class ", "private ", "public ", "protected ", "readonly ", "static ", "abstract ")):
                    sigs.append(t)
                if len(sigs) >= 20:
                    break
            return "\n".join(sigs)
        # Functions/Methods: first 8 lines are enough to show the signature.
        return "\n".join(lines[:min(8, len(lines))]).strip()

    for row in rows:
        nid, name, label, filePath, content, startLine, endLine, nameExpanded = row
        content = content or ""
        file_name = filePath.rsplit('/', 1)[-1] if filePath else ""

        # Build semantic embedding text
        parts = [f"{label}: {name}"]

        # nameExpanded: natural language bridge (e.g. "checkStaleness" → "check staleness")
        if nameExpanded and nameExpanded != name.lower():
            parts.append(nameExpanded)

        # First comment as natural language description
        comment = extract_first_comment(content)
        if comment:
            parts.append(comment)

        # File + module location
        loc = f"File: {file_name}"
        module = module_map.get(nid, "")
        if module:
            loc += f" | Module: {module}"
        parts.append(loc)

        # Graph context — cap at 5 names each to bound token count.
        callers = caller_map.get(nid, [])[:5]
        callees = callee_map.get(nid, [])[:5]
        if callers:
            parts.append(f"Called by: {', '.join(callers)}")
        if callees:
            parts.append(f"Calls: {', '.join(callees)}")

        # Code signature (not full body)
        sig = extract_signature(content, label)
        if sig:
            parts.extend(["", sig])

        text = '\n'.join(parts)
        text_hash = md5(text)

        # Skip if hash unchanged — the node's embedding text is identical to
        # what was embedded last run.
        if existing_hashes.get(nid) == text_hash:
            skipped += 1
            continue

        to_embed.append((nid, text, text_hash))

    print(json.dumps({"phase": "prepared", "to_embed": len(to_embed), "skipped": skipped}), flush=True)

    if not to_embed:
        print(json.dumps({"phase": "done", "embedded": 0, "skipped": skipped, "ms": int((time.time() - t0_total) * 1000)}), flush=True)
        db.close()
        return

    # Deduplicate — embed unique texts only, copy vectors to duplicates.
    # Identical embedding texts produce identical vectors; no quality loss.
    unique_by_hash = {}  # text_hash -> { text, node_ids: [(nid, text_hash)] }
    for nid, text, text_hash in to_embed:
        if text_hash in unique_by_hash:
            unique_by_hash[text_hash]["node_ids"].append((nid, text_hash))
        else:
            unique_by_hash[text_hash] = {"text": text, "node_ids": [(nid, text_hash)]}
    unique_texts = [v["text"] for v in unique_by_hash.values()]
    deduped = len(to_embed) - len(unique_texts)

    # Embed only unique texts
    t0_embed = time.time()
    embeddings = embed_tiered(model, tokenizer, unique_texts, "retrieval.passage", dims, max_tokens)
    embed_ms = int((time.time() - t0_embed) * 1000)

    print(json.dumps({"phase": "embedded", "count": len(unique_texts), "deduped": deduped, "ms": embed_ms}), flush=True)

    # Write to database — copy embedding to all nodes sharing the same hash.
    # unique_by_hash preserves insertion order, so embeddings[i] lines up with
    # the i-th entry.
    t0_write = time.time()
    db.execute("BEGIN")
    for i, (text_hash, entry) in enumerate(unique_by_hash.items()):
        emb = embeddings[i]
        if emb is None:
            continue
        blob = float_list_to_blob(emb)
        for nid, th in entry["node_ids"]:
            db.execute("INSERT OR REPLACE INTO embeddings (nodeId, embedding, textHash) VALUES (?, ?, ?)",
                       (nid, blob, th))
    db.execute("COMMIT")
    write_ms = int((time.time() - t0_write) * 1000)

    total_ms = int((time.time() - t0_total) * 1000)
    print(json.dumps({
        "phase": "done",
        "embedded": len(to_embed),
        "skipped": skipped,
        "embed_ms": embed_ms,
        "write_ms": write_ms,
        "total_ms": total_ms,
    }), flush=True)

    db.close()
+
412
+
413
+ # =========================================================================
414
+ # INTERACTIVE MODE — stdin/stdout JSON for MCP query embedding
415
+ # =========================================================================
416
+
417
def interactive_mode():
    """Serve embedding requests over stdin/stdout as JSON lines (MCP mode).

    Protocol: one JSON object per line. {"cmd": "ping"} → {"status": "ready"};
    {"cmd": "quit"} exits; otherwise {"texts": [...], "type": "query"|"passage",
    "dims": N} → {"embeddings": [...], "count", "dims", "ms"} or {"error": ...}.
    """
    t0 = time.time()
    model, tokenizer = load_model()
    load_ms = int((time.time() - t0) * 1000)

    # Readiness handshake — the parent process waits for this line.
    print(json.dumps({
        "status": "ready",
        "model": "jina-v5-text-small-retrieval",
        "device": str(mx.default_device()),
        "load_ms": load_ms,
        "precision": "int4-g64",
    }), flush=True)

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            print(json.dumps({"error": "Invalid JSON"}), flush=True)
            continue

        # Control commands take precedence over embedding requests.
        if "cmd" in req:
            if req["cmd"] == "ping":
                print(json.dumps({"status": "ready"}), flush=True)
            elif req["cmd"] == "quit":
                break
            continue

        texts = req.get("texts", [])
        prompt_type = req.get("type", "passage")
        dims = req.get("dims", 256)
        # Anything other than "query" embeds with the passage prefix.
        task_type = "retrieval.query" if prompt_type == "query" else "retrieval.passage"

        t0 = time.time()
        try:
            embeddings = embed_tiered(model, tokenizer, texts, task_type, dims)
            elapsed_ms = int((time.time() - t0) * 1000)
            print(json.dumps({
                "embeddings": embeddings,
                "count": len(embeddings),
                "dims": dims,
                "ms": elapsed_ms,
            }), flush=True)
        except Exception as e:
            # Report the failure to the client but keep the loop alive.
            print(json.dumps({"error": str(e)}), flush=True)
+
466
+
467
+ # =========================================================================
468
+ # MAIN
469
+ # =========================================================================
470
+
471
+ if __name__ == "__main__":
472
+ if len(sys.argv) >= 3 and sys.argv[1] == "batch":
473
+ db_path = sys.argv[2]
474
+ dims = 256
475
+ max_tokens = 2048
476
+ for i, arg in enumerate(sys.argv[3:], 3):
477
+ if arg == "--dims" and i + 1 < len(sys.argv):
478
+ dims = int(sys.argv[i + 1])
479
+ if arg == "--max-tokens" and i + 1 < len(sys.argv):
480
+ max_tokens = int(sys.argv[i + 1])
481
+ batch_mode(db_path, dims, max_tokens)
482
+ else:
483
+ interactive_mode()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.2.2",
3
+ "version": "2.2.3",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",
@@ -34,7 +34,10 @@
34
34
  "hooks",
35
35
  "scripts",
36
36
  "skills",
37
- "vendor"
37
+ "vendor",
38
+ "models/mlx-embedder.py",
39
+ "models/jina-v5-small-mlx/model.py",
40
+ "models/jina-v5-small-mlx/config.json"
38
41
  ],
39
42
  "scripts": {
40
43
  "build": "tsc",