npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.4.1 → 2.5.1 - Mend

@zuvia-software-solutions/code-mapper 2.4.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/dist/cli/analyze.d.ts +0 -1
package/dist/cli/analyze.js +11 -87
package/dist/cli/index.js +2 -2
package/dist/core/embeddings/index.d.ts +2 -3
package/dist/core/embeddings/index.js +2 -3
package/dist/core/embeddings/nl-embed-worker.d.ts +8 -0
package/dist/core/embeddings/nl-embed-worker.js +38 -0
package/dist/core/embeddings/nl-embedder.d.ts +1 -1
package/dist/core/embeddings/nl-embedder.js +199 -30
package/dist/core/incremental/refresh.js +18 -26
package/dist/core/ingestion/call-processor.js +11 -5
package/dist/core/semantic/tsgo-service.js +16 -0
package/dist/mcp/local/local-backend.js +40 -27
package/dist/mcp/server.js +2 -2
package/dist/mcp/tools.js +1 -0
package/package.json +2 -5
package/models/jina-code-0.5b-mlx/config.json +0 -73
package/models/jina-code-0.5b-mlx/model.py +0 -127
package/models/mlx-embedder.py +0 -604

package/dist/mcp/local/local-backend.js CHANGED Viewed

@@ -346,10 +346,10 @@ export class LocalBackend {
             }
             this.loadNlEmbeddingCache(id); // NL cache loaded regardless (cheap, may not exist)
         }
-        // Pre-warm MLX embedder so first query has zero model-load latency
+        // Pre-warm bge-small embedder so first query has zero model-load latency
         if (anyEmbeddings) {
-            import('../../core/embeddings/embedder.js').then(({ initEmbedder }) => {
-                initEmbedder().catch(() => { });
+            import('../../core/embeddings/nl-embedder.js').then(({ initNlEmbedder }) => {
+                initNlEmbedder().catch(() => { });
             }).catch(() => { });
         }
         return this.repos.size > 0;
@@ -1119,19 +1119,7 @@ export class LocalBackend {
             ...(r.startLine != null ? { startLine: r.startLine } : {}),
             ...(r.endLine != null ? { endLine: r.endLine } : {}),
         }));
-        // NL semantic results get high weight — proven 100% recall on conceptual queries
-        const nlForRRF = nlSemanticResults.map((r) => ({
-            nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), label: String(r.type ?? ''),
-            filePath: String(r.filePath ?? ''), distance: Number(r.distance ?? 1),
-            ...(r.startLine != null ? { startLine: r.startLine } : {}),
-            ...(r.endLine != null ? { endLine: r.endLine } : {}),
-        }));
-        // Merge code + NL semantic into one semantic list (best of both worlds)
-        const combinedSemantic = [...semanticForRRF, ...nlForRRF]
-            .sort((a, b) => a.distance - b.distance)
-            .filter((r, i, arr) => arr.findIndex(x => x.nodeId === r.nodeId) === i) // dedupe by nodeId
-            .slice(0, searchLimit);
-        let rrfMerged = mergeWithRRF(bm25ForRRF, combinedSemantic, { limit: searchLimit });
+        let rrfMerged = mergeWithRRF(bm25ForRRF, semanticForRRF, { limit: searchLimit });
         // Store NL match reasons for display
         const nlMatchReasons = new Map();
         for (const r of nlSemanticResults) {
@@ -1139,6 +1127,27 @@ export class LocalBackend {
                 nlMatchReasons.set(r.nodeId, r.match_reason);
             }
         }
+        // Inject NL semantic results directly — they bridge the vocabulary gap
+        // that BM25 and code embeddings miss. Insert at high score so they
+        // appear in results even when BM25 finds unrelated "prevent" matches.
+        if (nlSemanticResults.length > 0) {
+            const mainIds = new Set(rrfMerged.map(r => r.nodeId || r.filePath));
+            const topMainScore = rrfMerged[0]?.score ?? 0.01;
+            for (let i = 0; i < Math.min(nlSemanticResults.length, 5); i++) {
+                const nlr = nlSemanticResults[i];
+                if (mainIds.has(nlr.nodeId))
+                    continue; // already in results
+                // Score NL results high — at or above the top BM25 result
+                const nlScore = topMainScore * (1.0 - i * 0.1);
+                rrfMerged.push({
+                    filePath: nlr.filePath, score: nlScore, rank: i + 1,
+                    sources: ['semantic'], nodeId: nlr.nodeId, name: nlr.name,
+                    label: nlr.type, startLine: nlr.startLine, endLine: nlr.endLine,
+                });
+            }
+            rrfMerged.sort((a, b) => b.score - a.score);
+            rrfMerged = rrfMerged.slice(0, searchLimit);
+        }
         // Merge refs + fileWords into the RRF results (lower weight)
         if (refsForRRF.length > 0 || fileWordsForRRF.length > 0) {
             const supplemental = mergeWithRRF(refsForRRF, fileWordsForRRF.map((r) => ({
@@ -1200,14 +1209,13 @@ export class LocalBackend {
                 data.match_reason = reason;
             return { score: rrf.score, data };
         });
-        // Filter noise: remove test files, config files, docs from results by default
+        // Filter non-code files (JSON, MD, YAML). Test files are included by default.
+        // Agents can pass exclude_tests: true to filter test files when not needed.
         merged = merged.filter(item => {
             const fp = String(item.data.filePath ?? '').toLowerCase();
-            if (isTestFilePath(fp))
-                return false;
             if (fp.endsWith('.json') || fp.endsWith('.md') || fp.endsWith('.yml') || fp.endsWith('.yaml'))
                 return false;
-            if (fp.includes('/skills/') || fp.includes('/fixtures/') || fp.includes('/eval/'))
+            if (params.exclude_tests && isTestFilePath(fp))
                 return false;
             return true;
         });
@@ -1591,8 +1599,8 @@ export class LocalBackend {
                     return [];
             }
             const { DEFAULT_MAX_SEMANTIC_DISTANCE } = await import('../../core/search/types.js');
-            const { embedQuery } = await import('../../core/embeddings/embedder.js');
-            const queryVec = await embedQuery(query);
+            const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
+            const queryVec = await nlEmbed(query);
             // In-memory cosine search — no disk I/O
             const vecResults = this.searchEmbeddingsInMemory(repo.id, queryVec, limit, DEFAULT_MAX_SEMANTIC_DISTANCE);
             if (vecResults.length === 0)
@@ -1626,9 +1634,14 @@ export class LocalBackend {
      */
     async nlSemanticSearch(repo, query, limit) {
         try {
-            const cache = this.nlEmbeddingCaches.get(repo.id);
-            if (!cache || cache.nodeIds.length === 0)
-                return [];
+            let cache = this.nlEmbeddingCaches.get(repo.id);
+            if (!cache || cache.nodeIds.length === 0) {
+                // Try loading on demand
+                this.loadNlEmbeddingCache(repo.id);
+                cache = this.nlEmbeddingCaches.get(repo.id);
+                if (!cache || cache.nodeIds.length === 0)
+                    return [];
+            }
             const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
             const queryVec = await nlEmbed(query);
             const vecResults = this.searchNlEmbeddingsInMemory(repo.id, queryVec, limit, 0.5);
@@ -1748,8 +1761,8 @@ export class LocalBackend {
             const cache = this.embeddingCaches.get(repo.id);
             if (!cache || cache.nodeIds.length === 0)
                 return [];
-            const { embedQuery } = await import('../../core/embeddings/embedder.js');
-            const queryVec = await embedQuery(query);
+            const { nlEmbed } = await import('../../core/embeddings/nl-embedder.js');
+            const queryVec = await nlEmbed(query);
             const neighbors = this.searchEmbeddingsInMemory(repo.id, queryVec, 5, 0.7);
             // Extract symbol names from nodeIds (format: "Label:filePath:name")
             return neighbors.map(n => {

package/dist/mcp/server.js CHANGED Viewed

@@ -13,8 +13,8 @@ import { getResourceDefinitions, getResourceTemplates, readResource } from './re
 // the MCP tool descriptions. Hints wasted ~40 tokens per response.
 /** Create a configured MCP Server with all handlers registered (transport-agnostic) */
 export function createMCPServer(backend) {
-    // Preload embedding model in background so first query doesn't pay cold-start cost
-    import('../core/embeddings/embedder.js').then(m => m.initEmbedder()).catch(() => { });
+    // Preload bge-small embedding model in background so first query doesn't pay cold-start cost
+    import('../core/embeddings/nl-embedder.js').then(m => m.initNlEmbedder()).catch(() => { });
     const require = createRequire(import.meta.url);
     const pkgVersion = require('../../package.json').version;
     const server = new Server({

package/dist/mcp/tools.js CHANGED Viewed

@@ -44,6 +44,7 @@ Hybrid ranking: BM25 keyword + semantic vector search, ranked by Reciprocal Rank
                 limit: { type: 'number', description: 'Max processes to return (default: 5)', default: 5 },
                 max_symbols: { type: 'number', description: 'Max symbols per process (default: 10)', default: 10 },
                 include_content: { type: 'boolean', description: 'Include full symbol source code (default: false)', default: false },
+                exclude_tests: { type: 'boolean', description: 'Exclude test/spec/fixture files from results (default: false)', default: false },
                 repo: { type: 'string', description: 'Repository name or path. Omit if only one repo is indexed.' },
             },
             required: ['query'],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.4.1",
+  "version": "2.5.1",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",
@@ -34,10 +34,7 @@
     "hooks",
     "scripts",
     "skills",
-    "vendor",
-    "models/mlx-embedder.py",
-    "models/jina-code-0.5b-mlx/model.py",
-    "models/jina-code-0.5b-mlx/config.json"
+    "vendor"
   ],
   "scripts": {
     "build": "tsc",

package/models/jina-code-0.5b-mlx/config.json DELETED Viewed

@@ -1,73 +0,0 @@
-{
-  "architectures": [
-    "Qwen2ForCausalLM"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 151643,
-  "eos_token_id": 151643,
-  "hidden_act": "silu",
-  "hidden_size": 896,
-  "initializer_range": 0.02,
-  "intermediate_size": 4864,
-  "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention"
-  ],
-  "matryoshka_dims": [
-    64,
-    128,
-    256,
-    512,
-    896
-  ],
-  "max_position_embeddings": 32768,
-  "max_window_layers": 24,
-  "model_type": "qwen2",
-  "num_attention_heads": 14,
-  "num_hidden_layers": 24,
-  "num_key_value_heads": 2,
-  "prompt_names": [
-    "query",
-    "passage"
-  ],
-  "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 1000000.0,
-  "sliding_window": null,
-  "task_names": [
-    "nl2code",
-    "qa",
-    "code2code",
-    "code2nl",
-    "code2completion"
-  ],
-  "tie_word_embeddings": true,
-  "tokenizer_class": "Qwen2TokenizerFast",
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.53.0",
-  "use_cache": true,
-  "use_sliding_window": false,
-  "vocab_size": 151936
-}

package/models/jina-code-0.5b-mlx/model.py DELETED Viewed

@@ -1,127 +0,0 @@
-from dataclasses import dataclass
-from typing import Optional, List
-import mlx.core as mx
-import mlx.nn as nn
-@dataclass
-class ModelArgs:
-    hidden_size: int
-    num_hidden_layers: int
-    intermediate_size: int
-    num_attention_heads: int
-    rms_norm_eps: float
-    vocab_size: int
-    num_key_value_heads: int
-    max_position_embeddings: int
-    rope_theta: float = 1000000.0
-    tie_word_embeddings: bool = True
-class Attention(nn.Module):
-    def __init__(self, args):
-        super().__init__()
-        dim = args.hidden_size
-        self.n_heads = args.num_attention_heads
-        self.n_kv_heads = args.num_key_value_heads
-        self.head_dim = dim // self.n_heads
-        self.scale = self.head_dim ** -0.5
-        self.rope_theta = args.rope_theta
-        self.q_proj = nn.Linear(dim, self.n_heads * self.head_dim, bias=True)
-        self.k_proj = nn.Linear(dim, self.n_kv_heads * self.head_dim, bias=True)
-        self.v_proj = nn.Linear(dim, self.n_kv_heads * self.head_dim, bias=True)
-        self.o_proj = nn.Linear(self.n_heads * self.head_dim, dim, bias=False)
-    def __call__(self, x, mask=None):
-        B, L, D = x.shape
-        q = self.q_proj(x).reshape(B, L, self.n_heads, self.head_dim).transpose(0, 2, 1, 3)
-        k = self.k_proj(x).reshape(B, L, self.n_kv_heads, self.head_dim).transpose(0, 2, 1, 3)
-        v = self.v_proj(x).reshape(B, L, self.n_kv_heads, self.head_dim).transpose(0, 2, 1, 3)
-        q = mx.fast.rope(q, self.head_dim, traditional=False, base=self.rope_theta, scale=1.0, offset=0)
-        k = mx.fast.rope(k, self.head_dim, traditional=False, base=self.rope_theta, scale=1.0, offset=0)
-        out = mx.fast.scaled_dot_product_attention(q, k, v, mask=mask.astype(q.dtype) if mask is not None else None, scale=self.scale)
-        return self.o_proj(out.transpose(0, 2, 1, 3).reshape(B, L, -1))
-class MLP(nn.Module):
-    def __init__(self, dim, hidden):
-        super().__init__()
-        self.gate_proj = nn.Linear(dim, hidden, bias=False)
-        self.down_proj = nn.Linear(hidden, dim, bias=False)
-        self.up_proj = nn.Linear(dim, hidden, bias=False)
-    def __call__(self, x):
-        return self.down_proj(nn.silu(self.gate_proj(x)) * self.up_proj(x))
-class TransformerBlock(nn.Module):
-    def __init__(self, args):
-        super().__init__()
-        self.self_attn = Attention(args)
-        self.mlp = MLP(args.hidden_size, args.intermediate_size)
-        self.input_layernorm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
-        self.post_attention_layernorm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
-    def __call__(self, x, mask=None):
-        h = x + self.self_attn(self.input_layernorm(x), mask)
-        return h + self.mlp(self.post_attention_layernorm(h))
-class Qwen2Model(nn.Module):
-    def __init__(self, args):
-        super().__init__()
-        self.embed_tokens = nn.Embedding(args.vocab_size, args.hidden_size)
-        self.layers = [TransformerBlock(args) for _ in range(args.num_hidden_layers)]
-        self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps)
-    def __call__(self, inputs, mask=None):
-        h = self.embed_tokens(inputs)
-        for layer in self.layers:
-            h = layer(h, mask)
-        return self.norm(h)
-class JinaCodeEmbeddingModel(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        args = ModelArgs(
-            hidden_size=config["hidden_size"],
-            num_hidden_layers=config["num_hidden_layers"],
-            intermediate_size=config["intermediate_size"],
-            num_attention_heads=config["num_attention_heads"],
-            rms_norm_eps=config["rms_norm_eps"],
-            vocab_size=config["vocab_size"],
-            num_key_value_heads=config["num_key_value_heads"],
-            max_position_embeddings=config["max_position_embeddings"],
-            rope_theta=config.get("rope_theta", 1000000.0),
-        )
-        self.model = Qwen2Model(args)
-        self.config = config
-    def __call__(self, input_ids, attention_mask=None):
-        B, L = input_ids.shape
-        causal = mx.tril(mx.ones((L, L)))
-        causal = mx.where(causal == 0, -1e4, 0.0)[None, None, :, :]
-        if attention_mask is not None:
-            pad = mx.where(attention_mask == 0, -1e4, 0.0)[:, None, None, :]
-            mask = causal + pad
-        else:
-            mask = causal
-        h = self.model(input_ids, mask)
-        if attention_mask is not None:
-            seq_lens = mx.sum(attention_mask.astype(mx.int32), axis=1) - 1
-            embs = h[mx.arange(B), seq_lens]
-        else:
-            embs = h[:, -1, :]
-        norms = mx.linalg.norm(embs, axis=1, keepdims=True)
-        return embs / norms
-    def encode(self, texts, tokenizer, max_length=8192, truncate_dim=None, task="nl2code", prompt_type="query"):
-        PREFIXES = {"nl2code": {"query": "Find the most relevant code snippet given the following query:\n", "passage": "Candidate code snippet:\n"}}
-        prefix = PREFIXES.get(task, {}).get(prompt_type, "")
-        if prefix:
-            texts = [prefix + t for t in texts]
-        encodings = tokenizer.encode_batch(texts)
-        ml = min(max_length, max(len(e.ids) for e in encodings))
-        iids, amask = [], []
-        for e in encodings:
-            ids = e.ids[:ml]; m = e.attention_mask[:ml]; p = ml - len(ids)
-            if p > 0: ids = ids + [0]*p; m = m + [0]*p
-            iids.append(ids); amask.append(m)
-        embs = self(mx.array(iids), mx.array(amask))
-        if truncate_dim:
-            embs = embs[:, :truncate_dim]
-            embs = embs / mx.linalg.norm(embs, axis=1, keepdims=True)
-        return embs