@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ai-context.js +1 -1
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +73 -82
- package/dist/cli/augment.js +0 -2
- package/dist/cli/eval-server.d.ts +2 -2
- package/dist/cli/eval-server.js +6 -6
- package/dist/cli/index.js +6 -10
- package/dist/cli/mcp.d.ts +1 -3
- package/dist/cli/mcp.js +3 -3
- package/dist/cli/refresh.d.ts +2 -2
- package/dist/cli/refresh.js +24 -29
- package/dist/cli/status.js +4 -13
- package/dist/cli/tool.d.ts +5 -4
- package/dist/cli/tool.js +8 -10
- package/dist/config/ignore-service.js +14 -34
- package/dist/core/augmentation/engine.js +53 -83
- package/dist/core/db/adapter.d.ts +99 -0
- package/dist/core/db/adapter.js +402 -0
- package/dist/core/db/graph-loader.d.ts +27 -0
- package/dist/core/db/graph-loader.js +148 -0
- package/dist/core/db/queries.d.ts +160 -0
- package/dist/core/db/queries.js +441 -0
- package/dist/core/db/schema.d.ts +108 -0
- package/dist/core/db/schema.js +136 -0
- package/dist/core/embeddings/embedder.d.ts +21 -12
- package/dist/core/embeddings/embedder.js +104 -50
- package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
- package/dist/core/embeddings/embedding-pipeline.js +220 -262
- package/dist/core/embeddings/text-generator.js +4 -19
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/graph/graph.d.ts +1 -1
- package/dist/core/graph/graph.js +1 -0
- package/dist/core/graph/types.d.ts +11 -9
- package/dist/core/graph/types.js +4 -1
- package/dist/core/incremental/refresh.d.ts +46 -0
- package/dist/core/incremental/refresh.js +464 -0
- package/dist/core/incremental/types.d.ts +2 -1
- package/dist/core/incremental/types.js +42 -44
- package/dist/core/ingestion/ast-cache.js +1 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -3
- package/dist/core/ingestion/call-processor.js +448 -60
- package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
- package/dist/core/ingestion/cluster-enricher.js +2 -0
- package/dist/core/ingestion/community-processor.d.ts +1 -1
- package/dist/core/ingestion/community-processor.js +8 -3
- package/dist/core/ingestion/export-detection.d.ts +1 -1
- package/dist/core/ingestion/export-detection.js +1 -1
- package/dist/core/ingestion/filesystem-walker.js +1 -1
- package/dist/core/ingestion/heritage-processor.d.ts +2 -2
- package/dist/core/ingestion/heritage-processor.js +22 -11
- package/dist/core/ingestion/import-processor.d.ts +2 -2
- package/dist/core/ingestion/import-processor.js +24 -9
- package/dist/core/ingestion/language-config.js +7 -4
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +23 -11
- package/dist/core/ingestion/named-binding-extraction.js +5 -5
- package/dist/core/ingestion/parsing-processor.d.ts +4 -4
- package/dist/core/ingestion/parsing-processor.js +26 -18
- package/dist/core/ingestion/pipeline.d.ts +4 -2
- package/dist/core/ingestion/pipeline.js +50 -20
- package/dist/core/ingestion/process-processor.d.ts +2 -2
- package/dist/core/ingestion/process-processor.js +28 -14
- package/dist/core/ingestion/resolution-context.d.ts +1 -1
- package/dist/core/ingestion/resolution-context.js +14 -4
- package/dist/core/ingestion/resolvers/csharp.js +4 -3
- package/dist/core/ingestion/resolvers/go.js +3 -1
- package/dist/core/ingestion/resolvers/jvm.js +13 -4
- package/dist/core/ingestion/resolvers/standard.js +2 -2
- package/dist/core/ingestion/resolvers/utils.js +6 -2
- package/dist/core/ingestion/route-stitcher.d.ts +15 -0
- package/dist/core/ingestion/route-stitcher.js +92 -0
- package/dist/core/ingestion/structure-processor.d.ts +1 -1
- package/dist/core/ingestion/structure-processor.js +3 -2
- package/dist/core/ingestion/symbol-table.d.ts +2 -0
- package/dist/core/ingestion/symbol-table.js +5 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +177 -0
- package/dist/core/ingestion/type-env.js +20 -0
- package/dist/core/ingestion/type-extractors/csharp.js +4 -3
- package/dist/core/ingestion/type-extractors/go.js +23 -12
- package/dist/core/ingestion/type-extractors/php.js +18 -10
- package/dist/core/ingestion/type-extractors/ruby.js +15 -3
- package/dist/core/ingestion/type-extractors/rust.js +3 -2
- package/dist/core/ingestion/type-extractors/shared.js +3 -2
- package/dist/core/ingestion/type-extractors/typescript.js +11 -5
- package/dist/core/ingestion/utils.d.ts +27 -4
- package/dist/core/ingestion/utils.js +145 -100
- package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
- package/dist/core/ingestion/workers/parse-worker.js +97 -29
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/search/bm25-index.d.ts +15 -8
- package/dist/core/search/bm25-index.js +48 -98
- package/dist/core/search/hybrid-search.d.ts +9 -3
- package/dist/core/search/hybrid-search.js +30 -25
- package/dist/core/search/reranker.js +9 -7
- package/dist/core/search/types.d.ts +0 -4
- package/dist/core/semantic/tsgo-service.d.ts +5 -1
- package/dist/core/semantic/tsgo-service.js +161 -66
- package/dist/lib/tsgo-test.d.ts +2 -0
- package/dist/lib/tsgo-test.js +6 -0
- package/dist/lib/type-utils.d.ts +25 -0
- package/dist/lib/type-utils.js +22 -0
- package/dist/lib/utils.d.ts +3 -2
- package/dist/lib/utils.js +3 -2
- package/dist/mcp/compatible-stdio-transport.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +29 -56
- package/dist/mcp/local/local-backend.js +808 -1118
- package/dist/mcp/resources.js +35 -25
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +5 -5
- package/dist/mcp/tools.js +24 -25
- package/dist/storage/repo-manager.d.ts +2 -12
- package/dist/storage/repo-manager.js +1 -47
- package/dist/types/pipeline.d.ts +8 -5
- package/dist/types/pipeline.js +5 -0
- package/package.json +18 -11
- package/dist/cli/serve.d.ts +0 -5
- package/dist/cli/serve.js +0 -8
- package/dist/core/incremental/child-process.d.ts +0 -8
- package/dist/core/incremental/child-process.js +0 -649
- package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
- package/dist/core/incremental/refresh-coordinator.js +0 -147
- package/dist/core/lbug/csv-generator.d.ts +0 -28
- package/dist/core/lbug/csv-generator.js +0 -355
- package/dist/core/lbug/lbug-adapter.d.ts +0 -96
- package/dist/core/lbug/lbug-adapter.js +0 -753
- package/dist/core/lbug/schema.d.ts +0 -46
- package/dist/core/lbug/schema.js +0 -402
- package/dist/mcp/core/embedder.d.ts +0 -24
- package/dist/mcp/core/embedder.js +0 -168
- package/dist/mcp/core/lbug-adapter.d.ts +0 -29
- package/dist/mcp/core/lbug-adapter.js +0 -330
- package/dist/server/api.d.ts +0 -5
- package/dist/server/api.js +0 -340
- package/dist/server/mcp-http.d.ts +0 -7
- package/dist/server/mcp-http.js +0 -95
- package/models/mlx-embedder.py +0 -185
package/dist/server/mcp-http.js
DELETED
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
// code-mapper/src/server/mcp-http.ts
|
|
2
|
-
/** @file mcp-http.ts
|
|
3
|
-
* @description Mounts Code Mapper MCP server on Express using StreamableHTTP transport
|
|
4
|
-
* Each client gets a stateful session; LocalBackend is shared (thread-safe)
|
|
5
|
-
* Sessions are evicted on close or after idle timeout */
|
|
6
|
-
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
|
7
|
-
import { createMCPServer } from '../mcp/server.js';
|
|
8
|
-
import { randomUUID } from 'crypto';
|
|
9
|
-
/** Idle sessions are evicted after 30 minutes */
|
|
10
|
-
const SESSION_TTL_MS = 30 * 60 * 1000;
|
|
11
|
-
/** Cleanup sweep runs every 5 minutes */
|
|
12
|
-
const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;
|
|
13
|
-
/**
 * Mount the Code Mapper MCP server on an Express app at `/api/mcp`.
 *
 * Every initializing POST gets its own MCP server + StreamableHTTP transport
 * pair, tracked by the transport's session ID; all pairs share `backend`.
 * Sessions are dropped when their transport closes or once they have been
 * idle for longer than SESSION_TTL_MS.
 *
 * @param app Express application (only `app.all` is used here).
 * @param backend Backend instance handed to `createMCPServer` for every session.
 * @returns Async cleanup function: stops the idle sweep and closes all sessions.
 */
export function mountMCPEndpoints(app, backend) {
    // sessionId -> { server, transport, lastActivity }
    const sessions = new Map();
    // Best-effort close. `close()` is invoked synchronously, and both a
    // synchronous throw and an asynchronous rejection are absorbed so that a
    // failing close can never surface as an unhandled promise rejection.
    const closeQuietly = async (server) => {
        try {
            await server.close();
        }
        catch { }
    };
    // Periodic eviction of sessions that have been idle past the TTL
    const cleanupTimer = setInterval(() => {
        const now = Date.now();
        for (const [id, session] of sessions) {
            if (now - session.lastActivity > SESSION_TTL_MS) {
                void closeQuietly(session.server);
                sessions.delete(id);
            }
        }
    }, CLEANUP_INTERVAL_MS);
    // Do not let the sweep timer keep the process alive on its own
    if (cleanupTimer && typeof cleanupTimer === 'object' && 'unref' in cleanupTimer) {
        cleanupTimer.unref();
    }
    const handleMcpRequest = async (req, res) => {
        const sessionId = req.headers['mcp-session-id'];
        if (sessionId && sessions.has(sessionId)) {
            // Existing session: refresh its idle clock and delegate to its transport
            const session = sessions.get(sessionId);
            session.lastActivity = Date.now();
            await session.transport.handleRequest(req, res, req.body);
        }
        else if (sessionId) {
            // Unknown/expired session ID (per MCP spec, tell client to re-initialize)
            res.status(404).json({
                jsonrpc: '2.0',
                error: { code: -32001, message: 'Session not found. Re-initialize.' },
                id: null,
            });
        }
        else if (req.method === 'POST') {
            // No session ID: a new client is initializing
            const transport = new StreamableHTTPServerTransport({
                sessionIdGenerator: () => randomUUID(),
            });
            const server = createMCPServer(backend);
            await server.connect(transport);
            try {
                await transport.handleRequest(req, res, req.body);
            }
            catch (err) {
                // The pair was never registered, so nothing else would close it
                await closeQuietly(server);
                throw err;
            }
            if (transport.sessionId) {
                sessions.set(transport.sessionId, { server, transport, lastActivity: Date.now() });
                // NOTE(review): server.connect() is expected to have installed its
                // own onclose handler on the transport; chain it instead of
                // replacing it so the server still sees the close.
                const previousOnClose = transport.onclose;
                transport.onclose = () => {
                    sessions.delete(transport.sessionId);
                    if (typeof previousOnClose === 'function') {
                        previousOnClose();
                    }
                };
            }
            else {
                // The request did not establish a session (e.g. it was not an
                // initialize request); release the unused server/transport pair.
                await closeQuietly(server);
            }
        }
        else {
            res.status(400).json({
                jsonrpc: '2.0',
                error: { code: -32000, message: 'No valid session. Send a POST to initialize.' },
                id: null,
            });
        }
    };
    app.all('/api/mcp', (req, res) => {
        void handleMcpRequest(req, res).catch((err) => {
            console.error('MCP HTTP request failed:', err);
            // The transport may already have started the response
            if (res.headersSent)
                return;
            res.status(500).json({
                jsonrpc: '2.0',
                error: { code: -32000, message: 'Internal MCP server error' },
                id: null,
            });
        });
    });
    const cleanup = async () => {
        clearInterval(cleanupTimer);
        // Start closing every session first, then forget them, then wait
        const closers = [...sessions.values()].map((session) => closeQuietly(session.server));
        sessions.clear();
        await Promise.allSettled(closers);
    };
    console.log('MCP HTTP endpoints mounted at /api/mcp');
    return cleanup;
}
|
package/models/mlx-embedder.py
DELETED
|
@@ -1,185 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
MLX-accelerated code embedder for Apple Silicon.
|
|
4
|
-
|
|
5
|
-
Runs as a persistent subprocess — reads JSON batches from stdin,
|
|
6
|
-
outputs embeddings to stdout. Node.js parent communicates via IPC.
|
|
7
|
-
|
|
8
|
-
Protocol:
|
|
9
|
-
Input (one JSON per line): {"texts": [...], "task": "nl2code", "type": "query"|"passage", "dims": 256}
|
|
10
|
-
Output (one JSON per line): {"embeddings": [[...], [...]], "ms": 42}
|
|
11
|
-
|
|
12
|
-
Special commands:
|
|
13
|
-
{"cmd": "ping"} -> {"status": "ready", "model": "jina-code-1.5b", "device": "gpu"}
|
|
14
|
-
{"cmd": "quit"} -> exits
|
|
15
|
-
|
|
16
|
-
Optimizations:
|
|
17
|
-
- Length-tiered batching: short texts in large batches, long texts in small batches
|
|
18
|
-
- Sorted by length within tiers to minimize padding waste
|
|
19
|
-
- Results returned in original input order
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
import sys
|
|
23
|
-
import os
|
|
24
|
-
import json
|
|
25
|
-
import time
|
|
26
|
-
|
|
27
|
-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
import mlx.core as mx
|
|
31
|
-
from tokenizers import Tokenizer
|
|
32
|
-
except ImportError:
|
|
33
|
-
print(json.dumps({"error": "MLX not installed. Run: pip3 install mlx tokenizers huggingface_hub"}), flush=True)
|
|
34
|
-
sys.exit(1)
|
|
35
|
-
|
|
36
|
-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
37
|
-
MODEL_DIR = os.path.join(SCRIPT_DIR, "jina-code-1.5b-mlx")
|
|
38
|
-
HF_REPO = "jinaai/jina-code-embeddings-1.5b-mlx"
|
|
39
|
-
|
|
40
|
-
def ensure_model():
    """Fetch the model snapshot into MODEL_DIR unless the weights already exist.

    Presence of ``model.safetensors`` in MODEL_DIR is the only check made.
    When a download is needed, a ``{"status": "downloading"}`` JSON line is
    printed first; if the download fails, an ``{"error": ...}`` JSON line is
    printed and the process exits with status 1.
    """
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    if not os.path.exists(weights_path):
        print(json.dumps({"status": "downloading", "model": HF_REPO}), flush=True)
        try:
            # Imported lazily: only needed when the weights are missing
            from huggingface_hub import snapshot_download
            snapshot_download(HF_REPO, local_dir=MODEL_DIR)
        except Exception as exc:
            print(json.dumps({"error": f"Model download failed: {exc}"}), flush=True)
            sys.exit(1)
|
|
53
|
-
|
|
54
|
-
# Batch-size tiers keyed by text length: the shorter the texts, the more of
# them are batched together. Each entry is (max_chars, max_batch_size); the
# entries are listed in ascending order of max_chars.
TIERS = [
    (500, 128),    # very short: signatures, interfaces
    (2000, 64),    # short: small functions
    (8000, 24),    # medium: typical functions
    (32000, 8),    # long: large functions, classes
    (999999, 4),   # very long: entire files
]
|
|
63
|
-
|
|
64
|
-
def load_model():
    """Build the embedding model and tokenizer from the files in MODEL_DIR.

    MODEL_DIR is put at the front of ``sys.path`` so the ``model`` module
    found there can be imported. Reads ``config.json``,
    ``model.safetensors`` and ``tokenizer.json`` from MODEL_DIR.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    sys.path.insert(0, MODEL_DIR)
    from model import JinaCodeEmbeddingModel

    config_path = os.path.join(MODEL_DIR, "config.json")
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    tokenizer_path = os.path.join(MODEL_DIR, "tokenizer.json")

    with open(config_path) as config_file:
        config = json.load(config_file)

    model = JinaCodeEmbeddingModel(config)
    model.load_weights(list(mx.load(weights_path).items()))
    # Evaluate the parameters now rather than leaving them pending
    mx.eval(model.parameters())

    return model, Tokenizer.from_file(tokenizer_path)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def get_batch_size_for_length(char_len):
    """Return the batch size of the first TIERS entry whose threshold fits.

    Args:
        char_len: Text length in characters.

    Returns:
        The batch size paired with the first threshold that is greater than
        or equal to ``char_len``, or 4 when no threshold in TIERS is.
    """
    return next(
        (batch_size for threshold, batch_size in TIERS if char_len <= threshold),
        4,
    )
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def embed_tiered(model, tokenizer, texts, task="nl2code", prompt_type="passage", truncate_dim=256):
    """Embed texts with length-tiered batching and return them in input order.

    Texts are sorted by character length and grouped into batches. Each batch
    is capped by the tier of its LONGEST text, so a long text is never packed
    into a batch sized for short ones.

    Args:
        model: Object exposing ``encode(texts, tokenizer, task=...,
            prompt_type=..., truncate_dim=...)``.
        tokenizer: Tokenizer passed through to ``model.encode``.
        texts: Sequence of strings to embed.
        task: Passed through to ``model.encode``.
        prompt_type: Passed through to ``model.encode``.
        truncate_dim: Passed through to ``model.encode``.

    Returns:
        A list with one embedding per input text, in the ORIGINAL input
        order; an empty list when ``texts`` is empty.
    """
    if not texts:
        return []

    # (original_index, text) pairs, shortest text first
    indexed = sorted(enumerate(texts), key=lambda pair: len(pair[1]))

    all_embeddings = [None] * len(texts)
    i = 0

    while i < len(indexed):
        batch_indices = []
        batch_texts = []

        # Grow the batch while the tier of the candidate text still allows it.
        # Because of the ascending sort the candidate is always the longest
        # text the batch would contain, so its tier bounds the whole batch.
        # The first text is always accepted so every iteration makes progress.
        while i < len(indexed):
            orig_idx, text = indexed[i]
            if batch_texts and len(batch_texts) >= get_batch_size_for_length(len(text)):
                break
            batch_indices.append(orig_idx)
            batch_texts.append(text)
            i += 1

        # Run inference
        embeddings = model.encode(
            batch_texts,
            tokenizer,
            task=task,
            prompt_type=prompt_type,
            truncate_dim=truncate_dim,
        )
        mx.eval(embeddings)

        # Store results at the positions the texts had in the input
        for orig_idx, embedding in zip(batch_indices, embeddings.tolist()):
            all_embeddings[orig_idx] = embedding

    return all_embeddings
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def main():
    """Run the embedder as a line-oriented JSON server on stdin/stdout.

    Ensures the model is present, loads it, prints a ``{"status": "ready"}``
    line, then handles one JSON request per input line until stdin closes or
    a ``{"cmd": "quit"}`` request arrives.

    Requests:
        ``{"cmd": "ping"}`` gets ``{"status": "ready"}``.
        ``{"cmd": "quit"}`` ends the loop.
        Any other ``cmd`` value is ignored without a response.
        Otherwise ``texts`` / ``task`` / ``type`` / ``dims`` are read (with
        defaults) and the reply carries ``embeddings``, ``count``, ``dims``
        and ``ms``.

    Malformed input and embedding failures are answered with an
    ``{"error": ...}`` line; the loop keeps running.
    """
    ensure_model()
    t0 = time.time()
    model, tokenizer = load_model()
    load_ms = int((time.time() - t0) * 1000)

    print(json.dumps({
        "status": "ready",
        "model": "jina-code-1.5b-mlx",
        "device": str(mx.default_device()),
        "load_ms": load_ms,
    }), flush=True)

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            print(json.dumps({"error": "Invalid JSON"}), flush=True)
            continue

        # Valid JSON that is not an object (a list, number, string, ...) has
        # neither "cmd" nor .get(); reject it instead of crashing the process.
        if not isinstance(req, dict):
            print(json.dumps({"error": "Request must be a JSON object"}), flush=True)
            continue

        if "cmd" in req:
            if req["cmd"] == "ping":
                print(json.dumps({"status": "ready"}), flush=True)
            elif req["cmd"] == "quit":
                break
            continue

        texts = req.get("texts", [])
        task = req.get("task", "nl2code")
        prompt_type = req.get("type", "passage")
        dims = req.get("dims", 256)

        t0 = time.time()
        try:
            embeddings = embed_tiered(model, tokenizer, texts, task, prompt_type, dims)
            elapsed_ms = int((time.time() - t0) * 1000)

            print(json.dumps({
                "embeddings": embeddings,
                "count": len(embeddings),
                "dims": dims,
                "ms": elapsed_ms,
            }), flush=True)
        except Exception as e:
            # Report the failure on the wire so one bad request does not end the loop
            print(json.dumps({"error": str(e)}), flush=True)


if __name__ == "__main__":
    main()
|