@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/dist/cli/ai-context.js +1 -1
  2. package/dist/cli/analyze.d.ts +1 -0
  3. package/dist/cli/analyze.js +73 -82
  4. package/dist/cli/augment.js +0 -2
  5. package/dist/cli/eval-server.d.ts +2 -2
  6. package/dist/cli/eval-server.js +6 -6
  7. package/dist/cli/index.js +6 -10
  8. package/dist/cli/mcp.d.ts +1 -3
  9. package/dist/cli/mcp.js +3 -3
  10. package/dist/cli/refresh.d.ts +2 -2
  11. package/dist/cli/refresh.js +24 -29
  12. package/dist/cli/status.js +4 -13
  13. package/dist/cli/tool.d.ts +5 -4
  14. package/dist/cli/tool.js +8 -10
  15. package/dist/config/ignore-service.js +14 -34
  16. package/dist/core/augmentation/engine.js +53 -83
  17. package/dist/core/db/adapter.d.ts +99 -0
  18. package/dist/core/db/adapter.js +402 -0
  19. package/dist/core/db/graph-loader.d.ts +27 -0
  20. package/dist/core/db/graph-loader.js +148 -0
  21. package/dist/core/db/queries.d.ts +160 -0
  22. package/dist/core/db/queries.js +441 -0
  23. package/dist/core/db/schema.d.ts +108 -0
  24. package/dist/core/db/schema.js +136 -0
  25. package/dist/core/embeddings/embedder.d.ts +21 -12
  26. package/dist/core/embeddings/embedder.js +104 -50
  27. package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
  28. package/dist/core/embeddings/embedding-pipeline.js +220 -262
  29. package/dist/core/embeddings/text-generator.js +4 -19
  30. package/dist/core/embeddings/types.d.ts +1 -1
  31. package/dist/core/graph/graph.d.ts +1 -1
  32. package/dist/core/graph/graph.js +1 -0
  33. package/dist/core/graph/types.d.ts +11 -9
  34. package/dist/core/graph/types.js +4 -1
  35. package/dist/core/incremental/refresh.d.ts +46 -0
  36. package/dist/core/incremental/refresh.js +464 -0
  37. package/dist/core/incremental/types.d.ts +2 -1
  38. package/dist/core/incremental/types.js +42 -44
  39. package/dist/core/ingestion/ast-cache.js +1 -0
  40. package/dist/core/ingestion/call-processor.d.ts +15 -3
  41. package/dist/core/ingestion/call-processor.js +448 -60
  42. package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
  43. package/dist/core/ingestion/cluster-enricher.js +2 -0
  44. package/dist/core/ingestion/community-processor.d.ts +1 -1
  45. package/dist/core/ingestion/community-processor.js +8 -3
  46. package/dist/core/ingestion/export-detection.d.ts +1 -1
  47. package/dist/core/ingestion/export-detection.js +1 -1
  48. package/dist/core/ingestion/filesystem-walker.js +1 -1
  49. package/dist/core/ingestion/heritage-processor.d.ts +2 -2
  50. package/dist/core/ingestion/heritage-processor.js +22 -11
  51. package/dist/core/ingestion/import-processor.d.ts +2 -2
  52. package/dist/core/ingestion/import-processor.js +24 -9
  53. package/dist/core/ingestion/language-config.js +7 -4
  54. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  55. package/dist/core/ingestion/mro-processor.js +23 -11
  56. package/dist/core/ingestion/named-binding-extraction.js +5 -5
  57. package/dist/core/ingestion/parsing-processor.d.ts +4 -4
  58. package/dist/core/ingestion/parsing-processor.js +26 -18
  59. package/dist/core/ingestion/pipeline.d.ts +4 -2
  60. package/dist/core/ingestion/pipeline.js +50 -20
  61. package/dist/core/ingestion/process-processor.d.ts +2 -2
  62. package/dist/core/ingestion/process-processor.js +28 -14
  63. package/dist/core/ingestion/resolution-context.d.ts +1 -1
  64. package/dist/core/ingestion/resolution-context.js +14 -4
  65. package/dist/core/ingestion/resolvers/csharp.js +4 -3
  66. package/dist/core/ingestion/resolvers/go.js +3 -1
  67. package/dist/core/ingestion/resolvers/jvm.js +13 -4
  68. package/dist/core/ingestion/resolvers/standard.js +2 -2
  69. package/dist/core/ingestion/resolvers/utils.js +6 -2
  70. package/dist/core/ingestion/route-stitcher.d.ts +15 -0
  71. package/dist/core/ingestion/route-stitcher.js +92 -0
  72. package/dist/core/ingestion/structure-processor.d.ts +1 -1
  73. package/dist/core/ingestion/structure-processor.js +3 -2
  74. package/dist/core/ingestion/symbol-table.d.ts +2 -0
  75. package/dist/core/ingestion/symbol-table.js +5 -1
  76. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  77. package/dist/core/ingestion/tree-sitter-queries.js +177 -0
  78. package/dist/core/ingestion/type-env.js +20 -0
  79. package/dist/core/ingestion/type-extractors/csharp.js +4 -3
  80. package/dist/core/ingestion/type-extractors/go.js +23 -12
  81. package/dist/core/ingestion/type-extractors/php.js +18 -10
  82. package/dist/core/ingestion/type-extractors/ruby.js +15 -3
  83. package/dist/core/ingestion/type-extractors/rust.js +3 -2
  84. package/dist/core/ingestion/type-extractors/shared.js +3 -2
  85. package/dist/core/ingestion/type-extractors/typescript.js +11 -5
  86. package/dist/core/ingestion/utils.d.ts +27 -4
  87. package/dist/core/ingestion/utils.js +145 -100
  88. package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
  89. package/dist/core/ingestion/workers/parse-worker.js +97 -29
  90. package/dist/core/ingestion/workers/worker-pool.js +3 -0
  91. package/dist/core/search/bm25-index.d.ts +15 -8
  92. package/dist/core/search/bm25-index.js +48 -98
  93. package/dist/core/search/hybrid-search.d.ts +9 -3
  94. package/dist/core/search/hybrid-search.js +30 -25
  95. package/dist/core/search/reranker.js +9 -7
  96. package/dist/core/search/types.d.ts +0 -4
  97. package/dist/core/semantic/tsgo-service.d.ts +5 -1
  98. package/dist/core/semantic/tsgo-service.js +161 -66
  99. package/dist/lib/tsgo-test.d.ts +2 -0
  100. package/dist/lib/tsgo-test.js +6 -0
  101. package/dist/lib/type-utils.d.ts +25 -0
  102. package/dist/lib/type-utils.js +22 -0
  103. package/dist/lib/utils.d.ts +3 -2
  104. package/dist/lib/utils.js +3 -2
  105. package/dist/mcp/compatible-stdio-transport.js +1 -1
  106. package/dist/mcp/local/local-backend.d.ts +29 -56
  107. package/dist/mcp/local/local-backend.js +808 -1118
  108. package/dist/mcp/resources.js +35 -25
  109. package/dist/mcp/server.d.ts +1 -1
  110. package/dist/mcp/server.js +5 -5
  111. package/dist/mcp/tools.js +24 -25
  112. package/dist/storage/repo-manager.d.ts +2 -12
  113. package/dist/storage/repo-manager.js +1 -47
  114. package/dist/types/pipeline.d.ts +8 -5
  115. package/dist/types/pipeline.js +5 -0
  116. package/package.json +18 -11
  117. package/dist/cli/serve.d.ts +0 -5
  118. package/dist/cli/serve.js +0 -8
  119. package/dist/core/incremental/child-process.d.ts +0 -8
  120. package/dist/core/incremental/child-process.js +0 -649
  121. package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
  122. package/dist/core/incremental/refresh-coordinator.js +0 -147
  123. package/dist/core/lbug/csv-generator.d.ts +0 -28
  124. package/dist/core/lbug/csv-generator.js +0 -355
  125. package/dist/core/lbug/lbug-adapter.d.ts +0 -96
  126. package/dist/core/lbug/lbug-adapter.js +0 -753
  127. package/dist/core/lbug/schema.d.ts +0 -46
  128. package/dist/core/lbug/schema.js +0 -402
  129. package/dist/mcp/core/embedder.d.ts +0 -24
  130. package/dist/mcp/core/embedder.js +0 -168
  131. package/dist/mcp/core/lbug-adapter.d.ts +0 -29
  132. package/dist/mcp/core/lbug-adapter.js +0 -330
  133. package/dist/server/api.d.ts +0 -5
  134. package/dist/server/api.js +0 -340
  135. package/dist/server/mcp-http.d.ts +0 -7
  136. package/dist/server/mcp-http.js +0 -95
  137. package/models/mlx-embedder.py +0 -185
@@ -1,95 +0,0 @@
1
- // code-mapper/src/server/mcp-http.ts
2
- /** @file mcp-http.ts
3
- * @description Mounts Code Mapper MCP server on Express using StreamableHTTP transport
4
- * Each client gets a stateful session; LocalBackend is shared (thread-safe)
5
- * Sessions are evicted on close or after idle timeout */
6
- import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
7
- import { createMCPServer } from '../mcp/server.js';
8
- import { randomUUID } from 'crypto';
9
- /** Idle sessions are evicted after 30 minutes */
10
- const SESSION_TTL_MS = 30 * 60 * 1000;
11
- /** Cleanup sweep runs every 5 minutes */
12
- const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;
13
- export function mountMCPEndpoints(app, backend) {
14
- const sessions = new Map();
15
- // Periodic cleanup of idle sessions
16
- const cleanupTimer = setInterval(() => {
17
- const now = Date.now();
18
- for (const [id, session] of sessions) {
19
- if (now - session.lastActivity > SESSION_TTL_MS) {
20
- try {
21
- session.server.close();
22
- }
23
- catch { }
24
- sessions.delete(id);
25
- }
26
- }
27
- }, CLEANUP_INTERVAL_MS);
28
- if (cleanupTimer && typeof cleanupTimer === 'object' && 'unref' in cleanupTimer) {
29
- cleanupTimer.unref();
30
- }
31
- const handleMcpRequest = async (req, res) => {
32
- const sessionId = req.headers['mcp-session-id'];
33
- if (sessionId && sessions.has(sessionId)) {
34
- // Existing session — delegate to its transport
35
- const session = sessions.get(sessionId);
36
- session.lastActivity = Date.now();
37
- await session.transport.handleRequest(req, res, req.body);
38
- }
39
- else if (sessionId) {
40
- // Unknown/expired session ID (per MCP spec, tell client to re-initialize)
41
- res.status(404).json({
42
- jsonrpc: '2.0',
43
- error: { code: -32001, message: 'Session not found. Re-initialize.' },
44
- id: null,
45
- });
46
- }
47
- else if (req.method === 'POST') {
48
- // No session ID — new client initializing
49
- const transport = new StreamableHTTPServerTransport({
50
- sessionIdGenerator: () => randomUUID(),
51
- });
52
- const server = createMCPServer(backend);
53
- await server.connect(transport);
54
- await transport.handleRequest(req, res, req.body);
55
- if (transport.sessionId) {
56
- sessions.set(transport.sessionId, { server, transport, lastActivity: Date.now() });
57
- transport.onclose = () => {
58
- sessions.delete(transport.sessionId);
59
- };
60
- }
61
- }
62
- else {
63
- res.status(400).json({
64
- jsonrpc: '2.0',
65
- error: { code: -32000, message: 'No valid session. Send a POST to initialize.' },
66
- id: null,
67
- });
68
- }
69
- };
70
- app.all('/api/mcp', (req, res) => {
71
- void handleMcpRequest(req, res).catch((err) => {
72
- console.error('MCP HTTP request failed:', err);
73
- if (res.headersSent)
74
- return;
75
- res.status(500).json({
76
- jsonrpc: '2.0',
77
- error: { code: -32000, message: 'Internal MCP server error' },
78
- id: null,
79
- });
80
- });
81
- });
82
- const cleanup = async () => {
83
- clearInterval(cleanupTimer);
84
- const closers = [...sessions.values()].map(async (session) => {
85
- try {
86
- await Promise.resolve(session.server.close());
87
- }
88
- catch { }
89
- });
90
- sessions.clear();
91
- await Promise.allSettled(closers);
92
- };
93
- console.log('MCP HTTP endpoints mounted at /api/mcp');
94
- return cleanup;
95
- }
@@ -1,185 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- MLX-accelerated code embedder for Apple Silicon.
4
-
5
- Runs as a persistent subprocess — reads JSON batches from stdin,
6
- outputs embeddings to stdout. Node.js parent communicates via IPC.
7
-
8
- Protocol:
9
- Input (one JSON per line): {"texts": [...], "task": "nl2code", "type": "query"|"passage", "dims": 256}
10
- Output (one JSON per line): {"embeddings": [[...], [...]], "ms": 42}
11
-
12
- Special commands:
13
- {"cmd": "ping"} -> {"status": "ready", "model": "jina-code-1.5b", "device": "gpu"}
14
- {"cmd": "quit"} -> exits
15
-
16
- Optimizations:
17
- - Length-tiered batching: short texts in large batches, long texts in small batches
18
- - Sorted by length within tiers to minimize padding waste
19
- - Results returned in original input order
20
- """
21
-
22
- import sys
23
- import os
24
- import json
25
- import time
26
-
27
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
28
-
29
- try:
30
- import mlx.core as mx
31
- from tokenizers import Tokenizer
32
- except ImportError:
33
- print(json.dumps({"error": "MLX not installed. Run: pip3 install mlx tokenizers huggingface_hub"}), flush=True)
34
- sys.exit(1)
35
-
36
- SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
37
- MODEL_DIR = os.path.join(SCRIPT_DIR, "jina-code-1.5b-mlx")
38
- HF_REPO = "jinaai/jina-code-embeddings-1.5b-mlx"
39
-
40
- def ensure_model():
41
- """Download the model if not present."""
42
- safetensors = os.path.join(MODEL_DIR, "model.safetensors")
43
- if os.path.exists(safetensors):
44
- return
45
-
46
- print(json.dumps({"status": "downloading", "model": HF_REPO}), flush=True)
47
- try:
48
- from huggingface_hub import snapshot_download
49
- snapshot_download(HF_REPO, local_dir=MODEL_DIR)
50
- except Exception as e:
51
- print(json.dumps({"error": f"Model download failed: {e}"}), flush=True)
52
- sys.exit(1)
53
-
54
- # Length-tiered batch sizes — shorter texts can batch more efficiently
55
- # Char thresholds and max batch sizes per tier
56
- TIERS = [
57
- (500, 128), # very short (signatures, interfaces)
58
- (2000, 64), # short (small functions)
59
- (8000, 24), # medium (typical functions)
60
- (32000, 8), # long (large functions, classes)
61
- (999999, 4), # very long (entire files)
62
- ]
63
-
64
- def load_model():
65
- sys.path.insert(0, MODEL_DIR)
66
- from model import JinaCodeEmbeddingModel
67
-
68
- with open(os.path.join(MODEL_DIR, "config.json")) as f:
69
- config = json.load(f)
70
-
71
- model = JinaCodeEmbeddingModel(config)
72
- weights = mx.load(os.path.join(MODEL_DIR, "model.safetensors"))
73
- model.load_weights(list(weights.items()))
74
- mx.eval(model.parameters())
75
-
76
- tokenizer = Tokenizer.from_file(os.path.join(MODEL_DIR, "tokenizer.json"))
77
-
78
- return model, tokenizer
79
-
80
-
81
- def get_batch_size_for_length(char_len):
82
- for threshold, batch_size in TIERS:
83
- if char_len <= threshold:
84
- return batch_size
85
- return 4
86
-
87
-
88
- def embed_tiered(model, tokenizer, texts, task="nl2code", prompt_type="passage", truncate_dim=256):
89
- """Embed texts with length-tiered batching for optimal throughput.
90
- Returns embeddings in the ORIGINAL input order."""
91
- if not texts:
92
- return []
93
-
94
- # Create index-text pairs, sort by length
95
- indexed = sorted(enumerate(texts), key=lambda x: len(x[1]))
96
-
97
- # Process in length-tiered batches
98
- all_embeddings = [None] * len(texts)
99
- i = 0
100
-
101
- while i < len(indexed):
102
- # Determine batch size based on the length of the longest text in this batch
103
- char_len = len(indexed[min(i + 1, len(indexed) - 1)][1]) # peek at a representative
104
- batch_size = get_batch_size_for_length(char_len)
105
-
106
- batch_indices = []
107
- batch_texts = []
108
-
109
- # Fill batch with texts of similar length
110
- while len(batch_texts) < batch_size and i < len(indexed):
111
- orig_idx, text = indexed[i]
112
- batch_indices.append(orig_idx)
113
- batch_texts.append(text)
114
- i += 1
115
-
116
- # Run inference
117
- embeddings = model.encode(
118
- batch_texts,
119
- tokenizer,
120
- task=task,
121
- prompt_type=prompt_type,
122
- truncate_dim=truncate_dim,
123
- )
124
- mx.eval(embeddings)
125
-
126
- # Store results at original indices
127
- emb_list = embeddings.tolist()
128
- for j, orig_idx in enumerate(batch_indices):
129
- all_embeddings[orig_idx] = emb_list[j]
130
-
131
- return all_embeddings
132
-
133
-
134
- def main():
135
- ensure_model()
136
- t0 = time.time()
137
- model, tokenizer = load_model()
138
- load_ms = int((time.time() - t0) * 1000)
139
-
140
- print(json.dumps({
141
- "status": "ready",
142
- "model": "jina-code-1.5b-mlx",
143
- "device": str(mx.default_device()),
144
- "load_ms": load_ms,
145
- }), flush=True)
146
-
147
- for line in sys.stdin:
148
- line = line.strip()
149
- if not line:
150
- continue
151
-
152
- try:
153
- req = json.loads(line)
154
- except json.JSONDecodeError:
155
- print(json.dumps({"error": "Invalid JSON"}), flush=True)
156
- continue
157
-
158
- if "cmd" in req:
159
- if req["cmd"] == "ping":
160
- print(json.dumps({"status": "ready"}), flush=True)
161
- elif req["cmd"] == "quit":
162
- break
163
- continue
164
-
165
- texts = req.get("texts", [])
166
- task = req.get("task", "nl2code")
167
- prompt_type = req.get("type", "passage")
168
- dims = req.get("dims", 256)
169
-
170
- t0 = time.time()
171
- try:
172
- embeddings = embed_tiered(model, tokenizer, texts, task, prompt_type, dims)
173
- elapsed_ms = int((time.time() - t0) * 1000)
174
-
175
- print(json.dumps({
176
- "embeddings": embeddings,
177
- "count": len(embeddings),
178
- "dims": dims,
179
- "ms": elapsed_ms,
180
- }), flush=True)
181
- except Exception as e:
182
- print(json.dumps({"error": str(e)}), flush=True)
183
-
184
- if __name__ == "__main__":
185
- main()