@zuvia-software-solutions/code-mapper 2.0.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +55 -8
- package/dist/core/embeddings/embedder.js +25 -15
- package/package.json +1 -1
package/dist/cli/analyze.js
CHANGED
|
@@ -189,7 +189,7 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
189
189
|
updateBar(60, 'Loading into database...');
|
|
190
190
|
// Reset the database (delete and recreate)
|
|
191
191
|
const t0Db = Date.now();
|
|
192
|
-
|
|
192
|
+
let db = resetDb(dbPath);
|
|
193
193
|
let dbMsgCount = 0;
|
|
194
194
|
const dbResult = loadGraphToDb(db, pipelineResult.graph, pipelineResult.repoPath, (msg) => {
|
|
195
195
|
dbMsgCount++;
|
|
@@ -229,14 +229,61 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
229
229
|
embeddingSkipped = false;
|
|
230
230
|
}
|
|
231
231
|
if (!embeddingSkipped) {
|
|
232
|
-
updateBar(90, '
|
|
232
|
+
updateBar(90, 'Generating embeddings...');
|
|
233
233
|
const t0Emb = Date.now();
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
234
|
+
// Close DB so Python can write to it
|
|
235
|
+
closeDb(dbPath);
|
|
236
|
+
// Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
|
|
237
|
+
// Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
|
|
238
|
+
const { execFile } = await import('child_process');
|
|
239
|
+
const { fileURLToPath } = await import('url');
|
|
240
|
+
const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
|
|
241
|
+
await new Promise((resolve, reject) => {
|
|
242
|
+
const proc = execFile('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
|
|
243
|
+
maxBuffer: 10 * 1024 * 1024,
|
|
244
|
+
timeout: 600_000, // 10 min max for huge codebases
|
|
245
|
+
}, (err, _stdout, stderr) => {
|
|
246
|
+
if (err) {
|
|
247
|
+
console.error(stderr || '');
|
|
248
|
+
reject(new Error(`Embedding failed: ${err.message}`));
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
resolve();
|
|
252
|
+
}
|
|
253
|
+
});
|
|
254
|
+
// Stream progress from Python's JSON lines on stdout
|
|
255
|
+
let lineBuf = '';
|
|
256
|
+
proc.stdout?.on('data', (chunk) => {
|
|
257
|
+
lineBuf += chunk.toString();
|
|
258
|
+
const lines = lineBuf.split('\n');
|
|
259
|
+
lineBuf = lines.pop() || '';
|
|
260
|
+
for (const line of lines) {
|
|
261
|
+
if (!line.trim())
|
|
262
|
+
continue;
|
|
263
|
+
try {
|
|
264
|
+
const msg = JSON.parse(line);
|
|
265
|
+
if (msg.phase === 'loaded') {
|
|
266
|
+
updateBar(91, `Model loaded (${msg.load_ms}ms)`);
|
|
267
|
+
}
|
|
268
|
+
else if (msg.phase === 'queried') {
|
|
269
|
+
updateBar(92, `Found ${msg.nodes} embeddable nodes`);
|
|
270
|
+
}
|
|
271
|
+
else if (msg.phase === 'prepared') {
|
|
272
|
+
updateBar(93, `${msg.to_embed} to embed, ${msg.skipped} cached`);
|
|
273
|
+
}
|
|
274
|
+
else if (msg.phase === 'embedded') {
|
|
275
|
+
updateBar(96, `Embedded ${msg.count} nodes (${(msg.ms / 1000).toFixed(1)}s)`);
|
|
276
|
+
}
|
|
277
|
+
else if (msg.phase === 'done') {
|
|
278
|
+
updateBar(98, `Embeddings complete (${msg.embedded} new, ${msg.skipped} cached)`);
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
catch { }
|
|
282
|
+
}
|
|
283
|
+
});
|
|
284
|
+
});
|
|
285
|
+
// Reopen DB after Python is done
|
|
286
|
+
db = openDb(dbPath);
|
|
240
287
|
embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1);
|
|
241
288
|
}
|
|
242
289
|
// Phase 5: Finalize (98-100%)
|
|
package/dist/core/embeddings/embedder.js
CHANGED
|
@@ -159,24 +159,34 @@ export const embedBatch = async (texts) => {
|
|
|
159
159
|
return [];
|
|
160
160
|
if (!ready)
|
|
161
161
|
await initEmbedder();
|
|
162
|
-
//
|
|
163
|
-
//
|
|
164
|
-
|
|
162
|
+
// Batch at Node level to keep stdin/stdout JSON messages manageable.
|
|
163
|
+
// Python does internal length-tiered batching within each chunk.
|
|
164
|
+
// 500 texts per chunk balances IPC overhead vs pipe buffer limits.
|
|
165
|
+
const CHUNK_SIZE = 500;
|
|
166
|
+
const allResults = [];
|
|
167
|
+
const totalChunks = Math.ceil(texts.length / CHUNK_SIZE);
|
|
165
168
|
const t0 = Date.now();
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
169
|
+
console.error(`Code Mapper: embedBatch ${texts.length} texts in ${totalChunks} chunk(s)...`);
|
|
170
|
+
for (let i = 0; i < texts.length; i += CHUNK_SIZE) {
|
|
171
|
+
const chunk = texts.slice(i, i + CHUNK_SIZE);
|
|
172
|
+
const result = await sendAndReceive({
|
|
173
|
+
texts: chunk,
|
|
174
|
+
task: 'nl2code',
|
|
175
|
+
type: 'passage',
|
|
176
|
+
dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
|
|
177
|
+
});
|
|
178
|
+
if (result.error)
|
|
179
|
+
throw new Error(`Batch embedding failed: ${result.error}`);
|
|
180
|
+
if (!result.embeddings || !Array.isArray(result.embeddings)) {
|
|
181
|
+
throw new Error(`Batch embedding returned invalid response: ${JSON.stringify(result).slice(0, 200)}`);
|
|
182
|
+
}
|
|
183
|
+
for (const e of result.embeddings) {
|
|
184
|
+
allResults.push(new Float32Array(e));
|
|
185
|
+
}
|
|
176
186
|
}
|
|
177
187
|
const elapsed = Date.now() - t0;
|
|
178
|
-
console.error(`Code Mapper: embedBatch complete — ${
|
|
179
|
-
return
|
|
188
|
+
console.error(`Code Mapper: embedBatch complete — ${allResults.length} embeddings in ${elapsed}ms`);
|
|
189
|
+
return allResults;
|
|
180
190
|
};
|
|
181
191
|
/**
|
|
182
192
|
* Embed a query text for semantic search (cached, uses "query" prompt type)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.0.2",
|
|
3
|
+
"version": "2.1.1",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|