@zuvia-software-solutions/code-mapper 2.6.3 → 2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import { pipeline } from '@huggingface/transformers';
|
|
10
10
|
const MODEL_ID = 'Xenova/bge-small-en-v1.5';
|
|
11
11
|
async function main() {
|
|
12
|
-
const extractor = await pipeline('feature-extraction', MODEL_ID, {
|
|
12
|
+
const extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
|
|
13
13
|
process.send({ type: 'ready' });
|
|
14
14
|
// Process messages from parent
|
|
15
15
|
process.on('message', async (msg) => {
|
|
@@ -30,7 +30,7 @@ interface NlDocument {
|
|
|
30
30
|
source: string;
|
|
31
31
|
text: string;
|
|
32
32
|
}
|
|
33
|
-
/** Build NL documents from a node */
|
|
33
|
+
/** Build NL documents from a node — keyword-dense, minimal tokens */
|
|
34
34
|
export declare function extractNlTexts(node: NodeForNl): NlDocument[];
|
|
35
35
|
/**
|
|
36
36
|
* Build NL embeddings for all eligible nodes in the database.
|
|
@@ -27,7 +27,7 @@ export async function initNlEmbedder() {
|
|
|
27
27
|
if (env.backends?.onnx?.wasm) {
|
|
28
28
|
env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
|
|
29
29
|
}
|
|
30
|
-
extractor = await pipeline('feature-extraction', MODEL_ID, {
|
|
30
|
+
extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
|
|
31
31
|
})();
|
|
32
32
|
return loadPromise;
|
|
33
33
|
}
|
|
@@ -147,11 +147,19 @@ function extractParamNames(content) {
|
|
|
147
147
|
.map(p => expandIdentifier(p))
|
|
148
148
|
.join(', ');
|
|
149
149
|
}
|
|
150
|
-
/**
|
|
150
|
+
/** Strip noise tokens that waste tokenizer budget without adding semantic value */
|
|
151
|
+
function condense(text) {
|
|
152
|
+
return text
|
|
153
|
+
.replace(/---[^-]*---/g, '') // section headers from comments
|
|
154
|
+
.replace(/[{}[\]()'",;:]/g, '') // punctuation
|
|
155
|
+
.replace(/\. /g, ' ') // sentence separators
|
|
156
|
+
.replace(/\s{2,}/g, ' ') // collapse whitespace
|
|
157
|
+
.trim();
|
|
158
|
+
}
|
|
159
|
+
/** Build NL documents from a node — keyword-dense, minimal tokens */
|
|
151
160
|
export function extractNlTexts(node) {
|
|
152
161
|
const docs = [];
|
|
153
|
-
const
|
|
154
|
-
const expandedName = expandIdentifier(name);
|
|
162
|
+
const expandedName = expandIdentifier(node.name);
|
|
155
163
|
const dir = node.filePath.split('/').slice(-3, -1).join('/');
|
|
156
164
|
// 1. Comment-based NL text (primary)
|
|
157
165
|
const comment = extractFullComment(node.content);
|
|
@@ -159,22 +167,21 @@ export function extractNlTexts(node) {
|
|
|
159
167
|
docs.push({
|
|
160
168
|
nodeId: node.id,
|
|
161
169
|
source: 'comment',
|
|
162
|
-
text: `${expandedName}
|
|
170
|
+
text: condense(`${expandedName} ${comment} ${dir}`),
|
|
163
171
|
});
|
|
164
172
|
}
|
|
165
|
-
// 2. Name + params
|
|
173
|
+
// 2. Name + params (always available)
|
|
166
174
|
const params = extractParamNames(node.content);
|
|
167
|
-
const parts = [expandedName];
|
|
168
|
-
if (params)
|
|
169
|
-
parts.push(`Parameters: ${params}`);
|
|
170
|
-
if (dir)
|
|
171
|
-
parts.push(`in ${dir}`);
|
|
172
175
|
if (!comment) {
|
|
173
|
-
|
|
176
|
+
const parts = [expandedName];
|
|
177
|
+
if (params)
|
|
178
|
+
parts.push(params);
|
|
179
|
+
if (dir)
|
|
180
|
+
parts.push(dir);
|
|
174
181
|
docs.push({
|
|
175
182
|
nodeId: node.id,
|
|
176
183
|
source: 'name',
|
|
177
|
-
text: parts.join('
|
|
184
|
+
text: condense(parts.join(' ')),
|
|
178
185
|
});
|
|
179
186
|
}
|
|
180
187
|
// 3. Enum/const values
|
|
@@ -184,7 +191,7 @@ export function extractNlTexts(node) {
|
|
|
184
191
|
docs.push({
|
|
185
192
|
nodeId: node.id,
|
|
186
193
|
source: 'enum',
|
|
187
|
-
text: `${expandedName}
|
|
194
|
+
text: condense(`${expandedName} ${values}`),
|
|
188
195
|
});
|
|
189
196
|
}
|
|
190
197
|
}
|
|
@@ -274,8 +281,9 @@ export async function buildNlEmbeddings(db, onProgress) {
|
|
|
274
281
|
// Find worker script path
|
|
275
282
|
const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
|
|
276
283
|
const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
|
|
277
|
-
// Split work across workers
|
|
278
|
-
|
|
284
|
+
// Split work across workers — larger batches reduce IPC round-trips
|
|
285
|
+
// and let the ONNX runtime amortize overhead across more items
|
|
286
|
+
const ITEMS_PER_BATCH = 256;
|
|
279
287
|
let nextIdx = 0;
|
|
280
288
|
let embedded = 0;
|
|
281
289
|
const getNextBatch = () => {
|
|
@@ -432,3 +440,4 @@ export async function buildNlEmbeddings(db, onProgress) {
|
|
|
432
440
|
}
|
|
433
441
|
return { embedded, skipped, durationMs: Date.now() - t0 };
|
|
434
442
|
}
|
|
443
|
+
// touch
|
|
@@ -311,6 +311,99 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
|
|
|
311
311
|
}
|
|
312
312
|
}
|
|
313
313
|
// FTS5 auto-updates via triggers — no manual rebuild needed
|
|
314
|
+
// Phase 5: Rebuild graph-level analyses (communities, processes, interface dispatch)
|
|
315
|
+
// These are cheap (<300ms) but critical — stale communities/processes mislead agents.
|
|
316
|
+
// Load full graph from SQLite, re-run analyses, write results back.
|
|
317
|
+
try {
|
|
318
|
+
const { createKnowledgeGraph } = await import('../graph/graph.js');
|
|
319
|
+
const { processCommunities } = await import('../ingestion/community-processor.js');
|
|
320
|
+
const { processProcesses } = await import('../ingestion/process-processor.js');
|
|
321
|
+
const { insertNodesBatch, insertEdgesBatch } = await import('../db/adapter.js');
|
|
322
|
+
const { toNodeId, toEdgeId } = await import('../db/schema.js');
|
|
323
|
+
const graph = createKnowledgeGraph();
|
|
324
|
+
// Load all non-community/process nodes and edges into in-memory graph
|
|
325
|
+
const allNodes = db.prepare('SELECT * FROM nodes WHERE label NOT IN (\'Community\', \'Process\')').all();
|
|
326
|
+
const allEdges = db.prepare('SELECT * FROM edges WHERE type NOT IN (\'MEMBER_OF\', \'STEP_IN_PROCESS\')').all();
|
|
327
|
+
for (const row of allNodes) {
|
|
328
|
+
graph.addNode({
|
|
329
|
+
id: toNodeId(row.id),
|
|
330
|
+
label: row.label,
|
|
331
|
+
properties: {
|
|
332
|
+
name: row.name ?? '', filePath: row.filePath ?? '',
|
|
333
|
+
startLine: row.startLine ?? undefined, endLine: row.endLine ?? undefined,
|
|
334
|
+
isExported: Boolean(row.isExported),
|
|
335
|
+
description: row.description ?? undefined,
|
|
336
|
+
parameterCount: row.parameterCount ?? undefined,
|
|
337
|
+
returnType: row.returnType ?? undefined,
|
|
338
|
+
},
|
|
339
|
+
});
|
|
340
|
+
}
|
|
341
|
+
for (const row of allEdges) {
|
|
342
|
+
graph.addRelationship({
|
|
343
|
+
id: toEdgeId(row.id),
|
|
344
|
+
sourceId: toNodeId(row.sourceId),
|
|
345
|
+
targetId: toNodeId(row.targetId),
|
|
346
|
+
type: row.type,
|
|
347
|
+
confidence: row.confidence ?? 1.0,
|
|
348
|
+
reason: row.reason ?? '',
|
|
349
|
+
});
|
|
350
|
+
}
|
|
351
|
+
// Delete old community/process data from SQLite
|
|
352
|
+
db.exec('BEGIN');
|
|
353
|
+
db.prepare("DELETE FROM edges WHERE type IN ('MEMBER_OF', 'STEP_IN_PROCESS')").run();
|
|
354
|
+
db.prepare("DELETE FROM nodes WHERE label IN ('Community', 'Process')").run();
|
|
355
|
+
// Re-run community detection + process detection
|
|
356
|
+
const communityResult = await processCommunities(graph, () => { });
|
|
357
|
+
const processResult = await processProcesses(graph, communityResult.memberships, () => { });
|
|
358
|
+
// Write new community/process nodes + edges back to SQLite
|
|
359
|
+
const newNodes = [];
|
|
360
|
+
const newEdges = [];
|
|
361
|
+
for (const node of graph.iterNodes()) {
|
|
362
|
+
if (node.label === 'Community' || node.label === 'Process') {
|
|
363
|
+
newNodes.push({
|
|
364
|
+
id: node.id,
|
|
365
|
+
label: node.label,
|
|
366
|
+
name: node.properties.name ?? '',
|
|
367
|
+
filePath: node.properties.filePath ?? '',
|
|
368
|
+
heuristicLabel: node.properties.heuristicLabel ?? null,
|
|
369
|
+
cohesion: node.properties.cohesion ?? null,
|
|
370
|
+
symbolCount: node.properties.symbolCount ?? null,
|
|
371
|
+
keywords: Array.isArray(node.properties.keywords) ? node.properties.keywords.join(', ') : node.properties.keywords ?? null,
|
|
372
|
+
processType: node.properties.processType ?? null,
|
|
373
|
+
stepCount: node.properties.stepCount ?? null,
|
|
374
|
+
communities: node.properties.communities ?? null,
|
|
375
|
+
entryPointId: node.properties.entryPointId ?? null,
|
|
376
|
+
terminalId: node.properties.terminalId ?? null,
|
|
377
|
+
});
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
for (const rel of graph.iterRelationships()) {
|
|
381
|
+
if (rel.type === 'MEMBER_OF' || rel.type === 'STEP_IN_PROCESS') {
|
|
382
|
+
newEdges.push({
|
|
383
|
+
id: rel.id,
|
|
384
|
+
sourceId: rel.sourceId,
|
|
385
|
+
targetId: rel.targetId,
|
|
386
|
+
type: rel.type,
|
|
387
|
+
confidence: rel.confidence,
|
|
388
|
+
reason: rel.reason,
|
|
389
|
+
step: rel.step,
|
|
390
|
+
});
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
if (newNodes.length > 0)
|
|
394
|
+
insertNodesBatch(db, newNodes);
|
|
395
|
+
if (newEdges.length > 0)
|
|
396
|
+
insertEdgesBatch(db, newEdges);
|
|
397
|
+
db.exec('COMMIT');
|
|
398
|
+
console.error(`Code Mapper: refresh Phase 5 — ${communityResult.communities.length} communities, ${processResult.stats.totalProcesses} processes rebuilt`);
|
|
399
|
+
}
|
|
400
|
+
catch (err) {
|
|
401
|
+
try {
|
|
402
|
+
db.exec('ROLLBACK');
|
|
403
|
+
}
|
|
404
|
+
catch { }
|
|
405
|
+
console.error(`Code Mapper: Phase 5 graph rebuild failed: ${err instanceof Error ? err.message : err}`);
|
|
406
|
+
}
|
|
314
407
|
return {
|
|
315
408
|
filesProcessed: filesToProcess.length, filesSkipped,
|
|
316
409
|
nodesDeleted, nodesInserted, edgesInserted,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.6.3",
|
|
3
|
+
"version": "2.6.5",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|