@zuvia-software-solutions/code-mapper 2.6.3 → 2.6.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@
9
9
  import { pipeline } from '@huggingface/transformers';
10
10
  const MODEL_ID = 'Xenova/bge-small-en-v1.5';
11
11
  async function main() {
12
- const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
12
+ const extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
13
13
  process.send({ type: 'ready' });
14
14
  // Process messages from parent
15
15
  process.on('message', async (msg) => {
@@ -30,7 +30,7 @@ interface NlDocument {
30
30
  source: string;
31
31
  text: string;
32
32
  }
33
- /** Build NL documents from a node */
33
+ /** Build NL documents from a node — keyword-dense, minimal tokens */
34
34
  export declare function extractNlTexts(node: NodeForNl): NlDocument[];
35
35
  /**
36
36
  * Build NL embeddings for all eligible nodes in the database.
@@ -27,7 +27,7 @@ export async function initNlEmbedder() {
27
27
  if (env.backends?.onnx?.wasm) {
28
28
  env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
29
29
  }
30
- extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
30
+ extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
31
31
  })();
32
32
  return loadPromise;
33
33
  }
@@ -147,11 +147,19 @@ function extractParamNames(content) {
147
147
  .map(p => expandIdentifier(p))
148
148
  .join(', ');
149
149
  }
150
- /** Build NL documents from a node */
150
+ /** Strip noise tokens that waste tokenizer budget without adding semantic value */
151
+ function condense(text) {
152
+ return text
153
+ .replace(/---[^-]*---/g, '') // section headers from comments
154
+ .replace(/[{}[\]()'",;:]/g, '') // punctuation
155
+ .replace(/\. /g, ' ') // sentence separators
156
+ .replace(/\s{2,}/g, ' ') // collapse whitespace
157
+ .trim();
158
+ }
159
+ /** Build NL documents from a node — keyword-dense, minimal tokens */
151
160
  export function extractNlTexts(node) {
152
161
  const docs = [];
153
- const name = node.name;
154
- const expandedName = expandIdentifier(name);
162
+ const expandedName = expandIdentifier(node.name);
155
163
  const dir = node.filePath.split('/').slice(-3, -1).join('/');
156
164
  // 1. Comment-based NL text (primary)
157
165
  const comment = extractFullComment(node.content);
@@ -159,22 +167,21 @@ export function extractNlTexts(node) {
159
167
  docs.push({
160
168
  nodeId: node.id,
161
169
  source: 'comment',
162
- text: `${expandedName}: ${comment}. File: ${dir}`,
170
+ text: condense(`${expandedName} ${comment} ${dir}`),
163
171
  });
164
172
  }
165
- // 2. Name + params + return type (always available)
173
+ // 2. Name + params (always available)
166
174
  const params = extractParamNames(node.content);
167
- const parts = [expandedName];
168
- if (params)
169
- parts.push(`Parameters: ${params}`);
170
- if (dir)
171
- parts.push(`in ${dir}`);
172
175
  if (!comment) {
173
- // Only add name-based doc if no comment (avoid duplication)
176
+ const parts = [expandedName];
177
+ if (params)
178
+ parts.push(params);
179
+ if (dir)
180
+ parts.push(dir);
174
181
  docs.push({
175
182
  nodeId: node.id,
176
183
  source: 'name',
177
- text: parts.join('. '),
184
+ text: condense(parts.join(' ')),
178
185
  });
179
186
  }
180
187
  // 3. Enum/const values
@@ -184,7 +191,7 @@ export function extractNlTexts(node) {
184
191
  docs.push({
185
192
  nodeId: node.id,
186
193
  source: 'enum',
187
- text: `${expandedName}: ${values}`,
194
+ text: condense(`${expandedName} ${values}`),
188
195
  });
189
196
  }
190
197
  }
@@ -274,8 +281,9 @@ export async function buildNlEmbeddings(db, onProgress) {
274
281
  // Find worker script path
275
282
  const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
276
283
  const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
277
- // Split work across workers
278
- const ITEMS_PER_BATCH = 50;
284
+ // Split work across workers — larger batches reduce IPC round-trips
285
+ // and let the ONNX runtime amortize overhead across more items
286
+ const ITEMS_PER_BATCH = 256;
279
287
  let nextIdx = 0;
280
288
  let embedded = 0;
281
289
  const getNextBatch = () => {
@@ -432,3 +440,4 @@ export async function buildNlEmbeddings(db, onProgress) {
432
440
  }
433
441
  return { embedded, skipped, durationMs: Date.now() - t0 };
434
442
  }
443
+ // touch
@@ -311,6 +311,99 @@ export async function refreshFiles(db, repoPath, dirtyFiles) {
311
311
  }
312
312
  }
313
313
  // FTS5 auto-updates via triggers — no manual rebuild needed
314
+ // Phase 5: Rebuild graph-level analyses (communities, processes, interface dispatch)
315
+ // These are cheap (<300ms) but critical — stale communities/processes mislead agents.
316
+ // Load full graph from SQLite, re-run analyses, write results back.
317
+ try {
318
+ const { createKnowledgeGraph } = await import('../graph/graph.js');
319
+ const { processCommunities } = await import('../ingestion/community-processor.js');
320
+ const { processProcesses } = await import('../ingestion/process-processor.js');
321
+ const { insertNodesBatch, insertEdgesBatch } = await import('../db/adapter.js');
322
+ const { toNodeId, toEdgeId } = await import('../db/schema.js');
323
+ const graph = createKnowledgeGraph();
324
+ // Load all non-community/process nodes and edges into in-memory graph
325
+ const allNodes = db.prepare('SELECT * FROM nodes WHERE label NOT IN (\'Community\', \'Process\')').all();
326
+ const allEdges = db.prepare('SELECT * FROM edges WHERE type NOT IN (\'MEMBER_OF\', \'STEP_IN_PROCESS\')').all();
327
+ for (const row of allNodes) {
328
+ graph.addNode({
329
+ id: toNodeId(row.id),
330
+ label: row.label,
331
+ properties: {
332
+ name: row.name ?? '', filePath: row.filePath ?? '',
333
+ startLine: row.startLine ?? undefined, endLine: row.endLine ?? undefined,
334
+ isExported: Boolean(row.isExported),
335
+ description: row.description ?? undefined,
336
+ parameterCount: row.parameterCount ?? undefined,
337
+ returnType: row.returnType ?? undefined,
338
+ },
339
+ });
340
+ }
341
+ for (const row of allEdges) {
342
+ graph.addRelationship({
343
+ id: toEdgeId(row.id),
344
+ sourceId: toNodeId(row.sourceId),
345
+ targetId: toNodeId(row.targetId),
346
+ type: row.type,
347
+ confidence: row.confidence ?? 1.0,
348
+ reason: row.reason ?? '',
349
+ });
350
+ }
351
+ // Delete old community/process data from SQLite
352
+ db.exec('BEGIN');
353
+ db.prepare("DELETE FROM edges WHERE type IN ('MEMBER_OF', 'STEP_IN_PROCESS')").run();
354
+ db.prepare("DELETE FROM nodes WHERE label IN ('Community', 'Process')").run();
355
+ // Re-run community detection + process detection
356
+ const communityResult = await processCommunities(graph, () => { });
357
+ const processResult = await processProcesses(graph, communityResult.memberships, () => { });
358
+ // Write new community/process nodes + edges back to SQLite
359
+ const newNodes = [];
360
+ const newEdges = [];
361
+ for (const node of graph.iterNodes()) {
362
+ if (node.label === 'Community' || node.label === 'Process') {
363
+ newNodes.push({
364
+ id: node.id,
365
+ label: node.label,
366
+ name: node.properties.name ?? '',
367
+ filePath: node.properties.filePath ?? '',
368
+ heuristicLabel: node.properties.heuristicLabel ?? null,
369
+ cohesion: node.properties.cohesion ?? null,
370
+ symbolCount: node.properties.symbolCount ?? null,
371
+ keywords: Array.isArray(node.properties.keywords) ? node.properties.keywords.join(', ') : node.properties.keywords ?? null,
372
+ processType: node.properties.processType ?? null,
373
+ stepCount: node.properties.stepCount ?? null,
374
+ communities: node.properties.communities ?? null,
375
+ entryPointId: node.properties.entryPointId ?? null,
376
+ terminalId: node.properties.terminalId ?? null,
377
+ });
378
+ }
379
+ }
380
+ for (const rel of graph.iterRelationships()) {
381
+ if (rel.type === 'MEMBER_OF' || rel.type === 'STEP_IN_PROCESS') {
382
+ newEdges.push({
383
+ id: rel.id,
384
+ sourceId: rel.sourceId,
385
+ targetId: rel.targetId,
386
+ type: rel.type,
387
+ confidence: rel.confidence,
388
+ reason: rel.reason,
389
+ step: rel.step,
390
+ });
391
+ }
392
+ }
393
+ if (newNodes.length > 0)
394
+ insertNodesBatch(db, newNodes);
395
+ if (newEdges.length > 0)
396
+ insertEdgesBatch(db, newEdges);
397
+ db.exec('COMMIT');
398
+ console.error(`Code Mapper: refresh Phase 5 — ${communityResult.communities.length} communities, ${processResult.stats.totalProcesses} processes rebuilt`);
399
+ }
400
+ catch (err) {
401
+ try {
402
+ db.exec('ROLLBACK');
403
+ }
404
+ catch { }
405
+ console.error(`Code Mapper: Phase 5 graph rebuild failed: ${err instanceof Error ? err.message : err}`);
406
+ }
314
407
  return {
315
408
  filesProcessed: filesToProcess.length, filesSkipped,
316
409
  nodesDeleted, nodesInserted, edgesInserted,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.6.3",
3
+ "version": "2.6.5",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",