@zuvia-software-solutions/code-mapper 2.2.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/embeddings/embedding-pipeline.js +21 -10
- package/dist/core/embeddings/text-generator.js +6 -1
- package/dist/core/embeddings/types.d.ts +2 -0
- package/dist/core/embeddings/types.js +4 -0
- package/dist/core/incremental/refresh.js +3 -0
- package/dist/core/ingestion/workers/worker-pool.js +2 -2
- package/package.json +1 -1
|
@@ -115,18 +115,29 @@ export function fetchGraphContext(db, nodes) {
|
|
|
115
115
|
* @returns Enriched text
|
|
116
116
|
*/
|
|
117
117
|
export function enrichTextWithGraphContext(text, ctx) {
|
|
118
|
-
|
|
119
|
-
if (ctx.module)
|
|
120
|
-
parts.push(`Module: ${ctx.module}`);
|
|
121
|
-
if (ctx.callers.length > 0)
|
|
122
|
-
parts.push(`Called by: ${ctx.callers.join(', ')}`);
|
|
123
|
-
if (ctx.callees.length > 0)
|
|
124
|
-
parts.push(`Calls: ${ctx.callees.join(', ')}`);
|
|
125
|
-
if (parts.length === 0)
|
|
118
|
+
if (!ctx.module && ctx.callers.length === 0 && ctx.callees.length === 0)
|
|
126
119
|
return text;
|
|
127
120
|
const lines = text.split('\n');
|
|
128
|
-
|
|
129
|
-
|
|
121
|
+
// Append Module to the File: line (matches Python batch format)
|
|
122
|
+
if (ctx.module) {
|
|
123
|
+
const fileIdx = lines.findIndex(l => l.startsWith('File: '));
|
|
124
|
+
if (fileIdx >= 0) {
|
|
125
|
+
lines[fileIdx] += ` | Module: ${ctx.module}`;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
// Insert callers/callees after the File: line
|
|
129
|
+
const insertParts = [];
|
|
130
|
+
if (ctx.callers.length > 0)
|
|
131
|
+
insertParts.push(`Called by: ${ctx.callers.join(', ')}`);
|
|
132
|
+
if (ctx.callees.length > 0)
|
|
133
|
+
insertParts.push(`Calls: ${ctx.callees.join(', ')}`);
|
|
134
|
+
if (insertParts.length > 0) {
|
|
135
|
+
const fileIdx = lines.findIndex(l => l.startsWith('File: '));
|
|
136
|
+
const insertAt = fileIdx >= 0 ? fileIdx + 1 : lines.findIndex(l => l === '') || 2;
|
|
137
|
+
for (let i = insertParts.length - 1; i >= 0; i--) {
|
|
138
|
+
lines.splice(insertAt, 0, insertParts[i] ?? '');
|
|
139
|
+
}
|
|
140
|
+
}
|
|
130
141
|
return lines.join('\n');
|
|
131
142
|
}
|
|
132
143
|
/**
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* The graph context enrichment (callers, callees, module) is applied
|
|
11
11
|
* separately by the embedding pipeline — this module handles the per-node text.
|
|
12
12
|
*/
|
|
13
|
+
import { expandIdentifier } from '../db/adapter.js';
|
|
13
14
|
/** Extract filename from a file path */
|
|
14
15
|
const getFileName = (filePath) => {
|
|
15
16
|
const parts = filePath.split('/');
|
|
@@ -143,8 +144,12 @@ function extractSignature(content, label) {
|
|
|
143
144
|
export const generateEmbeddingText = (node, _config = {}) => {
|
|
144
145
|
const label = node.label;
|
|
145
146
|
const parts = [];
|
|
146
|
-
// 1. Type + name
|
|
147
|
+
// 1. Type + name + expanded name for natural language bridge
|
|
147
148
|
parts.push(`${label}: ${node.name}`);
|
|
149
|
+
const expanded = expandIdentifier(node.name);
|
|
150
|
+
if (expanded && expanded !== node.name.toLowerCase()) {
|
|
151
|
+
parts.push(expanded);
|
|
152
|
+
}
|
|
148
153
|
// 2. First comment as natural language description
|
|
149
154
|
const comment = extractFirstComment(node.content);
|
|
150
155
|
if (comment)
|
|
@@ -3,6 +3,8 @@ export declare const EMBEDDABLE_LABELS: readonly ["Function", "Class", "Method",
|
|
|
3
3
|
export type EmbeddableLabel = typeof EMBEDDABLE_LABELS[number];
|
|
4
4
|
/** Check if a label is embeddable */
|
|
5
5
|
export declare const isEmbeddableLabel: (label: string) => label is EmbeddableLabel;
|
|
6
|
+
/** Check if a file path is a test/fixture file (skip embedding, BM25 covers it) */
|
|
7
|
+
export declare const isTestFile: (filePath: string) => boolean;
|
|
6
8
|
/** Embedding pipeline lifecycle phases */
|
|
7
9
|
export type EmbeddingPhase = 'idle' | 'loading-model' | 'embedding' | 'indexing' | 'ready' | 'error';
|
|
8
10
|
/** Progress state emitted during embedding pipeline execution */
|
|
@@ -11,6 +11,10 @@ export const EMBEDDABLE_LABELS = [
|
|
|
11
11
|
];
|
|
12
12
|
/** Check if a label is embeddable */
|
|
13
13
|
export const isEmbeddableLabel = (label) => EMBEDDABLE_LABELS.includes(label);
|
|
14
|
+
/** Test file patterns — these are searched via BM25, not semantic embeddings */
|
|
15
|
+
const TEST_PATH_PATTERNS = ['/test/', '/tests/', '/spec/', '/fixtures/', '/__tests__/', '/__mocks__/', '.test.', '.spec.', '_test.', '_spec.'];
|
|
16
|
+
/** Check if a file path is a test/fixture file (skip embedding, BM25 covers it) */
|
|
17
|
+
export const isTestFile = (filePath) => TEST_PATH_PATTERNS.some(p => filePath.includes(p));
|
|
14
18
|
// Jina Code 1.5B MLX — 1.54B params on Apple Silicon Metal
|
|
15
19
|
// Matryoshka truncation to 256 dims (trained at this dim, <1% quality loss vs 1536)
|
|
16
20
|
// Task-specific prefixes: nl2code queries, code passages
|
|
@@ -428,9 +428,12 @@ export async function refreshEmbeddings(db, dirtyFiles, hasEmbeddings) {
|
|
|
428
428
|
deleteEmbeddingsByFile(db, entry.relativePath);
|
|
429
429
|
}
|
|
430
430
|
// Step 2: Query new embeddable nodes for modified/created files
|
|
431
|
+
// Skip test/fixture files — BM25 handles them
|
|
432
|
+
const { isTestFile } = await import('../embeddings/types.js');
|
|
431
433
|
const embeddableSet = new Set(EMBEDDABLE_LABELS);
|
|
432
434
|
const modifiedPaths = dirtyFiles
|
|
433
435
|
.filter(f => f.changeKind === 'modified' || f.changeKind === 'created')
|
|
436
|
+
.filter(f => !isTestFile(f.relativePath))
|
|
434
437
|
.map(f => f.relativePath);
|
|
435
438
|
if (modifiedPaths.length === 0)
|
|
436
439
|
return;
|
|
@@ -6,8 +6,8 @@ import fs from 'node:fs';
|
|
|
6
6
|
import { fileURLToPath } from 'node:url';
|
|
7
7
|
// Max files per postMessage to keep structured-clone memory bounded
|
|
8
8
|
const SUB_BATCH_SIZE = 1500;
|
|
9
|
-
// Per sub-batch timeout
|
|
10
|
-
const SUB_BATCH_TIMEOUT_MS =
|
|
9
|
+
// Per sub-batch timeout — large codebases with big files need more time
|
|
10
|
+
const SUB_BATCH_TIMEOUT_MS = 120_000;
|
|
11
11
|
/** Create a pool of worker threads */
|
|
12
12
|
export const createWorkerPool = (workerUrl, poolSize) => {
|
|
13
13
|
// Validate worker script exists before spawning to prevent MODULE_NOT_FOUND crashes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.2.0",
|
|
3
|
+
"version": "2.2.2",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|