@zuvia-software-solutions/code-mapper 2.6.3 → 2.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import { pipeline } from '@huggingface/transformers';
|
|
10
10
|
const MODEL_ID = 'Xenova/bge-small-en-v1.5';
|
|
11
11
|
async function main() {
|
|
12
|
-
const extractor = await pipeline('feature-extraction', MODEL_ID, {
|
|
12
|
+
const extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
|
|
13
13
|
process.send({ type: 'ready' });
|
|
14
14
|
// Process messages from parent
|
|
15
15
|
process.on('message', async (msg) => {
|
|
@@ -30,7 +30,7 @@ interface NlDocument {
|
|
|
30
30
|
source: string;
|
|
31
31
|
text: string;
|
|
32
32
|
}
|
|
33
|
-
/** Build NL documents from a node */
|
|
33
|
+
/** Build NL documents from a node — keyword-dense, minimal tokens */
|
|
34
34
|
export declare function extractNlTexts(node: NodeForNl): NlDocument[];
|
|
35
35
|
/**
|
|
36
36
|
* Build NL embeddings for all eligible nodes in the database.
|
|
@@ -27,7 +27,7 @@ export async function initNlEmbedder() {
|
|
|
27
27
|
if (env.backends?.onnx?.wasm) {
|
|
28
28
|
env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
|
|
29
29
|
}
|
|
30
|
-
extractor = await pipeline('feature-extraction', MODEL_ID, {
|
|
30
|
+
extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
|
|
31
31
|
})();
|
|
32
32
|
return loadPromise;
|
|
33
33
|
}
|
|
@@ -147,11 +147,19 @@ function extractParamNames(content) {
|
|
|
147
147
|
.map(p => expandIdentifier(p))
|
|
148
148
|
.join(', ');
|
|
149
149
|
}
|
|
150
|
-
/**
|
|
150
|
+
/** Strip noise tokens that waste tokenizer budget without adding semantic value */
|
|
151
|
+
function condense(text) {
|
|
152
|
+
return text
|
|
153
|
+
.replace(/---[^-]*---/g, '') // section headers from comments
|
|
154
|
+
.replace(/[{}[\]()'",;:]/g, '') // punctuation
|
|
155
|
+
.replace(/\. /g, ' ') // sentence separators
|
|
156
|
+
.replace(/\s{2,}/g, ' ') // collapse whitespace
|
|
157
|
+
.trim();
|
|
158
|
+
}
|
|
159
|
+
/** Build NL documents from a node — keyword-dense, minimal tokens */
|
|
151
160
|
export function extractNlTexts(node) {
|
|
152
161
|
const docs = [];
|
|
153
|
-
const
|
|
154
|
-
const expandedName = expandIdentifier(name);
|
|
162
|
+
const expandedName = expandIdentifier(node.name);
|
|
155
163
|
const dir = node.filePath.split('/').slice(-3, -1).join('/');
|
|
156
164
|
// 1. Comment-based NL text (primary)
|
|
157
165
|
const comment = extractFullComment(node.content);
|
|
@@ -159,22 +167,21 @@ export function extractNlTexts(node) {
|
|
|
159
167
|
docs.push({
|
|
160
168
|
nodeId: node.id,
|
|
161
169
|
source: 'comment',
|
|
162
|
-
text: `${expandedName}
|
|
170
|
+
text: condense(`${expandedName} ${comment} ${dir}`),
|
|
163
171
|
});
|
|
164
172
|
}
|
|
165
|
-
// 2. Name + params
|
|
173
|
+
// 2. Name + params (always available)
|
|
166
174
|
const params = extractParamNames(node.content);
|
|
167
|
-
const parts = [expandedName];
|
|
168
|
-
if (params)
|
|
169
|
-
parts.push(`Parameters: ${params}`);
|
|
170
|
-
if (dir)
|
|
171
|
-
parts.push(`in ${dir}`);
|
|
172
175
|
if (!comment) {
|
|
173
|
-
|
|
176
|
+
const parts = [expandedName];
|
|
177
|
+
if (params)
|
|
178
|
+
parts.push(params);
|
|
179
|
+
if (dir)
|
|
180
|
+
parts.push(dir);
|
|
174
181
|
docs.push({
|
|
175
182
|
nodeId: node.id,
|
|
176
183
|
source: 'name',
|
|
177
|
-
text: parts.join('
|
|
184
|
+
text: condense(parts.join(' ')),
|
|
178
185
|
});
|
|
179
186
|
}
|
|
180
187
|
// 3. Enum/const values
|
|
@@ -184,7 +191,7 @@ export function extractNlTexts(node) {
|
|
|
184
191
|
docs.push({
|
|
185
192
|
nodeId: node.id,
|
|
186
193
|
source: 'enum',
|
|
187
|
-
text: `${expandedName}
|
|
194
|
+
text: condense(`${expandedName} ${values}`),
|
|
188
195
|
});
|
|
189
196
|
}
|
|
190
197
|
}
|
|
@@ -274,8 +281,9 @@ export async function buildNlEmbeddings(db, onProgress) {
|
|
|
274
281
|
// Find worker script path
|
|
275
282
|
const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
|
|
276
283
|
const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
|
|
277
|
-
// Split work across workers
|
|
278
|
-
|
|
284
|
+
// Split work across workers — larger batches reduce IPC round-trips
|
|
285
|
+
// and let the ONNX runtime amortize overhead across more items
|
|
286
|
+
const ITEMS_PER_BATCH = 256;
|
|
279
287
|
let nextIdx = 0;
|
|
280
288
|
let embedded = 0;
|
|
281
289
|
const getNextBatch = () => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.6.3",
|
|
3
|
+
"version": "2.6.4",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|