@zuvia-software-solutions/code-mapper 2.6.3 → 2.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@
9
9
  import { pipeline } from '@huggingface/transformers';
10
10
  const MODEL_ID = 'Xenova/bge-small-en-v1.5';
11
11
  async function main() {
12
- const extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
12
+ const extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
13
13
  process.send({ type: 'ready' });
14
14
  // Process messages from parent
15
15
  process.on('message', async (msg) => {
@@ -30,7 +30,7 @@ interface NlDocument {
30
30
  source: string;
31
31
  text: string;
32
32
  }
33
- /** Build NL documents from a node */
33
+ /** Build NL documents from a node — keyword-dense, minimal tokens */
34
34
  export declare function extractNlTexts(node: NodeForNl): NlDocument[];
35
35
  /**
36
36
  * Build NL embeddings for all eligible nodes in the database.
@@ -27,7 +27,7 @@ export async function initNlEmbedder() {
27
27
  if (env.backends?.onnx?.wasm) {
28
28
  env.backends.onnx.wasm.numThreads = Math.max(1, cpuCount);
29
29
  }
30
- extractor = await pipeline('feature-extraction', MODEL_ID, { quantized: true });
30
+ extractor = await pipeline('feature-extraction', MODEL_ID, { dtype: 'q8' });
31
31
  })();
32
32
  return loadPromise;
33
33
  }
@@ -147,11 +147,19 @@ function extractParamNames(content) {
147
147
  .map(p => expandIdentifier(p))
148
148
  .join(', ');
149
149
  }
150
- /** Build NL documents from a node */
150
/**
 * Strip noise that wastes tokenizer budget without adding semantic value.
 *
 * Structural punctuation is replaced with a space rather than deleted, so
 * words that were separated only by punctuation (`a,b`, `foo(bar)`,
 * `key:value`) remain separate tokens instead of fusing into one.
 * Periods inside a token (`v1.5`, `file.ts`) are preserved; only periods
 * that end a sentence (followed by whitespace or end of input) are removed.
 *
 * @param {string} text Raw text to condense.
 * @returns {string} Single-spaced text with no leading or trailing whitespace.
 */
function condense(text) {
    return text
        .replace(/---[^-]*---/g, ' ') // section headers from comments
        .replace(/['"]/g, '') // quotes: drop outright so contractions stay one word
        .replace(/[{}[\](),;:]/g, ' ') // structural punctuation becomes a word boundary
        .replace(/\.(?=\s|$)/g, ' ') // sentence terminators, including a trailing one
        .replace(/\s+/g, ' ') // collapse every whitespace run (newlines, tabs) to one space
        .trim();
}
159
+ /** Build NL documents from a node — keyword-dense, minimal tokens */
151
160
  export function extractNlTexts(node) {
152
161
  const docs = [];
153
- const name = node.name;
154
- const expandedName = expandIdentifier(name);
162
+ const expandedName = expandIdentifier(node.name);
155
163
  const dir = node.filePath.split('/').slice(-3, -1).join('/');
156
164
  // 1. Comment-based NL text (primary)
157
165
  const comment = extractFullComment(node.content);
@@ -159,22 +167,21 @@ export function extractNlTexts(node) {
159
167
  docs.push({
160
168
  nodeId: node.id,
161
169
  source: 'comment',
162
- text: `${expandedName}: ${comment}. File: ${dir}`,
170
+ text: condense(`${expandedName} ${comment} ${dir}`),
163
171
  });
164
172
  }
165
- // 2. Name + params + return type (always available)
173
+ // 2. Name + params (always available)
166
174
  const params = extractParamNames(node.content);
167
- const parts = [expandedName];
168
- if (params)
169
- parts.push(`Parameters: ${params}`);
170
- if (dir)
171
- parts.push(`in ${dir}`);
172
175
  if (!comment) {
173
- // Only add name-based doc if no comment (avoid duplication)
176
+ const parts = [expandedName];
177
+ if (params)
178
+ parts.push(params);
179
+ if (dir)
180
+ parts.push(dir);
174
181
  docs.push({
175
182
  nodeId: node.id,
176
183
  source: 'name',
177
- text: parts.join('. '),
184
+ text: condense(parts.join(' ')),
178
185
  });
179
186
  }
180
187
  // 3. Enum/const values
@@ -184,7 +191,7 @@ export function extractNlTexts(node) {
184
191
  docs.push({
185
192
  nodeId: node.id,
186
193
  source: 'enum',
187
- text: `${expandedName}: ${values}`,
194
+ text: condense(`${expandedName} ${values}`),
188
195
  });
189
196
  }
190
197
  }
@@ -274,8 +281,9 @@ export async function buildNlEmbeddings(db, onProgress) {
274
281
  // Find worker script path
275
282
  const thisDir = pathMod.dirname(fileURLToPath(import.meta.url));
276
283
  const workerScript = pathMod.join(thisDir, 'nl-embed-worker.js');
277
- // Split work across workers
278
- const ITEMS_PER_BATCH = 50;
284
+ // Split work across workers — larger batches reduce IPC round-trips
285
+ // and let the ONNX runtime amortize overhead across more items
286
+ const ITEMS_PER_BATCH = 256;
279
287
  let nextIdx = 0;
280
288
  let embedded = 0;
281
289
  const getNextBatch = () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.6.3",
3
+ "version": "2.6.4",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",