sigmap 6.4.0 → 6.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,19 +32,49 @@ const DEFAULT_WEIGHTS = {
32
32
  graphBoost: 0.4, // additive bonus for 1-hop import neighbors of matching files
33
33
  };
34
34
 
35
// Per-intent scoring weights. Every entry starts from DEFAULT_WEIGHTS and
// overrides only the signals that matter for that kind of query.
const INTENT_WEIGHTS = {
  // Plain search uses the defaults unchanged (same object, not a copy).
  search: DEFAULT_WEIGHTS,
  // Debugging: literal token hits matter most, path hits less.
  debug: Object.assign({}, DEFAULT_WEIGHTS, { exactToken: 1.2, pathMatch: 0.6 }),
  // Explaining: favour symbol names and file location.
  explain: Object.assign({}, DEFAULT_WEIGHTS, { symbolMatch: 0.8, pathMatch: 0.9 }),
  // Refactoring: favour symbol names, then exact tokens.
  refactor: Object.assign({}, DEFAULT_WEIGHTS, { symbolMatch: 0.9, exactToken: 0.8 }),
  // Reviewing: file location first, exact tokens second.
  review: Object.assign({}, DEFAULT_WEIGHTS, { pathMatch: 1.0, exactToken: 0.9 }),
  // Testing: exact tokens, with a reduced symbol bonus.
  test: Object.assign({}, DEFAULT_WEIGHTS, { exactToken: 0.7, symbolMatch: 0.4 }),
  // Integration: lean on import-graph neighbours and paths.
  integrate: Object.assign({}, DEFAULT_WEIGHTS, { graphBoost: 0.7, pathMatch: 1.1 }),
  // Navigation: paths dominate.
  navigate: Object.assign({}, DEFAULT_WEIGHTS, { pathMatch: 1.2, exactToken: 0.9 }),
};
46
+
47
// Penalty multipliers for negative signals. Each value scales a file's final
// score: 1.0 leaves it untouched, 0.0 zeroes it out.
const PENALTY_SIGNALS = {
  testFile: 0.4, // test/tests/spec/__tests__/e2e directory, or *.test.* / *.spec.* file
  generatedCode: 0.3, // dist/build/.next (and similar output or venv directories) in path
  docsFile: 0.2, // docs/doc directory, or a README/CHANGELOG file
  nodeModules: 0.0, // node_modules (zero score)
};

/**
 * Return the penalty multiplier for a file path.
 *
 * The checks run in a fixed order and the first match wins:
 * node_modules, then test files, then generated code, then documentation.
 * Matching is case-insensitive and accepts both `/` and `\` separators.
 *
 * @param {string} filePath - relative file path (e.g. 'src/extractors/python.js')
 * @returns {number} multiplier in [0, 1]; 1.0 when no negative signal matches
 *   or when filePath is not a string
 */
function _computePenalty(filePath) {
  // A non-string path carries no signal; stay neutral instead of throwing.
  if (typeof filePath !== 'string') return 1.0;

  // Normalise Windows separators so the segment regexes below apply uniformly.
  const pathLower = filePath.toLowerCase().replace(/\\/g, '/');

  if (pathLower.includes('node_modules')) return PENALTY_SIGNALS.nodeModules;

  // Test directories, plus co-located test files such as foo.test.js / foo.spec.ts.
  if (/(^|\/)(test|tests|spec|__tests__|e2e)($|\/)|\.(test|spec)\.[^/]+$/.test(pathLower)) {
    return PENALTY_SIGNALS.testFile;
  }

  if (/(^|\/)(dist|build|\.next|\.nuxt|out|\.venv|venv)($|\/)/.test(pathLower)) {
    return PENALTY_SIGNALS.generatedCode;
  }

  // Docs directories, plus README/CHANGELOG files with any extension
  // (a bare segment match would miss 'README.md').
  if (/(^|\/)(docs|doc|readme|changelog)($|\/)|(^|\/)(readme|changelog)\.[^/]*$/.test(pathLower)) {
    return PENALTY_SIGNALS.docsFile;
  }

  return 1.0;
}
63
+
35
64
  /**
36
- * Score a single file against a query.
65
+ * Score a single file against a query, returning detailed signal breakdown.
37
66
  *
38
67
  * @param {string} filePath - relative file path (e.g. 'src/extractors/python.js')
39
68
  * @param {string[]} sigs - signature strings for this file
40
69
  * @param {string[]} queryTokens - pre-tokenized query
41
70
  * @param {object} weights
42
- * @returns {number}
71
+ * @returns {{ score: number, signals: { exactToken: number, symbolMatch: number, prefixMatch: number, pathMatch: number, penalty: number } }}
43
72
  */
44
73
  function scoreFile(filePath, sigs, queryTokens, weights) {
45
- if (!sigs || sigs.length === 0) return 0;
74
+ if (!sigs || sigs.length === 0) return { score: 0, signals: { exactToken: 0, symbolMatch: 0, prefixMatch: 0, pathMatch: 0, penalty: 1.0 } };
46
75
 
47
76
  const w = weights || DEFAULT_WEIGHTS;
77
+ const signals = { exactToken: 0, symbolMatch: 0, prefixMatch: 0, pathMatch: 0, penalty: _computePenalty(filePath) };
48
78
 
49
79
  // Build token set from all signatures
50
80
  const sigText = sigs.join(' ');
@@ -60,14 +90,19 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
60
90
 
61
91
  // Exact token match in sigs
62
92
  if (sigTokenSet.has(qt)) {
63
- score += w.exactToken;
93
+ const bonus = w.exactToken;
94
+ score += bonus;
95
+ signals.exactToken += bonus;
64
96
 
65
97
  // Bonus: appears directly in a function/class/method name line
66
98
  const nameLineMatch = sigs.some((sig) => {
67
99
  const nt = tokenize(sig.replace(/[^a-zA-Z0-9_\s]/g, ' '));
68
100
  return nt.includes(qt);
69
101
  });
70
- if (nameLineMatch) score += w.symbolMatch;
102
+ if (nameLineMatch) {
103
+ score += w.symbolMatch;
104
+ signals.symbolMatch += w.symbolMatch;
105
+ }
71
106
  }
72
107
 
73
108
  // Prefix match (e.g. query "python" matches "pythonDeps")
@@ -75,6 +110,7 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
75
110
  for (const st of sigTokenSet) {
76
111
  if (st !== qt && st.startsWith(qt)) {
77
112
  score += w.prefixMatch;
113
+ signals.prefixMatch += w.prefixMatch;
78
114
  break; // one bonus per query token
79
115
  }
80
116
  }
@@ -83,10 +119,14 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
83
119
  // Path token match
84
120
  if (pathTokenSet.has(qt)) {
85
121
  score += w.pathMatch;
122
+ signals.pathMatch += w.pathMatch;
86
123
  }
87
124
  }
88
125
 
89
- return score;
126
+ // Apply penalty multiplier
127
+ score *= signals.penalty;
128
+
129
+ return { score, signals };
90
130
  }
91
131
 
92
132
  /**
@@ -101,7 +141,7 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
101
141
  * @param {object} [opts.weights] - override scoring weights
102
142
  * @param {string} [opts.cwd] - project root for learned ranking weights
103
143
  * @param {{ forward: Map<string,string[]> }} [opts.graph] - dependency graph for neighbor boost
104
- * @returns {{ file: string, score: number, sigs: string[], tokens: number }[]}
144
+ * @returns {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]}
105
145
  */
106
146
  function rank(query, sigIndex, opts) {
107
147
  if (!query || typeof query !== 'string') return [];
@@ -110,17 +150,21 @@ function rank(query, sigIndex, opts) {
110
150
  const topK = (opts && opts.topK) || 10;
111
151
  const recencyMultiplier = (opts && opts.recencyBoost) || DEFAULT_WEIGHTS.recencyBoost;
112
152
  const recencySet = (opts && opts.recencySet) || null;
113
- const weights = (opts && opts.weights) ? Object.assign({}, DEFAULT_WEIGHTS, opts.weights) : DEFAULT_WEIGHTS;
114
- const learnedWeights = opts && opts.cwd ? loadWeights(opts.cwd) : null;
115
153
  const graph = (opts && opts.graph && opts.graph.forward instanceof Map) ? opts.graph : null;
116
154
  const cwd = (opts && opts.cwd) || null;
117
155
 
156
+ // Detect query intent and get appropriate weights
157
+ const intent = detectIntent(query);
158
+ const intentWeights = INTENT_WEIGHTS[intent] || DEFAULT_WEIGHTS;
159
+ const weights = (opts && opts.weights) ? Object.assign({}, intentWeights, opts.weights) : intentWeights;
160
+ const learnedWeights = opts && opts.cwd ? loadWeights(opts.cwd) : null;
161
+
118
162
  const queryTokens = tokenize(query);
119
163
  if (queryTokens.length === 0) {
120
164
  // Empty query: return top-K by file count (most signatures = most useful)
121
165
  const all = [];
122
166
  for (const [file, sigs] of sigIndex.entries()) {
123
- all.push({ file, score: sigs.length, sigs, tokens: Math.ceil(sigs.join('\n').length / 4) });
167
+ all.push({ file, score: sigs.length, sigs, tokens: Math.ceil(sigs.join('\n').length / 4), intent, signals: {} });
124
168
  }
125
169
  all.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
126
170
  return all.slice(0, topK);
@@ -128,15 +172,20 @@ function rank(query, sigIndex, opts) {
128
172
 
129
173
  const scored = [];
130
174
  for (const [file, sigs] of sigIndex.entries()) {
131
- let score = scoreFile(file, sigs, queryTokens, weights);
175
+ const result = scoreFile(file, sigs, queryTokens, weights);
176
+ let score = result.score;
177
+ const signals = result.signals;
132
178
 
133
179
  // Recency boost
134
180
  if (recencySet && recencySet.has(file) && score > 0) {
135
181
  score *= recencyMultiplier;
182
+ signals.recencyBoost = recencyMultiplier;
136
183
  }
137
184
 
138
185
  if (learnedWeights && score > 0) {
139
- score *= learnedWeights[file] || 1.0;
186
+ const multiplier = learnedWeights[file] || 1.0;
187
+ score *= multiplier;
188
+ signals.learnedWeights = multiplier;
140
189
  }
141
190
 
142
191
  scored.push({
@@ -144,6 +193,8 @@ function rank(query, sigIndex, opts) {
144
193
  score,
145
194
  sigs,
146
195
  tokens: Math.ceil(sigs.join('\n').length / 4),
196
+ intent,
197
+ signals,
147
198
  });
148
199
  }
149
200
 
@@ -166,6 +217,7 @@ function rank(query, sigIndex, opts) {
166
217
  const idx = relToIdx.get(neighborRel);
167
218
  if (idx !== undefined) {
168
219
  scored[idx].score += weights.graphBoost;
220
+ scored[idx].signals.graphBoost = (scored[idx].signals.graphBoost || 0) + weights.graphBoost;
169
221
  }
170
222
  }
171
223
  }
@@ -286,7 +338,7 @@ function buildSigIndex(cwd, opts) {
286
338
  /**
287
339
  * Format ranked results as a markdown table string.
288
340
  *
289
- * @param {{ file: string, score: number, sigs: string[], tokens: number }[]} results
341
+ * @param {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]} results
290
342
  * @param {string} query
291
343
  * @returns {string}
292
344
  */
@@ -295,14 +347,17 @@ function formatRankTable(results, query) {
295
347
  return `No matching files found for query: "${query}"\n`;
296
348
  }
297
349
 
350
+ const intent = (results[0] && results[0].intent) || 'search';
298
351
  const lines = [
299
352
  `## Query: ${query}`,
353
+ `Intent: ${intent}`,
300
354
  '',
301
- '| Rank | File | Score | Sigs | Tokens |',
302
- '|------|------|-------|------|--------|',
303
- ...results.map((r, i) =>
304
- `| ${i + 1} | ${r.file} | ${r.score.toFixed(2)} | ${r.sigs.length} | ${r.tokens} |`
305
- ),
355
+ '| Rank | File | Score | Sigs | Penalty |',
356
+ '|------|------|-------|------|---------|',
357
+ ...results.map((r, i) => {
358
+ const penalty = r.signals && r.signals.penalty ? r.signals.penalty.toFixed(2) : '1.00';
359
+ return `| ${i + 1} | ${r.file} | ${r.score.toFixed(2)} | ${r.sigs.length} | ${penalty} |`;
360
+ }),
306
361
  '',
307
362
  ];
308
363
 
@@ -310,6 +365,10 @@ function formatRankTable(results, query) {
310
365
  for (const r of results.slice(0, 3)) {
311
366
  if (r.sigs.length > 0) {
312
367
  lines.push(`### ${r.file}`);
368
+ if (r.signals) {
369
+ const sig = r.signals;
370
+ lines.push(`Signals: exactToken=${(sig.exactToken || 0).toFixed(2)} symbolMatch=${(sig.symbolMatch || 0).toFixed(2)} prefixMatch=${(sig.prefixMatch || 0).toFixed(2)} pathMatch=${(sig.pathMatch || 0).toFixed(2)} penalty=${(sig.penalty || 1).toFixed(2)}`);
371
+ }
313
372
  lines.push('```');
314
373
  lines.push(...r.sigs.slice(0, 10));
315
374
  if (r.sigs.length > 10) lines.push(`... (${r.sigs.length - 10} more)`);
@@ -324,32 +383,38 @@ function formatRankTable(results, query) {
324
383
  /**
325
384
  * Format ranked results as a structured JSON-serialisable object.
326
385
  *
327
- * @param {{ file: string, score: number, sigs: string[], tokens: number }[]} results
386
+ * @param {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]} results
328
387
  * @param {string} query
329
388
  * @returns {object}
330
389
  */
331
390
  function formatRankJSON(results, query) {
391
+ const intent = (results && results[0] && results[0].intent) || 'search';
332
392
  return {
333
393
  query,
394
+ intent,
334
395
  results: (results || []).map((r, i) => ({
335
396
  rank: i + 1,
336
397
  file: r.file,
337
398
  score: r.score,
338
399
  sigs: r.sigs,
339
400
  tokens: r.tokens,
401
+ signals: r.signals || {},
340
402
  })),
341
403
  totalResults: (results || []).length,
342
404
  };
343
405
  }
344
406
 
345
407
  // ---------------------------------------------------------------------------
346
- // Intent detection
408
+ // Intent detection — 7 intents
347
409
  // ---------------------------------------------------------------------------
348
410
  const INTENT_PATTERNS = {
349
411
  debug: /\b(bug|fix|error|crash|exception|broken|failing|issue|problem|regression)\b/i,
350
- explain: /\b(explain|how does|what is|understand|overview|architecture|describe|walk me)\b/i,
351
- refactor: /\b(refactor|restructure|redesign|clean up|extract|move|rename|simplify)\b/i,
352
- review: /\b(review|check|audit|security|pr|pull request|assess)\b/i,
412
+ explain: /\b(explain|how does|what is|understand|overview|architecture|describe|walk me|teach)\b/i,
413
+ refactor: /\b(refactor|restructure|redesign|clean up|extract|move|rename|simplify|optimize)\b/i,
414
+ review: /\b(review|check|audit|security|pr|pull request|assess|validate)\b/i,
415
+ test: /\b(test|unit test|integration test|testing|spec|assert|mock)\b/i,
416
+ integrate:/\b(import|integrate|connect|wire|bind|require|export|depend|graph)\b|require[ds]\b/i,
417
+ navigate: /\b(find|locate|where|search|look for|show me|navigate|browse|list)\b/i,
353
418
  };
354
419
 
355
420
  function detectIntent(query) {