sigmap 6.5.0 → 6.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +113 -108
- package/CHANGELOG.md +26 -0
- package/README.md +6 -6
- package/gen-context.js +569 -3
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/config/defaults.js +3 -0
- package/src/mcp/server.js +1 -1
- package/src/retrieval/ranker.js +148 -29
package/src/config/defaults.js
CHANGED
|
@@ -125,6 +125,9 @@ const DEFAULTS = {
|
|
|
125
125
|
// Directories scanned for tests when testCoverage is enabled
|
|
126
126
|
testDirs: ['tests', 'test', '__tests__', 'spec'],
|
|
127
127
|
|
|
128
|
+
// Enable incremental signature cache (v6.7) - only re-extract changed files
|
|
129
|
+
sigCache: false,
|
|
130
|
+
|
|
128
131
|
// Add reverse dependency usage hints on file headings (opt-in)
|
|
129
132
|
impactRadius: false,
|
|
130
133
|
|
package/src/mcp/server.js
CHANGED
package/src/retrieval/ranker.js
CHANGED
|
@@ -32,19 +32,75 @@ const DEFAULT_WEIGHTS = {
|
|
|
32
32
|
graphBoost: 0.4, // additive bonus for 1-hop import neighbors of matching files
|
|
33
33
|
};
|
|
34
34
|
|
|
35
|
+
// Graph boost amounts for 2-hop traversal with decay (v6.7)
|
|
36
|
+
const GRAPH_BOOST_AMOUNTS = {
|
|
37
|
+
hop1: 0.40, // direct import neighbor of a file with score > 0
|
|
38
|
+
hop2: 0.15, // 2 hops away (transitive), with decay
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
// Intent-specific weight adjustments
|
|
42
|
+
const INTENT_WEIGHTS = {
|
|
43
|
+
search: DEFAULT_WEIGHTS,
|
|
44
|
+
debug: { ...DEFAULT_WEIGHTS, exactToken: 1.2, pathMatch: 0.6 },
|
|
45
|
+
explain: { ...DEFAULT_WEIGHTS, symbolMatch: 0.8, pathMatch: 0.9 },
|
|
46
|
+
refactor: { ...DEFAULT_WEIGHTS, symbolMatch: 0.9, exactToken: 0.8 },
|
|
47
|
+
review: { ...DEFAULT_WEIGHTS, pathMatch: 1.0, exactToken: 0.9 },
|
|
48
|
+
test: { ...DEFAULT_WEIGHTS, exactToken: 0.7, symbolMatch: 0.4 },
|
|
49
|
+
integrate: { ...DEFAULT_WEIGHTS, graphBoost: 0.7, pathMatch: 1.1 },
|
|
50
|
+
navigate: { ...DEFAULT_WEIGHTS, pathMatch: 1.2, exactToken: 0.9 },
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
// Penalty multipliers for negative signals
|
|
54
|
+
const PENALTY_SIGNALS = {
|
|
55
|
+
testFile: 0.4, // test/spec/__tests__ in path
|
|
56
|
+
generatedCode: 0.3, // dist/build/.next in path
|
|
57
|
+
docsFile: 0.2, // docs/doc/README in path
|
|
58
|
+
nodeModules: 0.0, // node_modules (zero score)
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
function _computePenalty(filePath) {
|
|
62
|
+
const pathLower = filePath.toLowerCase();
|
|
63
|
+
if (pathLower.includes('node_modules')) return PENALTY_SIGNALS.nodeModules;
|
|
64
|
+
if (/(^|\/)(test|tests|spec|__tests__|e2e)($|\/)/.test(pathLower)) return PENALTY_SIGNALS.testFile;
|
|
65
|
+
if (/(^|\/)(dist|build|\.next|\.nuxt|out|\.venv|venv)($|\/)/.test(pathLower)) return PENALTY_SIGNALS.generatedCode;
|
|
66
|
+
if (/(^|\/)(docs|doc|readme|changelog)($|\/)/.test(pathLower)) return PENALTY_SIGNALS.docsFile;
|
|
67
|
+
return 1.0;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Detect hub files: those whose graph.reverse entry lists at least 20% (rounded up) of the files in graph.reverse
|
|
71
|
+
function _computeHubs(graph) {
|
|
72
|
+
if (!graph || !graph.reverse) return new Set();
|
|
73
|
+
const fileCount = Math.max(1, graph.reverse.size);
|
|
74
|
+
const threshold = Math.ceil(fileCount * 0.2);
|
|
75
|
+
const hubs = new Set();
|
|
76
|
+
for (const [file, deps] of graph.reverse) {
|
|
77
|
+
if ((deps && deps.size >= threshold) || (Array.isArray(deps) && deps.length >= threshold)) {
|
|
78
|
+
hubs.add(file);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return hubs;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Common utility paths that should be treated as hubs regardless of fanout
|
|
85
|
+
function _isHub(filePath) {
|
|
86
|
+
return /\/(utils|helpers|shared|common|constants|types|interfaces|index)\.(ts|tsx|js|jsx)$/.test(filePath)
|
|
87
|
+
|| filePath.endsWith('/index.ts') || filePath.endsWith('/index.js');
|
|
88
|
+
}
|
|
89
|
+
|
|
35
90
|
/**
|
|
36
|
-
* Score a single file against a query.
|
|
91
|
+
* Score a single file against a query, returning detailed signal breakdown.
|
|
37
92
|
*
|
|
38
93
|
* @param {string} filePath - relative file path (e.g. 'src/extractors/python.js')
|
|
39
94
|
* @param {string[]} sigs - signature strings for this file
|
|
40
95
|
* @param {string[]} queryTokens - pre-tokenized query
|
|
41
96
|
* @param {object} weights
|
|
42
|
-
* @returns {number}
|
|
97
|
+
* @returns {{ score: number, signals: { exactToken: number, symbolMatch: number, prefixMatch: number, pathMatch: number, penalty: number } }}
|
|
43
98
|
*/
|
|
44
99
|
function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
45
|
-
if (!sigs || sigs.length === 0) return 0;
|
|
100
|
+
if (!sigs || sigs.length === 0) return { score: 0, signals: { exactToken: 0, symbolMatch: 0, prefixMatch: 0, pathMatch: 0, penalty: 1.0 } };
|
|
46
101
|
|
|
47
102
|
const w = weights || DEFAULT_WEIGHTS;
|
|
103
|
+
const signals = { exactToken: 0, symbolMatch: 0, prefixMatch: 0, pathMatch: 0, penalty: _computePenalty(filePath) };
|
|
48
104
|
|
|
49
105
|
// Build token set from all signatures
|
|
50
106
|
const sigText = sigs.join(' ');
|
|
@@ -60,14 +116,19 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
60
116
|
|
|
61
117
|
// Exact token match in sigs
|
|
62
118
|
if (sigTokenSet.has(qt)) {
|
|
63
|
-
|
|
119
|
+
const bonus = w.exactToken;
|
|
120
|
+
score += bonus;
|
|
121
|
+
signals.exactToken += bonus;
|
|
64
122
|
|
|
65
123
|
// Bonus: appears directly in a function/class/method name line
|
|
66
124
|
const nameLineMatch = sigs.some((sig) => {
|
|
67
125
|
const nt = tokenize(sig.replace(/[^a-zA-Z0-9_\s]/g, ' '));
|
|
68
126
|
return nt.includes(qt);
|
|
69
127
|
});
|
|
70
|
-
if (nameLineMatch)
|
|
128
|
+
if (nameLineMatch) {
|
|
129
|
+
score += w.symbolMatch;
|
|
130
|
+
signals.symbolMatch += w.symbolMatch;
|
|
131
|
+
}
|
|
71
132
|
}
|
|
72
133
|
|
|
73
134
|
// Prefix match (e.g. query "python" matches "pythonDeps")
|
|
@@ -75,6 +136,7 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
75
136
|
for (const st of sigTokenSet) {
|
|
76
137
|
if (st !== qt && st.startsWith(qt)) {
|
|
77
138
|
score += w.prefixMatch;
|
|
139
|
+
signals.prefixMatch += w.prefixMatch;
|
|
78
140
|
break; // one bonus per query token
|
|
79
141
|
}
|
|
80
142
|
}
|
|
@@ -83,10 +145,14 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
83
145
|
// Path token match
|
|
84
146
|
if (pathTokenSet.has(qt)) {
|
|
85
147
|
score += w.pathMatch;
|
|
148
|
+
signals.pathMatch += w.pathMatch;
|
|
86
149
|
}
|
|
87
150
|
}
|
|
88
151
|
|
|
89
|
-
|
|
152
|
+
// Apply penalty multiplier
|
|
153
|
+
score *= signals.penalty;
|
|
154
|
+
|
|
155
|
+
return { score, signals };
|
|
90
156
|
}
|
|
91
157
|
|
|
92
158
|
/**
|
|
@@ -101,7 +167,7 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
101
167
|
* @param {object} [opts.weights] - override scoring weights
|
|
102
168
|
* @param {string} [opts.cwd] - project root for learned ranking weights
|
|
103
169
|
* @param {{ forward: Map<string,string[]>, reverse?: Map<string, Set|Array> }} [opts.graph] - dependency graph for neighbor boost (reverse, when present, is used for hub detection)
|
|
104
|
-
* @returns {{ file: string, score: number, sigs: string[], tokens: number }[]}
|
|
170
|
+
* @returns {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]}
|
|
105
171
|
*/
|
|
106
172
|
function rank(query, sigIndex, opts) {
|
|
107
173
|
if (!query || typeof query !== 'string') return [];
|
|
@@ -110,17 +176,21 @@ function rank(query, sigIndex, opts) {
|
|
|
110
176
|
const topK = (opts && opts.topK) || 10;
|
|
111
177
|
const recencyMultiplier = (opts && opts.recencyBoost) || DEFAULT_WEIGHTS.recencyBoost;
|
|
112
178
|
const recencySet = (opts && opts.recencySet) || null;
|
|
113
|
-
const weights = (opts && opts.weights) ? Object.assign({}, DEFAULT_WEIGHTS, opts.weights) : DEFAULT_WEIGHTS;
|
|
114
|
-
const learnedWeights = opts && opts.cwd ? loadWeights(opts.cwd) : null;
|
|
115
179
|
const graph = (opts && opts.graph && opts.graph.forward instanceof Map) ? opts.graph : null;
|
|
116
180
|
const cwd = (opts && opts.cwd) || null;
|
|
117
181
|
|
|
182
|
+
// Detect query intent and get appropriate weights
|
|
183
|
+
const intent = detectIntent(query);
|
|
184
|
+
const intentWeights = INTENT_WEIGHTS[intent] || DEFAULT_WEIGHTS;
|
|
185
|
+
const weights = (opts && opts.weights) ? Object.assign({}, intentWeights, opts.weights) : intentWeights;
|
|
186
|
+
const learnedWeights = opts && opts.cwd ? loadWeights(opts.cwd) : null;
|
|
187
|
+
|
|
118
188
|
const queryTokens = tokenize(query);
|
|
119
189
|
if (queryTokens.length === 0) {
|
|
120
190
|
// Empty query: return top-K by file count (most signatures = most useful)
|
|
121
191
|
const all = [];
|
|
122
192
|
for (const [file, sigs] of sigIndex.entries()) {
|
|
123
|
-
all.push({ file, score: sigs.length, sigs, tokens: Math.ceil(sigs.join('\n').length / 4) });
|
|
193
|
+
all.push({ file, score: sigs.length, sigs, tokens: Math.ceil(sigs.join('\n').length / 4), intent, signals: {} });
|
|
124
194
|
}
|
|
125
195
|
all.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
|
|
126
196
|
return all.slice(0, topK);
|
|
@@ -128,15 +198,20 @@ function rank(query, sigIndex, opts) {
|
|
|
128
198
|
|
|
129
199
|
const scored = [];
|
|
130
200
|
for (const [file, sigs] of sigIndex.entries()) {
|
|
131
|
-
|
|
201
|
+
const result = scoreFile(file, sigs, queryTokens, weights);
|
|
202
|
+
let score = result.score;
|
|
203
|
+
const signals = result.signals;
|
|
132
204
|
|
|
133
205
|
// Recency boost
|
|
134
206
|
if (recencySet && recencySet.has(file) && score > 0) {
|
|
135
207
|
score *= recencyMultiplier;
|
|
208
|
+
signals.recencyBoost = recencyMultiplier;
|
|
136
209
|
}
|
|
137
210
|
|
|
138
211
|
if (learnedWeights && score > 0) {
|
|
139
|
-
|
|
212
|
+
const multiplier = learnedWeights[file] || 1.0;
|
|
213
|
+
score *= multiplier;
|
|
214
|
+
signals.learnedWeights = multiplier;
|
|
140
215
|
}
|
|
141
216
|
|
|
142
217
|
scored.push({
|
|
@@ -144,28 +219,59 @@ function rank(query, sigIndex, opts) {
|
|
|
144
219
|
score,
|
|
145
220
|
sigs,
|
|
146
221
|
tokens: Math.ceil(sigs.join('\n').length / 4),
|
|
222
|
+
intent,
|
|
223
|
+
signals,
|
|
147
224
|
});
|
|
148
225
|
}
|
|
149
226
|
|
|
150
|
-
// Graph neighbor boost:
|
|
151
|
-
//
|
|
227
|
+
// Graph neighbor boost: 2-hop traversal with decay (v6.7)
|
|
228
|
+
// Hop 1: add hop1 amount to direct import neighbors of files with score > 0
|
|
229
|
+
// Hop 2: add hop2 amount to neighbors of hop1 files (with decay)
|
|
230
|
+
// Hub suppression: hub files (at or above the 20% threshold, or matching a common utility/index path) are not boosted
|
|
152
231
|
if (graph && cwd) {
|
|
153
232
|
const path = require('path');
|
|
154
|
-
// Build
|
|
233
|
+
// Build maps for relative ↔ absolute path conversion and index lookup
|
|
155
234
|
const relToIdx = new Map();
|
|
235
|
+
const absToRel = new Map();
|
|
156
236
|
for (let i = 0; i < scored.length; i++) {
|
|
157
237
|
relToIdx.set(scored[i].file, i);
|
|
238
|
+
const abs = path.resolve(cwd, scored[i].file);
|
|
239
|
+
absToRel.set(abs, scored[i].file);
|
|
158
240
|
}
|
|
241
|
+
|
|
242
|
+
const hubs = _computeHubs(graph);
|
|
243
|
+
const hop1Files = new Set(); // track which files received hop1 boost
|
|
244
|
+
|
|
245
|
+
// Hop 1: direct neighbors of scored files
|
|
159
246
|
for (const entry of scored) {
|
|
160
247
|
if (entry.score <= 0) continue;
|
|
161
|
-
// Resolve relative path to absolute for graph lookup
|
|
162
248
|
const abs = path.resolve(cwd, entry.file);
|
|
163
249
|
const neighbors = graph.forward.get(abs) || [];
|
|
164
250
|
for (const neighborAbs of neighbors) {
|
|
251
|
+
if (_isHub(neighborAbs) || hubs.has(neighborAbs)) continue;
|
|
165
252
|
const neighborRel = path.relative(cwd, neighborAbs).replace(/\\/g, '/');
|
|
166
253
|
const idx = relToIdx.get(neighborRel);
|
|
167
254
|
if (idx !== undefined) {
|
|
168
|
-
scored[idx].score +=
|
|
255
|
+
scored[idx].score += GRAPH_BOOST_AMOUNTS.hop1;
|
|
256
|
+
scored[idx].signals.graphBoost = (scored[idx].signals.graphBoost || 0) + GRAPH_BOOST_AMOUNTS.hop1;
|
|
257
|
+
hop1Files.add(neighborAbs);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// Hop 2: neighbors of hop1 files (only those that already have score > 0 and were not boosted in hop 1)
|
|
263
|
+
for (const hop1File of hop1Files) {
|
|
264
|
+
if (!absToRel.has(hop1File)) continue; // skip files not in index
|
|
265
|
+
const neighbors = graph.forward.get(hop1File) || [];
|
|
266
|
+
for (const neighborAbs of neighbors) {
|
|
267
|
+
if (_isHub(neighborAbs) || hubs.has(neighborAbs)) continue;
|
|
268
|
+
if (hop1Files.has(neighborAbs)) continue; // skip already hop1-boosted
|
|
269
|
+
const neighborRel = path.relative(cwd, neighborAbs).replace(/\\/g, '/');
|
|
270
|
+
const idx = relToIdx.get(neighborRel);
|
|
271
|
+
if (idx !== undefined && scored[idx].score > 0) {
|
|
272
|
+
// Only boost files that have some baseline score (not noise)
|
|
273
|
+
scored[idx].score += GRAPH_BOOST_AMOUNTS.hop2;
|
|
274
|
+
scored[idx].signals.graphBoost = (scored[idx].signals.graphBoost || 0) + GRAPH_BOOST_AMOUNTS.hop2;
|
|
169
275
|
}
|
|
170
276
|
}
|
|
171
277
|
}
|
|
@@ -286,7 +392,7 @@ function buildSigIndex(cwd, opts) {
|
|
|
286
392
|
/**
|
|
287
393
|
* Format ranked results as a markdown table string.
|
|
288
394
|
*
|
|
289
|
-
* @param {{ file: string, score: number, sigs: string[], tokens: number }[]} results
|
|
395
|
+
* @param {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]} results
|
|
290
396
|
* @param {string} query
|
|
291
397
|
* @returns {string}
|
|
292
398
|
*/
|
|
@@ -295,14 +401,17 @@ function formatRankTable(results, query) {
|
|
|
295
401
|
return `No matching files found for query: "${query}"\n`;
|
|
296
402
|
}
|
|
297
403
|
|
|
404
|
+
const intent = (results[0] && results[0].intent) || 'search';
|
|
298
405
|
const lines = [
|
|
299
406
|
`## Query: ${query}`,
|
|
407
|
+
`Intent: ${intent}`,
|
|
300
408
|
'',
|
|
301
|
-
'| Rank | File | Score | Sigs |
|
|
302
|
-
'
|
|
303
|
-
...results.map((r, i) =>
|
|
304
|
-
|
|
305
|
-
|
|
409
|
+
'| Rank | File | Score | Sigs | Penalty |',
|
|
410
|
+
'|------|------|-------|------|---------|',
|
|
411
|
+
...results.map((r, i) => {
|
|
412
|
+
const penalty = r.signals && r.signals.penalty ? r.signals.penalty.toFixed(2) : '1.00';
|
|
413
|
+
return `| ${i + 1} | ${r.file} | ${r.score.toFixed(2)} | ${r.sigs.length} | ${penalty} |`;
|
|
414
|
+
}),
|
|
306
415
|
'',
|
|
307
416
|
];
|
|
308
417
|
|
|
@@ -310,6 +419,10 @@ function formatRankTable(results, query) {
|
|
|
310
419
|
for (const r of results.slice(0, 3)) {
|
|
311
420
|
if (r.sigs.length > 0) {
|
|
312
421
|
lines.push(`### ${r.file}`);
|
|
422
|
+
if (r.signals) {
|
|
423
|
+
const sig = r.signals;
|
|
424
|
+
lines.push(`Signals: exactToken=${(sig.exactToken || 0).toFixed(2)} symbolMatch=${(sig.symbolMatch || 0).toFixed(2)} prefixMatch=${(sig.prefixMatch || 0).toFixed(2)} pathMatch=${(sig.pathMatch || 0).toFixed(2)} penalty=${(sig.penalty || 1).toFixed(2)}`);
|
|
425
|
+
}
|
|
313
426
|
lines.push('```');
|
|
314
427
|
lines.push(...r.sigs.slice(0, 10));
|
|
315
428
|
if (r.sigs.length > 10) lines.push(`... (${r.sigs.length - 10} more)`);
|
|
@@ -324,32 +437,38 @@ function formatRankTable(results, query) {
|
|
|
324
437
|
/**
|
|
325
438
|
* Format ranked results as a structured JSON-serialisable object.
|
|
326
439
|
*
|
|
327
|
-
* @param {{ file: string, score: number, sigs: string[], tokens: number }[]} results
|
|
440
|
+
* @param {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]} results
|
|
328
441
|
* @param {string} query
|
|
329
442
|
* @returns {object}
|
|
330
443
|
*/
|
|
331
444
|
function formatRankJSON(results, query) {
|
|
445
|
+
const intent = (results && results[0] && results[0].intent) || 'search';
|
|
332
446
|
return {
|
|
333
447
|
query,
|
|
448
|
+
intent,
|
|
334
449
|
results: (results || []).map((r, i) => ({
|
|
335
450
|
rank: i + 1,
|
|
336
451
|
file: r.file,
|
|
337
452
|
score: r.score,
|
|
338
453
|
sigs: r.sigs,
|
|
339
454
|
tokens: r.tokens,
|
|
455
|
+
signals: r.signals || {},
|
|
340
456
|
})),
|
|
341
457
|
totalResults: (results || []).length,
|
|
342
458
|
};
|
|
343
459
|
}
|
|
344
460
|
|
|
345
461
|
// ---------------------------------------------------------------------------
|
|
346
|
-
// Intent detection
|
|
462
|
+
// Intent detection — 7 intents
|
|
347
463
|
// ---------------------------------------------------------------------------
|
|
348
464
|
const INTENT_PATTERNS = {
|
|
349
465
|
debug: /\b(bug|fix|error|crash|exception|broken|failing|issue|problem|regression)\b/i,
|
|
350
|
-
explain: /\b(explain|how does|what is|understand|overview|architecture|describe|walk me)\b/i,
|
|
351
|
-
refactor: /\b(refactor|restructure|redesign|clean up|extract|move|rename|simplify)\b/i,
|
|
352
|
-
review: /\b(review|check|audit|security|pr|pull request|assess)\b/i,
|
|
466
|
+
explain: /\b(explain|how does|what is|understand|overview|architecture|describe|walk me|teach)\b/i,
|
|
467
|
+
refactor: /\b(refactor|restructure|redesign|clean up|extract|move|rename|simplify|optimize)\b/i,
|
|
468
|
+
review: /\b(review|check|audit|security|pr|pull request|assess|validate)\b/i,
|
|
469
|
+
test: /\b(test|unit test|integration test|testing|spec|assert|mock)\b/i,
|
|
470
|
+
integrate:/\b(import|integrate|connect|wire|bind|require|export|depend|graph)\b|require[ds]\b/i,
|
|
471
|
+
navigate: /\b(find|locate|where|search|look for|show me|navigate|browse|list)\b/i,
|
|
353
472
|
};
|
|
354
473
|
|
|
355
474
|
function detectIntent(query) {
|
|
@@ -360,4 +479,4 @@ function detectIntent(query) {
|
|
|
360
479
|
return 'search';
|
|
361
480
|
}
|
|
362
481
|
|
|
363
|
-
module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS, detectIntent };
|
|
482
|
+
module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS, GRAPH_BOOST_AMOUNTS, detectIntent };
|