sigmap 6.4.0 → 6.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +102 -111
- package/CHANGELOG.md +32 -0
- package/README.md +9 -8
- package/gen-context.js +59 -2
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/config/loader.js +26 -2
- package/src/discovery/framework-detector.js +88 -0
- package/src/discovery/language-detector.js +74 -0
- package/src/discovery/sigmapignore.js +29 -0
- package/src/discovery/source-root-registry.js +166 -0
- package/src/discovery/source-root-resolver.js +181 -0
- package/src/discovery/source-root-scorer.js +98 -0
- package/src/mcp/server.js +1 -1
- package/src/retrieval/ranker.js +88 -23
package/src/retrieval/ranker.js
CHANGED
|
@@ -32,19 +32,49 @@ const DEFAULT_WEIGHTS = {
|
|
|
32
32
|
graphBoost: 0.4, // additive bonus for 1-hop import neighbors of matching files
|
|
33
33
|
};
|
|
34
34
|
|
|
35
|
+
/**
 * Scoring weights keyed by detected query intent.
 *
 * Every intent starts from DEFAULT_WEIGHTS and overrides only the fields
 * listed for it. `search` is DEFAULT_WEIGHTS itself (same object reference,
 * not a copy).
 */
const INTENT_WEIGHTS = {
  search: DEFAULT_WEIGHTS,
  debug: Object.assign({}, DEFAULT_WEIGHTS, { exactToken: 1.2, pathMatch: 0.6 }),
  explain: Object.assign({}, DEFAULT_WEIGHTS, { symbolMatch: 0.8, pathMatch: 0.9 }),
  refactor: Object.assign({}, DEFAULT_WEIGHTS, { symbolMatch: 0.9, exactToken: 0.8 }),
  review: Object.assign({}, DEFAULT_WEIGHTS, { pathMatch: 1.0, exactToken: 0.9 }),
  test: Object.assign({}, DEFAULT_WEIGHTS, { exactToken: 0.7, symbolMatch: 0.4 }),
  integrate: Object.assign({}, DEFAULT_WEIGHTS, { graphBoost: 0.7, pathMatch: 1.1 }),
  navigate: Object.assign({}, DEFAULT_WEIGHTS, { pathMatch: 1.2, exactToken: 0.9 }),
};
|
|
46
|
+
|
|
47
|
+
// Penalty multipliers for negative signals. The value returned by
// _computePenalty() is multiplied into a file's match score, so 1.0 means
// "no penalty" and 0.0 removes the file's match score entirely.
const PENALTY_SIGNALS = {
  testFile: 0.4,       // test/tests/spec/__tests__/e2e directory in path
  generatedCode: 0.3,  // dist/build/.next/.nuxt/out/.venv/venv directory in path
  docsFile: 0.2,       // docs/doc directory in path, or a README/CHANGELOG file
  nodeModules: 0.0,    // node_modules (zero score)
};

/**
 * Compute the score multiplier for a file based on where it lives.
 *
 * Checks run in priority order and the first match wins:
 * node_modules, then test directories, then generated/build output, then docs.
 *
 * @param {string} filePath - relative file path; `/` or `\` separators are both accepted
 * @returns {number} multiplier in [0, 1]; 1.0 when no negative signal matches
 */
function _computePenalty(filePath) {
  // Normalise Windows separators so the segment-based patterns below apply.
  const pathLower = filePath.toLowerCase().replace(/\\/g, '/');

  if (pathLower.includes('node_modules')) return PENALTY_SIGNALS.nodeModules;

  if (/(^|\/)(test|tests|spec|__tests__|e2e)($|\/)/.test(pathLower)) {
    return PENALTY_SIGNALS.testFile;
  }

  if (/(^|\/)(dist|build|\.next|\.nuxt|out|\.venv|venv)($|\/)/.test(pathLower)) {
    return PENALTY_SIGNALS.generatedCode;
  }

  // Whole path segment named docs/doc/readme/changelog (directory or extension-less file).
  if (/(^|\/)(docs|doc|readme|changelog)($|\/)/.test(pathLower)) {
    return PENALTY_SIGNALS.docsFile;
  }

  // README.md, CHANGELOG.txt, ...: the segment check above requires the segment
  // to end right after the name, so files with an extension need their own test.
  if (/(^|\/)(readme|changelog)\.[^/]+$/.test(pathLower)) {
    return PENALTY_SIGNALS.docsFile;
  }

  return 1.0;
}
|
|
63
|
+
|
|
35
64
|
/**
|
|
36
|
-
* Score a single file against a query.
|
|
65
|
+
* Score a single file against a query, returning detailed signal breakdown.
|
|
37
66
|
*
|
|
38
67
|
* @param {string} filePath - relative file path (e.g. 'src/extractors/python.js')
|
|
39
68
|
* @param {string[]} sigs - signature strings for this file
|
|
40
69
|
* @param {string[]} queryTokens - pre-tokenized query
|
|
41
70
|
* @param {object} weights
|
|
42
|
-
* @returns {number}
|
|
71
|
+
* @returns {{ score: number, signals: { exactToken: number, symbolMatch: number, prefixMatch: number, pathMatch: number, penalty: number } }}
|
|
43
72
|
*/
|
|
44
73
|
function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
45
|
-
if (!sigs || sigs.length === 0) return 0;
|
|
74
|
+
if (!sigs || sigs.length === 0) return { score: 0, signals: { exactToken: 0, symbolMatch: 0, prefixMatch: 0, pathMatch: 0, penalty: 1.0 } };
|
|
46
75
|
|
|
47
76
|
const w = weights || DEFAULT_WEIGHTS;
|
|
77
|
+
const signals = { exactToken: 0, symbolMatch: 0, prefixMatch: 0, pathMatch: 0, penalty: _computePenalty(filePath) };
|
|
48
78
|
|
|
49
79
|
// Build token set from all signatures
|
|
50
80
|
const sigText = sigs.join(' ');
|
|
@@ -60,14 +90,19 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
60
90
|
|
|
61
91
|
// Exact token match in sigs
|
|
62
92
|
if (sigTokenSet.has(qt)) {
|
|
63
|
-
|
|
93
|
+
const bonus = w.exactToken;
|
|
94
|
+
score += bonus;
|
|
95
|
+
signals.exactToken += bonus;
|
|
64
96
|
|
|
65
97
|
// Bonus: appears directly in a function/class/method name line
|
|
66
98
|
const nameLineMatch = sigs.some((sig) => {
|
|
67
99
|
const nt = tokenize(sig.replace(/[^a-zA-Z0-9_\s]/g, ' '));
|
|
68
100
|
return nt.includes(qt);
|
|
69
101
|
});
|
|
70
|
-
if (nameLineMatch)
|
|
102
|
+
if (nameLineMatch) {
|
|
103
|
+
score += w.symbolMatch;
|
|
104
|
+
signals.symbolMatch += w.symbolMatch;
|
|
105
|
+
}
|
|
71
106
|
}
|
|
72
107
|
|
|
73
108
|
// Prefix match (e.g. query "python" matches "pythonDeps")
|
|
@@ -75,6 +110,7 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
75
110
|
for (const st of sigTokenSet) {
|
|
76
111
|
if (st !== qt && st.startsWith(qt)) {
|
|
77
112
|
score += w.prefixMatch;
|
|
113
|
+
signals.prefixMatch += w.prefixMatch;
|
|
78
114
|
break; // one bonus per query token
|
|
79
115
|
}
|
|
80
116
|
}
|
|
@@ -83,10 +119,14 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
83
119
|
// Path token match
|
|
84
120
|
if (pathTokenSet.has(qt)) {
|
|
85
121
|
score += w.pathMatch;
|
|
122
|
+
signals.pathMatch += w.pathMatch;
|
|
86
123
|
}
|
|
87
124
|
}
|
|
88
125
|
|
|
89
|
-
|
|
126
|
+
// Apply penalty multiplier
|
|
127
|
+
score *= signals.penalty;
|
|
128
|
+
|
|
129
|
+
return { score, signals };
|
|
90
130
|
}
|
|
91
131
|
|
|
92
132
|
/**
|
|
@@ -101,7 +141,7 @@ function scoreFile(filePath, sigs, queryTokens, weights) {
|
|
|
101
141
|
* @param {object} [opts.weights] - override scoring weights
|
|
102
142
|
* @param {string} [opts.cwd] - project root for learned ranking weights
|
|
103
143
|
* @param {{ forward: Map<string,string[]> }} [opts.graph] - dependency graph for neighbor boost
|
|
104
|
-
* @returns {{ file: string, score: number, sigs: string[], tokens: number }[]}
|
|
144
|
+
* @returns {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]}
|
|
105
145
|
*/
|
|
106
146
|
function rank(query, sigIndex, opts) {
|
|
107
147
|
if (!query || typeof query !== 'string') return [];
|
|
@@ -110,17 +150,21 @@ function rank(query, sigIndex, opts) {
|
|
|
110
150
|
const topK = (opts && opts.topK) || 10;
|
|
111
151
|
const recencyMultiplier = (opts && opts.recencyBoost) || DEFAULT_WEIGHTS.recencyBoost;
|
|
112
152
|
const recencySet = (opts && opts.recencySet) || null;
|
|
113
|
-
const weights = (opts && opts.weights) ? Object.assign({}, DEFAULT_WEIGHTS, opts.weights) : DEFAULT_WEIGHTS;
|
|
114
|
-
const learnedWeights = opts && opts.cwd ? loadWeights(opts.cwd) : null;
|
|
115
153
|
const graph = (opts && opts.graph && opts.graph.forward instanceof Map) ? opts.graph : null;
|
|
116
154
|
const cwd = (opts && opts.cwd) || null;
|
|
117
155
|
|
|
156
|
+
// Detect query intent and get appropriate weights
|
|
157
|
+
const intent = detectIntent(query);
|
|
158
|
+
const intentWeights = INTENT_WEIGHTS[intent] || DEFAULT_WEIGHTS;
|
|
159
|
+
const weights = (opts && opts.weights) ? Object.assign({}, intentWeights, opts.weights) : intentWeights;
|
|
160
|
+
const learnedWeights = opts && opts.cwd ? loadWeights(opts.cwd) : null;
|
|
161
|
+
|
|
118
162
|
const queryTokens = tokenize(query);
|
|
119
163
|
if (queryTokens.length === 0) {
|
|
120
164
|
// Empty query: return top-K by file count (most signatures = most useful)
|
|
121
165
|
const all = [];
|
|
122
166
|
for (const [file, sigs] of sigIndex.entries()) {
|
|
123
|
-
all.push({ file, score: sigs.length, sigs, tokens: Math.ceil(sigs.join('\n').length / 4) });
|
|
167
|
+
all.push({ file, score: sigs.length, sigs, tokens: Math.ceil(sigs.join('\n').length / 4), intent, signals: {} });
|
|
124
168
|
}
|
|
125
169
|
all.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
|
|
126
170
|
return all.slice(0, topK);
|
|
@@ -128,15 +172,20 @@ function rank(query, sigIndex, opts) {
|
|
|
128
172
|
|
|
129
173
|
const scored = [];
|
|
130
174
|
for (const [file, sigs] of sigIndex.entries()) {
|
|
131
|
-
|
|
175
|
+
const result = scoreFile(file, sigs, queryTokens, weights);
|
|
176
|
+
let score = result.score;
|
|
177
|
+
const signals = result.signals;
|
|
132
178
|
|
|
133
179
|
// Recency boost
|
|
134
180
|
if (recencySet && recencySet.has(file) && score > 0) {
|
|
135
181
|
score *= recencyMultiplier;
|
|
182
|
+
signals.recencyBoost = recencyMultiplier;
|
|
136
183
|
}
|
|
137
184
|
|
|
138
185
|
if (learnedWeights && score > 0) {
|
|
139
|
-
|
|
186
|
+
const multiplier = learnedWeights[file] || 1.0;
|
|
187
|
+
score *= multiplier;
|
|
188
|
+
signals.learnedWeights = multiplier;
|
|
140
189
|
}
|
|
141
190
|
|
|
142
191
|
scored.push({
|
|
@@ -144,6 +193,8 @@ function rank(query, sigIndex, opts) {
|
|
|
144
193
|
score,
|
|
145
194
|
sigs,
|
|
146
195
|
tokens: Math.ceil(sigs.join('\n').length / 4),
|
|
196
|
+
intent,
|
|
197
|
+
signals,
|
|
147
198
|
});
|
|
148
199
|
}
|
|
149
200
|
|
|
@@ -166,6 +217,7 @@ function rank(query, sigIndex, opts) {
|
|
|
166
217
|
const idx = relToIdx.get(neighborRel);
|
|
167
218
|
if (idx !== undefined) {
|
|
168
219
|
scored[idx].score += weights.graphBoost;
|
|
220
|
+
scored[idx].signals.graphBoost = (scored[idx].signals.graphBoost || 0) + weights.graphBoost;
|
|
169
221
|
}
|
|
170
222
|
}
|
|
171
223
|
}
|
|
@@ -286,7 +338,7 @@ function buildSigIndex(cwd, opts) {
|
|
|
286
338
|
/**
|
|
287
339
|
* Format ranked results as a markdown table string.
|
|
288
340
|
*
|
|
289
|
-
* @param {{ file: string, score: number, sigs: string[], tokens: number }[]} results
|
|
341
|
+
* @param {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]} results
|
|
290
342
|
* @param {string} query
|
|
291
343
|
* @returns {string}
|
|
292
344
|
*/
|
|
@@ -295,14 +347,17 @@ function formatRankTable(results, query) {
|
|
|
295
347
|
return `No matching files found for query: "${query}"\n`;
|
|
296
348
|
}
|
|
297
349
|
|
|
350
|
+
const intent = (results[0] && results[0].intent) || 'search';
|
|
298
351
|
const lines = [
|
|
299
352
|
`## Query: ${query}`,
|
|
353
|
+
`Intent: ${intent}`,
|
|
300
354
|
'',
|
|
301
|
-
'| Rank | File | Score | Sigs |
|
|
302
|
-
'
|
|
303
|
-
...results.map((r, i) =>
|
|
304
|
-
|
|
305
|
-
|
|
355
|
+
'| Rank | File | Score | Sigs | Penalty |',
|
|
356
|
+
'|------|------|-------|------|---------|',
|
|
357
|
+
...results.map((r, i) => {
|
|
358
|
+
const penalty = r.signals && r.signals.penalty ? r.signals.penalty.toFixed(2) : '1.00';
|
|
359
|
+
return `| ${i + 1} | ${r.file} | ${r.score.toFixed(2)} | ${r.sigs.length} | ${penalty} |`;
|
|
360
|
+
}),
|
|
306
361
|
'',
|
|
307
362
|
];
|
|
308
363
|
|
|
@@ -310,6 +365,10 @@ function formatRankTable(results, query) {
|
|
|
310
365
|
for (const r of results.slice(0, 3)) {
|
|
311
366
|
if (r.sigs.length > 0) {
|
|
312
367
|
lines.push(`### ${r.file}`);
|
|
368
|
+
if (r.signals) {
|
|
369
|
+
const sig = r.signals;
|
|
370
|
+
lines.push(`Signals: exactToken=${(sig.exactToken || 0).toFixed(2)} symbolMatch=${(sig.symbolMatch || 0).toFixed(2)} prefixMatch=${(sig.prefixMatch || 0).toFixed(2)} pathMatch=${(sig.pathMatch || 0).toFixed(2)} penalty=${(sig.penalty || 1).toFixed(2)}`);
|
|
371
|
+
}
|
|
313
372
|
lines.push('```');
|
|
314
373
|
lines.push(...r.sigs.slice(0, 10));
|
|
315
374
|
if (r.sigs.length > 10) lines.push(`... (${r.sigs.length - 10} more)`);
|
|
@@ -324,32 +383,38 @@ function formatRankTable(results, query) {
|
|
|
324
383
|
/**
|
|
325
384
|
* Format ranked results as a structured JSON-serialisable object.
|
|
326
385
|
*
|
|
327
|
-
* @param {{ file: string, score: number, sigs: string[], tokens: number }[]} results
|
|
386
|
+
* @param {{ file: string, score: number, sigs: string[], tokens: number, intent: string, signals: object }[]} results
|
|
328
387
|
* @param {string} query
|
|
329
388
|
* @returns {object}
|
|
330
389
|
*/
|
|
331
390
|
function formatRankJSON(results, query) {
  // A missing/falsy result list is treated as an empty one.
  const entries = results || [];

  // The intent is read from the first result; 'search' is the fallback.
  const first = entries[0];
  const intent = (first && first.intent) || 'search';

  const rows = entries.map((entry, position) => {
    return {
      rank: position + 1, // 1-based rank
      file: entry.file,
      score: entry.score,
      sigs: entry.sigs,
      tokens: entry.tokens,
      signals: entry.signals || {},
    };
  });

  return {
    query,
    intent,
    results: rows,
    totalResults: entries.length,
  };
}
|
|
344
406
|
|
|
345
407
|
// ---------------------------------------------------------------------------
|
|
346
|
-
// Intent detection
|
|
408
|
+
// Intent detection — 7 intents
|
|
347
409
|
// ---------------------------------------------------------------------------
|
|
348
410
|
// Keyword patterns keyed by intent name. Every pattern is case-insensitive.
const INTENT_PATTERNS = {
  // Something is broken and needs fixing.
  debug: /\b(bug|fix|error|crash|exception|broken|failing|issue|problem|regression)\b/i,

  // The user wants to understand how something works.
  explain: /\b(explain|how does|what is|understand|overview|architecture|describe|walk me|teach)\b/i,

  // Restructuring existing code.
  refactor: /\b(refactor|restructure|redesign|clean up|extract|move|rename|simplify|optimize)\b/i,

  // Reviewing or auditing code.
  review: /\b(review|check|audit|security|pr|pull request|assess|validate)\b/i,

  // Writing or locating tests.
  test: /\b(test|unit test|integration test|testing|spec|assert|mock)\b/i,

  // Imports, exports and dependency wiring; the second alternative also
  // accepts "required" / "requires".
  integrate: /\b(import|integrate|connect|wire|bind|require|export|depend|graph)\b|require[ds]\b/i,

  // Finding where something lives.
  navigate: /\b(find|locate|where|search|look for|show me|navigate|browse|list)\b/i,
};
|
|
354
419
|
|
|
355
420
|
function detectIntent(query) {
|