sigmap 5.1.0 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +70 -51
- package/CHANGELOG.md +28 -0
- package/README.md +36 -19
- package/gen-context.js +333 -13
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/README.md +1 -0
- package/packages/core/index.js +1 -0
- package/packages/core/package.json +1 -1
- package/src/format/benchmark-report.js +443 -0
- package/src/judge/judge-engine.js +68 -1
- package/src/learning/weights.js +138 -0
- package/src/mcp/handlers.js +2 -2
- package/src/mcp/server.js +1 -1
- package/src/retrieval/ranker.js +7 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
/**
 * Escape a value so it can be interpolated into HTML text or a
 * double-quoted attribute value.
 *
 * `null` and `undefined` become the empty string; everything else is
 * stringified first. The ampersand is replaced first so the entities
 * produced by the later replacements are not escaped a second time.
 *
 * @param {*} value - Any value; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value == null ? '' : value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
|
|
13
|
+
|
|
14
|
+
/**
 * Format a value as a rounded integer with en-US thousands separators.
 *
 * @param {*} value - Anything `Number()` can coerce.
 * @returns {string} The grouped integer, or 'n/a' when the coerced value is not finite.
 */
function formatInt(value) {
  const parsed = Number(value);
  return Number.isFinite(parsed)
    ? Math.round(parsed).toLocaleString('en-US')
    : 'n/a';
}
|
|
19
|
+
|
|
20
|
+
/**
 * Format a number compactly: plain integer below one thousand, one-decimal
 * "K" for thousands and one-decimal "M" for millions.
 *
 * The unit thresholds sit at the point where one-decimal rounding would
 * spill into the next unit, so 999_999 renders as "1.0M" rather than
 * "1000.0K", and 999.6 renders as "1.0K" rather than "1000".
 *
 * @param {*} value - Anything `Number()` can coerce.
 * @returns {string} The compact label, or 'n/a' when the coerced value is not finite.
 */
function formatCompact(value) {
  const n = Number(value);
  if (!Number.isFinite(n)) return 'n/a';
  const magnitude = Math.abs(n);
  // 999_950 is the smallest magnitude that rounds to "1000.0" in K units.
  if (magnitude >= 999_950) return `${(n / 1_000_000).toFixed(1)}M`;
  // 999.5 is the smallest magnitude that Math.round would print as 1000.
  if (magnitude >= 999.5) return `${(n / 1_000).toFixed(1)}K`;
  return String(Math.round(n));
}
|
|
27
|
+
|
|
28
|
+
/**
 * Format a number as a percentage string with a fixed number of decimals.
 *
 * `null`, `undefined` and the empty string are reported as 'n/a'. Without
 * that guard `Number(null)` and `Number('')` coerce to 0, so a summary
 * value deliberately left as `null` would be printed as "0.0%".
 *
 * @param {*} value - Percentage value, already scaled to 0-100.
 * @param {number} [digits=1] - Decimal places passed to `toFixed`.
 * @returns {string} e.g. "12.3%", or 'n/a' when there is no finite value.
 */
function formatPct(value, digits = 1) {
  if (value == null || value === '') return 'n/a';
  const n = Number(value);
  if (!Number.isFinite(n)) return 'n/a';
  return `${n.toFixed(digits)}%`;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Format an optional percentage value with a fixed number of decimals.
 *
 * Missing values (`null`, `undefined`, empty string) render as 'n/a'.
 * The explicit guard is needed because `Number(null)` and `Number('')`
 * are both 0 and would otherwise be shown as "0.0%".
 *
 * @param {*} value - Percentage value, already scaled to 0-100.
 * @param {number} [digits=1] - Decimal places passed to `toFixed`.
 * @returns {string} e.g. "87.5%", or 'n/a' when there is no finite value.
 */
function formatMaybePct(value, digits = 1) {
  if (value == null || value === '') return 'n/a';
  const n = Number(value);
  if (!Number.isFinite(n)) return 'n/a';
  return `${n.toFixed(digits)}%`;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Format a multiplier such as a lift factor, e.g. "3.2x".
 *
 * Missing values (`null`, `undefined`, empty string) render as 'n/a'.
 * Callers in this file pass `null` when a ratio cannot be computed, and
 * `Number(null)` would otherwise turn that into "0.0x".
 *
 * @param {*} value - The ratio to format.
 * @param {number} [digits=1] - Decimal places passed to `toFixed`.
 * @returns {string} The ratio followed by "x", or 'n/a'.
 */
function formatRatio(value, digits = 1) {
  if (value == null || value === '') return 'n/a';
  const n = Number(value);
  if (!Number.isFinite(n)) return 'n/a';
  return `${n.toFixed(digits)}x`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Format a value as a dollar amount with exactly two decimals and
 * en-US thousands separators.
 *
 * @param {*} value - Anything `Number()` can coerce.
 * @returns {string} e.g. "$1,234.50", or 'n/a' when the coerced value is not finite.
 */
function formatMoney(value) {
  const amount = Number(value);
  if (Number.isFinite(amount)) {
    const twoDecimals = { minimumFractionDigits: 2, maximumFractionDigits: 2 };
    return `$${amount.toLocaleString('en-US', twoDecimals)}`;
  }
  return 'n/a';
}
|
|
51
|
+
|
|
52
|
+
/**
 * Turn a duration in milliseconds into a short label: "12.3s" below one
 * minute, otherwise "2m 5.0s".
 *
 * The duration is rounded to tenths of a second before it is split into
 * minutes and seconds. Rounding afterwards lets values just under a
 * minute boundary print as "60.0s" or "1m 60.0s".
 *
 * @param {*} ms - Duration in milliseconds; coerced with `Number()`.
 * @returns {string} The label, or 'n/a' when the coerced value is not finite.
 */
function durationLabel(ms) {
  const n = Number(ms);
  if (!Number.isFinite(n)) return 'n/a';
  const tenths = Math.round(n / 100); // whole tenths of a second
  if (tenths < 600) return `${(tenths / 10).toFixed(1)}s`;
  const min = Math.floor(tenths / 600);
  const rem = (tenths - (min * 600)) / 10;
  return `${min}m ${rem.toFixed(1)}s`;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Return the largest entry of an array, counting every non-finite entry as 0.
 *
 * @param {*} values - Expected to be an array of numbers.
 * @returns {number} The maximum, or 0 when `values` is not an array or is empty.
 */
function maxOrZero(values) {
  const usable = Array.isArray(values) && values.length > 0;
  if (!usable) return 0;
  return values.reduce(
    (best, entry) => Math.max(best, Number.isFinite(entry) ? entry : 0),
    -Infinity
  );
}
|
|
66
|
+
|
|
67
|
+
/**
 * Best-effort JSON file reader.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {*} The parsed content, or `null` when the file does not exist,
 *   cannot be read, or does not contain valid JSON.
 */
function readJson(filePath) {
  let parsed = null;
  try {
    if (fs.existsSync(filePath)) {
      parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    }
  } catch (_) {
    // Unreadable or malformed reports are treated the same as missing ones.
    parsed = null;
  }
  return parsed;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Load every saved benchmark artifact from `<cwd>/benchmarks/reports`.
 *
 * @param {string} cwd - Project root directory.
 * @returns {{reportsDir: string, token: *, retrieval: *, quality: *, task: *, matrix: *}}
 *   The reports directory plus one entry per artifact; each entry holds
 *   whatever `readJson` returns for that file.
 */
function loadBenchmarkReports(cwd) {
  const reportsDir = path.join(cwd, 'benchmarks', 'reports');
  const load = (fileName) => readJson(path.join(reportsDir, fileName));

  const token = load('token-reduction.json');
  const retrieval = load('retrieval.json');
  const quality = load('quality.json');
  const task = load('task-benchmark.json');
  const matrix = load('benchmark-matrix.json');

  return { reportsDir, token, retrieval, quality, task, matrix };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Aggregate per-repo retrieval results into a single summary.
 *
 * `hitAt5` and `randomBaseline` are task-weighted averages multiplied by
 * 100; tier counts are summed across repos. Fields that are missing or
 * non-numeric contribute 0.
 *
 * `lift` is computed whenever the aggregated baseline is positive, so a
 * hit rate of exactly 0 gives a lift of 0 rather than `null`. This matches
 * the per-repo rule used by `renderRetrievalSection`.
 *
 * @param {?{repos: Array<Object>}} retrieval - Parsed retrieval report.
 * @returns {?{repoCount: number, totalTasks: number, hitAt5: ?number,
 *   randomBaseline: ?number, lift: ?number, correct: number,
 *   partial: number, wrong: number}} `null` when there are no repos.
 */
function buildRetrievalSummary(retrieval) {
  if (!retrieval || !Array.isArray(retrieval.repos) || retrieval.repos.length === 0) return null;

  let totalTasks = 0;
  let weightedHit = 0;
  let weightedRand = 0;
  let correct = 0;
  let partial = 0;
  let wrong = 0;
  let repoCount = 0;

  for (const entry of retrieval.repos) {
    // A null/undefined entry still counts as a repo but contributes no data.
    const repo = entry || {};
    const tiers = repo.tiers || {};
    const tasks = Number(repo.tasks) || 0;
    repoCount++;
    totalTasks += tasks;
    weightedHit += (Number(repo.hitAt5) || 0) * tasks;
    weightedRand += (Number(repo.randomBaseline) || 0) * tasks;
    correct += Number(tiers.correct) || 0;
    partial += Number(tiers.partial) || 0;
    wrong += Number(tiers.wrong) || 0;
  }

  const hitAt5 = totalTasks > 0 ? (weightedHit / totalTasks) * 100 : null;
  const randomBaseline = totalTasks > 0 ? (weightedRand / totalTasks) * 100 : null;
  // `null > 0` is false, so the no-task case still yields a null lift.
  const lift = randomBaseline > 0 ? hitAt5 / randomBaseline : null;

  return {
    repoCount,
    totalTasks,
    hitAt5,
    randomBaseline,
    lift,
    correct,
    partial,
    wrong,
  };
}
|
|
124
|
+
|
|
125
|
+
/**
 * Collapse the loaded benchmark reports into the summary used by the page.
 *
 * @param {Object} reports - Result of `loadBenchmarkReports`.
 * @param {Object} [matrixSummary] - Run matrix to use in preference to `reports.matrix`.
 * @returns {{generatedAt: string, missing: string[], tokenSummary: ?Object,
 *   retrievalSummary: ?Object, qualitySummary: ?Object, taskSummary: ?Object,
 *   matrix: ?Object}} `generatedAt` is the most recent parseable timestamp
 *   found in the reports, else the first timestamp present, else the
 *   current time as an ISO string.
 */
function buildBenchmarkSummary(reports, matrixSummary) {
  // File names of the four required artifacts, in the order they are reported.
  const required = [
    ['token', 'token-reduction.json'],
    ['retrieval', 'retrieval.json'],
    ['quality', 'quality.json'],
    ['task', 'task-benchmark.json'],
  ];
  const missing = required
    .filter(([key]) => !reports[key])
    .map(([, fileName]) => fileName);

  const summaryOf = (report) => (report && report.summary ? report.summary : null);
  const matrix = matrixSummary || reports.matrix || null;

  const generatedCandidates = [
    matrix && matrix.generated,
    reports.task && reports.task.generated,
    reports.retrieval && reports.retrieval.generated,
    reports.quality && reports.quality.timestamp,
    reports.token && reports.token.timestamp,
  ].filter(Boolean);

  // Keep the latest parseable timestamp; on a tie the earlier candidate wins.
  let newest = null;
  for (const candidate of generatedCandidates) {
    const time = Date.parse(candidate);
    if (!Number.isFinite(time)) continue;
    if (newest === null || time > newest.time) newest = { value: candidate, time };
  }

  return {
    generatedAt: (newest && newest.value) || generatedCandidates[0] || new Date().toISOString(),
    missing,
    tokenSummary: summaryOf(reports.token),
    retrievalSummary: buildRetrievalSummary(reports.retrieval),
    qualitySummary: summaryOf(reports.quality),
    taskSummary: summaryOf(reports.task),
    matrix,
  };
}
|
|
160
|
+
|
|
161
|
+
/**
 * Render one headline metric card.
 *
 * @param {*} label - Card title (HTML-escaped).
 * @param {*} value - Main figure (HTML-escaped).
 * @param {*} hint - Secondary line; falsy values render as empty (HTML-escaped).
 * @param {string} [tone] - Extra CSS class appended to "card"; inserted as-is, not escaped.
 * @returns {string} The `<article>` markup.
 */
function renderCard(label, value, hint, tone) {
  const cssClass = tone ? `card ${tone}` : 'card';
  const labelHtml = `<div class="label">${escapeHtml(label)}</div>`;
  const valueHtml = `<div class="value">${escapeHtml(value)}</div>`;
  const hintHtml = `<div class="hint">${escapeHtml(hint || '')}</div>`;
  return `<article class="${cssClass}">${labelHtml}${valueHtml}${hintHtml}</article>`;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Render one labelled progress bar row.
 *
 * The bar width is `value / max` as a percentage, clamped to the range
 * 2-100 and printed with one decimal. A non-finite `value` counts as 0;
 * `max` is never taken as less than 1.
 *
 * @param {*} label - Row label (HTML-escaped).
 * @param {number} value - Value shown next to the bar.
 * @param {number} max - Value that corresponds to a full bar.
 * @param {string} [suffix] - Text appended to the displayed value.
 * @returns {string} The row markup.
 */
function renderProgress(label, value, max, suffix) {
  const shown = Number.isFinite(value) ? value : 0;
  const ceiling = Math.max(1, Number.isFinite(max) ? max : 1);
  const percent = (shown / ceiling) * 100;
  const width = Math.max(2, Math.min(100, percent));
  const widthText = String(width.toFixed(1));
  const valueText = escapeHtml(`${shown}${suffix || ''}`);

  let html = '<div class="progress-row">';
  html += `<div class="progress-label">${escapeHtml(label)}</div>`;
  html += `<div class="progress-bar"><span style="width:${widthText}%"></span></div>`;
  html += `<div class="progress-value">${valueText}</div>`;
  html += '</div>';
  return html;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Render the "Run matrix" table: one row per benchmark step with its
 * status badge, duration and the `node <script> <args>` command line.
 *
 * @param {?{steps: Array<Object>}} matrix - Run matrix summary.
 * @returns {string} The section markup, or '' when there are no steps.
 */
function renderMatrixSection(matrix) {
  const steps = matrix && Array.isArray(matrix.steps) ? matrix.steps : [];
  if (steps.length === 0) return '';

  const rows = steps.map((step) => {
    const badgeClass = step.ok ? 'ok' : 'fail';
    const badgeText = step.ok ? 'ok' : `exit ${step.status}`;
    const command = ['node', step.script].concat(step.args || []).join(' ');
    const cells = [
      `<td>${escapeHtml(step.name)}</td>`,
      `<td><span class="badge ${badgeClass}">${escapeHtml(badgeText)}</span></td>`,
      `<td>${escapeHtml(durationLabel(step.durationMs))}</td>`,
      `<td><code>${escapeHtml(command)}</code></td>`,
    ];
    return `<tr>${cells.join('')}</tr>`;
  });

  const heading = '<h2>Run matrix</h2>';
  const copy = '<p class="section-copy">This shows which benchmark jobs ran, whether they succeeded, and how long each step took.</p>';
  const head = '<thead><tr><th>Step</th><th>Status</th><th>Duration</th><th>Command</th></tr></thead>';
  const table = `<table>${head}<tbody>${rows.join('')}</tbody></table>`;
  return `<section>${heading}${copy}${table}</section>`;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Render the "Token reduction" table, with repos ordered by
 * `reductionPct` from highest to lowest.
 *
 * @param {?{repos: Array<Object>}} token - Parsed token-reduction report.
 * @returns {string} The section markup, or '' when there are no repos.
 */
function renderTokenSection(token) {
  const hasRepos = Boolean(token) && Array.isArray(token.repos) && token.repos.length > 0;
  if (!hasRepos) return '';

  // Sort a copy so the caller's report object is left untouched.
  const ordered = token.repos.slice();
  ordered.sort((a, b) => (b.reductionPct || 0) - (a.reductionPct || 0));

  let rows = '';
  for (const repo of ordered) {
    rows += '<tr>';
    rows += `<td>${escapeHtml(repo.repo)}</td>`;
    rows += `<td>${escapeHtml(repo.language || 'n/a')}</td>`;
    rows += `<td>${escapeHtml(formatCompact(repo.rawTokens))}</td>`;
    rows += `<td>${escapeHtml(formatCompact(repo.finalTokens))}</td>`;
    rows += `<td>${escapeHtml(formatMaybePct(repo.reductionPct, 1))}</td>`;
    rows += '</tr>';
  }

  const heading = '<h2>Token reduction</h2>';
  const copy = '<p class="section-copy">Raw repository tokens versus SigMap output size across the benchmark repos.</p>';
  const head = '<thead><tr><th>Repo</th><th>Language</th><th>Raw tokens</th><th>Final tokens</th><th>Reduction</th></tr></thead>';
  return `<section>${heading}${copy}<table>${head}<tbody>${rows}</tbody></table></section>`;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Render the "Retrieval quality" table: per-repo random baseline, hit@5,
 * lift and quality-tier counts.
 *
 * `randomBaseline` and `hitAt5` are multiplied by 100 for display
 * (presumably stored as 0-1 fractions; confirm against the report writer).
 * Lift is only computed when the repo's baseline is greater than 0.
 *
 * @param {?{repos: Array<Object>}} retrieval - Parsed retrieval report.
 * @returns {string} The section markup, or '' when there are no repos.
 */
function renderRetrievalSection(retrieval) {
  const hasRepos = Boolean(retrieval) && Array.isArray(retrieval.repos) && retrieval.repos.length > 0;
  if (!hasRepos) return '';

  const tierCount = (repo, tier) => String((repo.tiers && repo.tiers[tier]) || 0);

  const rows = retrieval.repos.map((repo) => {
    let lift = null;
    if (repo.randomBaseline > 0) lift = repo.hitAt5 / repo.randomBaseline;

    const cells = [
      repo.repo,
      formatMaybePct((repo.randomBaseline || 0) * 100, 1),
      formatMaybePct((repo.hitAt5 || 0) * 100, 1),
      formatRatio(lift, 1),
      tierCount(repo, 'correct'),
      tierCount(repo, 'partial'),
      tierCount(repo, 'wrong'),
    ];
    return `<tr>${cells.map((cell) => `<td>${escapeHtml(cell)}</td>`).join('')}</tr>`;
  });

  const heading = '<h2>Retrieval quality</h2>';
  const copy = '<p class="section-copy">Hit@5 performance against the random baseline, plus the quality-tier mix that drives the task benchmark.</p>';
  const head = '<thead><tr><th>Repo</th><th>Random hit@5</th><th>SigMap hit@5</th><th>Lift</th><th>Correct</th><th>Partial</th><th>Wrong</th></tr></thead>';
  return `<section>${heading}${copy}<table>${head}<tbody>${rows.join('')}</tbody></table></section>`;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Render the "Quality and hallucination surface" table.
 *
 * A repo is badged "overflow" when its `rawTokens` exceed 128000 and
 * "fits" otherwise; this feeds the column headed "GPT-4o 128K".
 *
 * @param {?{repos: Array<Object>}} quality - Parsed quality report.
 * @returns {string} The section markup, or '' when there are no repos.
 */
function renderQualitySection(quality) {
  const hasRepos = Boolean(quality) && Array.isArray(quality.repos) && quality.repos.length > 0;
  if (!hasRepos) return '';

  let rows = '';
  for (const repo of quality.repos) {
    const overflows = (repo.rawTokens || 0) > 128000;
    const badgeClass = overflows ? 'warn' : 'ok';
    const badgeText = overflows ? 'overflow' : 'fits';
    rows += [
      '<tr>',
      `<td>${escapeHtml(repo.repo)}</td>`,
      `<td>${escapeHtml(formatInt(repo.groundedSymbols))}</td>`,
      `<td>${escapeHtml(formatInt(repo.darkSymbols))}</td>`,
      `<td>${escapeHtml(formatMaybePct(repo.groundingPct, 0))}</td>`,
      `<td>${escapeHtml(String(repo.filesHiddenRaw || 0))}</td>`,
      `<td><span class="badge ${badgeClass}">${escapeHtml(badgeText)}</span></td>`,
      '</tr>',
    ].join('');
  }

  const heading = '<h2>Quality and hallucination surface</h2>';
  const copy = '<p class="section-copy">How much code stays visible to the model, plus the overflow and dark-symbol risk by repo.</p>';
  const head = '<thead><tr><th>Repo</th><th>Grounded symbols</th><th>Dark symbols</th><th>Grounding</th><th>Hidden files (raw)</th><th>GPT-4o 128K</th></tr></thead>';
  return `<section>${heading}${copy}<table>${head}<tbody>${rows}</tbody></table></section>`;
}
|
|
297
|
+
|
|
298
|
+
/**
 * Render the "Task benchmark" section: answer-quality tier bars from the
 * report summary, plus prompt-reduction bars for the ten repos with the
 * highest `reductionPct`.
 *
 * Repo bars are scaled against the largest reduction across all repos.
 *
 * @param {?{repos: Array<Object>, summary: Object}} task - Parsed task benchmark report.
 * @returns {string} The section markup, or '' when repos or the summary are missing.
 */
function renderTaskSection(task) {
  const usable = Boolean(task) && Array.isArray(task.repos) && task.repos.length > 0 && Boolean(task.summary);
  if (!usable) return '';

  const { summary } = task;
  const reductionOf = (repo) => Number(repo.reductionPct) || 0;
  const scale = maxOrZero(task.repos.map(reductionOf));

  const ranked = task.repos.slice();
  ranked.sort((a, b) => (b.reductionPct || 0) - (a.reductionPct || 0));
  const repoBars = ranked
    .slice(0, 10)
    .map((repo) => renderProgress(repo.repo, reductionOf(repo), scale, '%'))
    .join('');

  const tierBars = [
    renderProgress('Correct', Number(summary.correctPct) || 0, 100, '%'),
    renderProgress('Partial', Number(summary.partialPct) || 0, 100, '%'),
    renderProgress('Wrong', Number(summary.wrongPct) || 0, 100, '%'),
  ].join('');

  const heading = '<h2>Task benchmark</h2>';
  const copy = '<p class="section-copy">A prompt-reduction proxy derived from retrieval quality tiers. Lower prompts means the right file surfaces sooner.</p>';
  const tierPanel = `<div class="panel"><h3>Answer quality tiers</h3>${tierBars}</div>`;
  const repoPanel = `<div class="panel"><h3>Best prompt reduction by repo</h3>${repoBars}</div>`;
  return `<section>${heading}${copy}<div class="split">${tierPanel}${repoPanel}</div></section>`;
}
|
|
328
|
+
|
|
329
|
+
/**
 * Build the complete, self-contained benchmark report page.
 *
 * The page has four headline cards (token reduction, retrieval hit@5,
 * prompt reduction, overflow risk), an optional notice listing missing
 * source reports, and one section per report that has data.
 *
 * @param {Object} reports - Result of `loadBenchmarkReports`.
 * @param {{matrixSummary?: Object}} [opts] - `matrixSummary` is forwarded
 *   to `buildBenchmarkSummary`.
 * @returns {string} The HTML document as a single string.
 */
function generateBenchmarkReportHtml(reports, opts = {}) {
  const summary = buildBenchmarkSummary(reports, opts.matrixSummary);
  const {
    tokenSummary: tokens,
    retrievalSummary: retrieval,
    taskSummary: tasks,
    qualitySummary: quality,
  } = summary;

  // Each card falls back to 'n/a' plus a "<file> missing" hint when its report is absent.
  const cards = [
    renderCard(
      'Token reduction',
      tokens ? formatPct(tokens.overallReductionPct, 1) : 'n/a',
      tokens
        ? `${formatInt(tokens.repoCount)} repos • ${formatCompact(tokens.totalRawTokens)} raw -> ${formatCompact(tokens.totalFinalTokens)} final`
        : 'token-reduction.json missing',
      'cool'
    ),
    renderCard(
      'Retrieval hit@5',
      retrieval ? formatPct(retrieval.hitAt5, 1) : 'n/a',
      retrieval
        ? `${formatPct(retrieval.randomBaseline, 1)} random baseline • ${formatRatio(retrieval.lift, 1)} lift`
        : 'retrieval.json missing',
      'warm'
    ),
    renderCard(
      'Prompt reduction',
      tasks ? formatPct(tasks.avgReductionPct, 0) : 'n/a',
      tasks
        ? `${tasks.avgPromptsWithout} -> ${tasks.avgPromptsWith} prompts • ${formatInt(tasks.totalTasks)} tasks`
        : 'task-benchmark.json missing',
      'neutral'
    ),
    renderCard(
      'Overflow risk',
      quality ? `${formatInt(quality.overflowGPT4oCount)} repos` : 'n/a',
      quality
        ? `${formatInt(quality.totalHiddenFiles)} hidden raw files • ${formatMoney(quality.gpt4oSavedPerMonth)}/month saved`
        : 'quality.json missing',
      quality && quality.overflowGPT4oCount > 0 ? 'warn' : 'ok'
    ),
  ];

  let missingHtml = '';
  if (summary.missing.length > 0) {
    missingHtml = `<div class="notice">Missing source reports: ${escapeHtml(summary.missing.join(', '))}. The page still renders whatever data is available.</div>`;
  }

  // Inline stylesheet, kept as one rule group per entry.
  const styleRules = [
    ':root { color-scheme: light; --bg:#f5f1e8; --panel:#fffaf2; --ink:#1f1b16; --muted:#6a6258; --line:#dccfbf; --gold:#c87f2a; --green:#2f6f52; --blue:#2f5f8f; --red:#9f4f43; --shadow:0 18px 40px rgba(54,38,14,.10);} ',
    '*{box-sizing:border-box} body{margin:0;font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;background:linear-gradient(180deg,#f3ecdf 0%,#f7f3ed 100%);color:var(--ink)}',
    '.page{max-width:1240px;margin:0 auto;padding:28px 20px 56px}',
    'header{display:flex;justify-content:space-between;gap:24px;align-items:flex-end;margin-bottom:24px}',
    'h1{margin:0;font-size:clamp(2rem,4vw,3.6rem);line-height:1.02;letter-spacing:-.04em}',
    '.lede{max-width:760px;color:var(--muted);font-size:1rem;line-height:1.6;margin-top:10px}',
    '.stamp{font-size:.92rem;color:var(--muted);text-align:right}',
    '.grid{display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:14px;margin:20px 0 24px}',
    '.card,.panel,.notice,section{background:var(--panel);border:1px solid var(--line);box-shadow:var(--shadow);border-radius:18px}',
    '.card{padding:18px 18px 16px}.card.cool{background:#f7f5ff}.card.warm{background:#fff4eb}.card.warn{background:#fff1eb}.card.ok{background:#eff8f1}',
    '.label{font-size:.84rem;text-transform:uppercase;letter-spacing:.08em;color:var(--muted)}',
    '.value{font-size:2rem;font-weight:700;letter-spacing:-.04em;margin-top:8px}',
    '.hint{font-size:.95rem;color:var(--muted);margin-top:8px;line-height:1.5}',
    '.notice{padding:14px 16px;margin-bottom:20px;color:var(--muted)}',
    'section{padding:20px;margin-top:18px}',
    'h2{margin:0 0 6px;font-size:1.4rem;letter-spacing:-.03em}',
    'h3{margin:0 0 14px;font-size:1rem}',
    '.section-copy{margin:0 0 16px;color:var(--muted);line-height:1.6}',
    'table{width:100%;border-collapse:collapse;font-size:.95rem}',
    'th,td{padding:10px 12px;border-bottom:1px solid var(--line);text-align:left;vertical-align:top}',
    'th{font-size:.82rem;text-transform:uppercase;letter-spacing:.06em;color:var(--muted)}',
    'tbody tr:hover{background:rgba(200,127,42,.06)}',
    '.badge{display:inline-flex;align-items:center;padding:4px 8px;border-radius:999px;font-size:.78rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em}',
    '.badge.ok{background:#e6f4ea;color:#21573f}.badge.warn{background:#fff0de;color:#8a4a17}.badge.fail{background:#fde8e5;color:#8a2e23}',
    '.split{display:grid;grid-template-columns:1fr 1fr;gap:16px}',
    '.panel{padding:16px}',
    '.progress-row{display:grid;grid-template-columns:140px 1fr 60px;gap:12px;align-items:center;margin:10px 0}',
    '.progress-label,.progress-value{font-size:.92rem}',
    '.progress-bar{height:10px;border-radius:999px;background:#efe4d5;overflow:hidden}',
    '.progress-bar span{display:block;height:100%;border-radius:999px;background:linear-gradient(90deg,var(--gold),#ebbb61)}',
    'code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:.85rem}',
    '@media (max-width: 1020px){.grid{grid-template-columns:repeat(2,minmax(0,1fr))}.split{grid-template-columns:1fr}header{flex-direction:column;align-items:flex-start}.stamp{text-align:left}}',
    '@media (max-width: 640px){.grid{grid-template-columns:1fr}.progress-row{grid-template-columns:110px 1fr 52px}th:nth-child(n+5),td:nth-child(n+5){display:none}}',
  ];

  const head = [
    '<head>',
    '<meta charset="utf-8" />',
    '<meta name="viewport" content="width=device-width, initial-scale=1" />',
    '<title>SigMap Benchmark Report</title>',
    '<style>',
    styleRules.join(''),
    '</style>',
    '</head>',
  ].join('');

  const pageHeader = [
    '<header>',
    '<div>',
    '<h1>SigMap Benchmark Report</h1>',
    '<p class="lede">A self-contained view of token reduction, retrieval quality, hallucination surface, and task-level prompt reduction. This page reads the saved JSON benchmark artifacts so it stays easy to regenerate locally.</p>',
    '</div>',
    `<div class="stamp">Generated: ${escapeHtml(summary.generatedAt)}<br />Source directory: <code>benchmarks/reports</code></div>`,
    '</header>',
  ].join('');

  const sections = [
    renderMatrixSection(summary.matrix),
    renderTokenSection(reports.token),
    renderRetrievalSection(reports.retrieval),
    renderQualitySection(reports.quality),
    renderTaskSection(reports.task),
  ].join('');

  const body = [
    '<body>',
    '<div class="page">',
    pageHeader,
    missingHtml,
    `<div class="grid">${cards.join('')}</div>`,
    sections,
    '</div>',
    '</body>',
  ].join('');

  return `<!doctype html><html lang="en">${head}${body}</html>`;
}
|
|
425
|
+
|
|
426
|
+
/**
 * Generate the benchmark report page and write it into the reports directory.
 *
 * @param {string} cwd - Project root directory.
 * @param {{fileName?: string, matrixSummary?: Object}} [opts] - `fileName`
 *   overrides the default 'benchmark-report.html'; `matrixSummary` is
 *   forwarded to the summary and page builders.
 * @returns {{file: string, summary: Object}} Path of the written file and
 *   a summary built from the same reports.
 */
function writeBenchmarkReport(cwd, opts = {}) {
  const reports = loadBenchmarkReports(cwd);
  const html = generateBenchmarkReportHtml(reports, opts);

  const outputName = opts.fileName || 'benchmark-report.html';
  const filePath = path.join(reports.reportsDir, outputName);
  // The reports directory may not exist yet on a fresh checkout.
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, html, 'utf8');

  const summary = buildBenchmarkSummary(reports, opts.matrixSummary);
  return { file: filePath, summary };
}
|
|
437
|
+
|
|
438
|
+
module.exports = {
|
|
439
|
+
loadBenchmarkReports,
|
|
440
|
+
buildBenchmarkSummary,
|
|
441
|
+
generateBenchmarkReportHtml,
|
|
442
|
+
writeBenchmarkReport,
|
|
443
|
+
};
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const { boostFiles, normalizeFile, penalizeFiles } = require('../learning/weights');
|
|
6
|
+
|
|
3
7
|
const STOP = new Set([
|
|
4
8
|
'the','a','an','in','on','at','to','of','for','and','or','but',
|
|
5
9
|
'is','are','was','were','be','been','being','have','has','had',
|
|
@@ -30,6 +34,30 @@ const GENERIC_MARKERS = [
|
|
|
30
34
|
'as a general rule',
|
|
31
35
|
];
|
|
32
36
|
|
|
37
|
+
/**
 * Collect the project files named by level-2 and level-3 markdown
 * headings ("## path" / "### path") in a context string.
 *
 * A heading counts only when `normalizeFile` accepts its text and the
 * resulting path exists under `cwd`. Each file is returned once, in
 * order of first appearance.
 *
 * @param {string} context - Context text to scan line by line.
 * @param {string} cwd - Project root used to normalize and check paths.
 * @returns {string[]} Normalized file paths; empty when either argument is falsy.
 */
function extractContextFiles(context, cwd) {
  if (!context || !cwd) return [];

  const found = new Set();
  for (const line of context.split('\n')) {
    const heading = line.match(/^#{2,3}\s+(.+?)\s*$/);
    if (heading) {
      const relPath = normalizeFile(cwd, heading[1]);
      const isNew = Boolean(relPath) && !found.has(relPath);
      if (isNew && fs.existsSync(path.join(cwd, relPath))) {
        found.add(relPath);
      }
    }
  }

  // A Set iterates in insertion order, so first-seen order is preserved.
  return [...found];
}
|
|
60
|
+
|
|
33
61
|
function judge(response, context, opts = {}) {
|
|
34
62
|
const score = groundedness(response, context);
|
|
35
63
|
const threshold = opts.threshold !== undefined ? opts.threshold : 0.25;
|
|
@@ -49,7 +77,46 @@ function judge(response, context, opts = {}) {
|
|
|
49
77
|
}
|
|
50
78
|
|
|
51
79
|
const verdict = score >= threshold && reasons.length === 0 ? 'pass' : 'fail';
|
|
52
|
-
|
|
80
|
+
const result = { score, verdict, reasons };
|
|
81
|
+
|
|
82
|
+
if (opts.learn) {
|
|
83
|
+
const learning = {
|
|
84
|
+
applied: false,
|
|
85
|
+
action: 'none',
|
|
86
|
+
files: [],
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
if (!opts.cwd) {
|
|
90
|
+
learning.reason = 'cwd is required for learning';
|
|
91
|
+
result.learning = learning;
|
|
92
|
+
return result;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const contextFiles = extractContextFiles(context, opts.cwd);
|
|
96
|
+
learning.files = contextFiles;
|
|
97
|
+
|
|
98
|
+
if (contextFiles.length === 0) {
|
|
99
|
+
learning.reason = 'no context files found in context headings';
|
|
100
|
+
result.learning = learning;
|
|
101
|
+
return result;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
if (score > 0.75) {
|
|
105
|
+
boostFiles(opts.cwd, contextFiles, 0.05);
|
|
106
|
+
learning.applied = true;
|
|
107
|
+
learning.action = 'boost';
|
|
108
|
+
} else if (score < 0.40) {
|
|
109
|
+
penalizeFiles(opts.cwd, contextFiles, 0.03);
|
|
110
|
+
learning.applied = true;
|
|
111
|
+
learning.action = 'penalize';
|
|
112
|
+
} else {
|
|
113
|
+
learning.reason = 'groundedness in no-op band (0.40-0.75)';
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
result.learning = learning;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
return result;
|
|
53
120
|
}
|
|
54
121
|
|
|
55
122
|
module.exports = { groundedness, judge };
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
// Factor applied by updateWeights to every stored multiplier before new boosts or penalties.
const DECAY = 0.95;
|
|
7
|
+
// Upper bound enforced by clampMultiplier.
const MAX_MULT = 3.0;
|
|
8
|
+
// Lower bound enforced by clampMultiplier.
const MIN_MULT = 0.30;
|
|
9
|
+
// Neutral multiplier: used for files with no stored weight; entries equal to it are not persisted.
const BASELINE = 1.0;
|
|
10
|
+
|
|
11
|
+
/**
 * Location of the learned-weights file for a project.
 *
 * @param {string} cwd - Project root directory.
 * @returns {string} `<cwd>/.context/weights.json`.
 */
function weightsPath(cwd) {
  const contextDir = path.join(cwd, '.context');
  return path.join(contextDir, 'weights.json');
}
|
|
14
|
+
|
|
15
|
+
/**
 * Force a multiplier into the range MIN_MULT..MAX_MULT.
 *
 * @param {number} value - Candidate multiplier.
 * @returns {number} BASELINE for non-finite input, the nearer bound when
 *   out of range, otherwise the value rounded to six decimals.
 */
function clampMultiplier(value) {
  if (!Number.isFinite(value)) {
    return BASELINE;
  }
  if (value > MAX_MULT) {
    return MAX_MULT;
  }
  if (value < MIN_MULT) {
    return MIN_MULT;
  }
  // Round-trip through a fixed-point string to drop floating-point noise.
  const rounded = value.toFixed(6);
  return parseFloat(rounded);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Convert a file reference into a forward-slash path relative to `cwd`.
 *
 * Backslashes in the input are treated as path separators. A path is
 * rejected when it resolves to `cwd` itself, leaves `cwd`, or (on
 * Windows) lands on another drive.
 *
 * Only a real parent-directory step ("..", or ".." followed by a
 * separator) counts as leaving the root. A plain prefix test would also
 * reject in-tree names that merely begin with two dots, e.g. "..cache/x".
 *
 * @param {string} cwd - Project root directory.
 * @param {*} filePath - Candidate path; anything but a non-empty string is rejected.
 * @returns {?string} The normalized relative path, or `null` when rejected.
 */
function normalizeFile(cwd, filePath) {
  if (!cwd || !filePath || typeof filePath !== 'string') return null;
  const cleaned = filePath.trim().replace(/\\/g, '/');
  if (!cleaned) return null;

  const abs = path.resolve(cwd, cleaned);
  const rel = path.relative(cwd, abs);
  const escapesRoot = rel === '..' || rel.startsWith(`..${path.sep}`);
  if (!rel || escapesRoot || path.isAbsolute(rel)) return null;
  return rel.split(path.sep).join('/');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Produce a clean copy of a weights map.
 *
 * Keys are passed through `normalizeFile` and dropped when rejected;
 * values are coerced to numbers and clamped. Entries whose multiplier
 * ends up at BASELINE are omitted.
 *
 * @param {string} cwd - Project root used to normalize keys.
 * @param {*} weights - Raw map of file path to multiplier.
 * @returns {Object<string, number>} The sanitized map (empty when `weights` is not an object).
 */
function sanitizeWeights(cwd, weights) {
  const cleaned = {};
  if (!weights || typeof weights !== 'object') return cleaned;

  Object.entries(weights).forEach(([rawPath, rawValue]) => {
    const key = normalizeFile(cwd, rawPath);
    if (!key) return;
    const multiplier = clampMultiplier(Number(rawValue));
    const isNeutral = Math.abs(multiplier - BASELINE) < 1e-9;
    if (!isNeutral) cleaned[key] = multiplier;
  });

  return cleaned;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Read and sanitize the stored weights for a project.
 *
 * @param {string} cwd - Project root directory.
 * @returns {Object<string, number>} The sanitized weights, or an empty
 *   object when the file is missing, unreadable or not valid JSON.
 */
function loadWeights(cwd) {
  let weights = {};
  try {
    const text = fs.readFileSync(weightsPath(cwd), 'utf8');
    weights = sanitizeWeights(cwd, JSON.parse(text));
  } catch (_) {
    // No usable weights file: behave as if nothing has been learned yet.
    weights = {};
  }
  return weights;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Persist a weights map to the project's weights file.
 *
 * The map is sanitized first. When nothing remains, an existing weights
 * file is removed (errors ignored) and nothing is written. Otherwise the
 * entries are written as pretty-printed JSON with keys in sorted order
 * and a trailing newline.
 *
 * @param {string} cwd - Project root directory.
 * @param {Object} weights - Map of file path to multiplier.
 * @returns {void}
 */
function saveWeights(cwd, weights) {
  const cleaned = sanitizeWeights(cwd, weights);
  const target = weightsPath(cwd);
  const keys = Object.keys(cleaned);

  if (keys.length === 0) {
    try {
      if (fs.existsSync(target)) fs.unlinkSync(target);
    } catch (_) {}
    return;
  }

  fs.mkdirSync(path.dirname(target), { recursive: true });

  // Sorted keys keep the file stable between runs.
  const ordered = {};
  for (const key of keys.sort()) {
    ordered[key] = cleaned[key];
  }
  fs.writeFileSync(target, JSON.stringify(ordered, null, 2) + '\n', 'utf8');
}
|
|
77
|
+
|
|
78
|
+
/**
 * Apply one learning step to the stored file weights.
 *
 * Order of operations:
 *   1. load the current weights;
 *   2. decay every stored multiplier toward BASELINE by the DECAY factor;
 *   3. add `goodAmount` to each good file and subtract `badAmount` from
 *      each bad file (files with no stored weight start at BASELINE);
 *   4. save, then re-read the persisted weights for the return value.
 *
 * Decay acts on the distance from BASELINE, not on the raw multiplier.
 * Multiplying the raw value would push every tracked file, boosted or
 * penalized, down toward MIN_MULT over time and would cancel out small
 * repeated boosts. NOTE(review): confirm decay-toward-baseline is the
 * intended semantics before release.
 *
 * @param {string} cwd - Project root directory.
 * @param {Object} [opts]
 * @param {string[]} [opts.goodFiles] - Files to boost.
 * @param {string[]} [opts.badFiles] - Files to penalize.
 * @param {number} [opts.goodAmount=0.15] - Amount added per good file.
 * @param {number} [opts.badAmount=0.10] - Amount subtracted per bad file.
 * @returns {{good: string[], bad: string[], weights: Object<string, number>}}
 *   Normalized paths that were boosted and penalized, plus the weights as
 *   persisted after the update.
 */
function updateWeights(cwd, opts = {}) {
  const goodAmount = Number.isFinite(opts.goodAmount) ? opts.goodAmount : 0.15;
  const badAmount = Number.isFinite(opts.badAmount) ? opts.badAmount : 0.10;
  const goodFiles = Array.isArray(opts.goodFiles) ? opts.goodFiles : [];
  const badFiles = Array.isArray(opts.badFiles) ? opts.badFiles : [];

  const weights = loadWeights(cwd);

  // Shrink each weight's offset from the neutral multiplier.
  for (const key of Object.keys(weights)) {
    weights[key] = clampMultiplier(BASELINE + ((weights[key] - BASELINE) * DECAY));
  }

  const good = [];
  const bad = [];

  for (const filePath of goodFiles) {
    const normalized = normalizeFile(cwd, filePath);
    if (!normalized) continue;
    weights[normalized] = clampMultiplier((weights[normalized] || BASELINE) + goodAmount);
    good.push(normalized);
  }

  for (const filePath of badFiles) {
    const normalized = normalizeFile(cwd, filePath);
    if (!normalized) continue;
    weights[normalized] = clampMultiplier((weights[normalized] || BASELINE) - badAmount);
    bad.push(normalized);
  }

  saveWeights(cwd, weights);
  return { good, bad, weights: loadWeights(cwd) };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Boost the given files by delegating to `updateWeights` with only the
 * good-file options set.
 *
 * @param {string} cwd - Project root directory.
 * @param {string[]} files - Files to boost.
 * @param {number} [amount=0.15] - Amount added per file.
 * @returns {Object} The result of `updateWeights`.
 */
function boostFiles(cwd, files, amount = 0.15) {
  const update = { goodFiles: files, goodAmount: amount };
  return updateWeights(cwd, update);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Penalize the given files by delegating to `updateWeights` with only
 * the bad-file options set.
 *
 * @param {string} cwd - Project root directory.
 * @param {string[]} files - Files to penalize.
 * @param {number} [amount=0.10] - Amount subtracted per file.
 * @returns {Object} The result of `updateWeights`.
 */
function penalizeFiles(cwd, files, amount = 0.10) {
  const update = { badFiles: files, badAmount: amount };
  return updateWeights(cwd, update);
}
|
|
118
|
+
|
|
119
|
+
/**
 * Delete the project's weights file if it exists.
 *
 * @param {string} cwd - Project root directory.
 * @returns {void}
 */
function resetWeights(cwd) {
  const target = weightsPath(cwd);
  if (!fs.existsSync(target)) return;
  fs.unlinkSync(target);
}
|
|
123
|
+
|
|
124
|
+
module.exports = {
|
|
125
|
+
BASELINE,
|
|
126
|
+
DECAY,
|
|
127
|
+
MAX_MULT,
|
|
128
|
+
MIN_MULT,
|
|
129
|
+
weightsPath,
|
|
130
|
+
clampMultiplier,
|
|
131
|
+
normalizeFile,
|
|
132
|
+
loadWeights,
|
|
133
|
+
saveWeights,
|
|
134
|
+
updateWeights,
|
|
135
|
+
boostFiles,
|
|
136
|
+
penalizeFiles,
|
|
137
|
+
resetWeights,
|
|
138
|
+
};
|
package/src/mcp/handlers.js
CHANGED
|
@@ -450,7 +450,7 @@ function queryContext(args, cwd) {
|
|
|
450
450
|
if (index.size === 0) return 'No signatures indexed. Run: node gen-context.js';
|
|
451
451
|
|
|
452
452
|
const topK = Math.min(Math.max(1, parseInt(args.topK, 10) || 10), 25);
|
|
453
|
-
const results = rank(args.query, index, { topK });
|
|
453
|
+
const results = rank(args.query, index, { topK, cwd });
|
|
454
454
|
return formatRankTable(results, args.query);
|
|
455
455
|
} catch (err) {
|
|
456
456
|
return `_query_context failed: ${err.message}_`;
|
|
@@ -477,4 +477,4 @@ function getImpact(args, cwd) {
|
|
|
477
477
|
}
|
|
478
478
|
}
|
|
479
479
|
|
|
480
|
-
module.exports = { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules, queryContext, getImpact };
|
|
480
|
+
module.exports = { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules, queryContext, getImpact };
|