sigmap 4.3.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +45 -32
- package/CHANGELOG.md +20 -0
- package/gen-context.js +254 -8
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/config/loader.js +77 -5
- package/src/format/dashboard.js +20 -0
- package/src/judge/judge-engine.js +55 -0
- package/src/mcp/server.js +1 -1
package/AGENTS.md
CHANGED
|
@@ -12,20 +12,23 @@ Use this marker block for all appendable context files:
|
|
|
12
12
|
## Auto-generated signatures
|
|
13
13
|
<!-- Updated by gen-context.js -->
|
|
14
14
|
You are a coding assistant with full knowledge of this codebase.
|
|
15
|
-
Below are the code signatures extracted by SigMap
|
|
15
|
+
Below are the code signatures extracted by SigMap v5.1.0 on 2026-04-16T21:33:38.411Z.
|
|
16
16
|
|
|
17
17
|
Use these signatures to answer questions about the code accurately.
|
|
18
18
|
|
|
19
19
|
## Code Signatures
|
|
20
20
|
|
|
21
|
-
<!-- Generated by SigMap gen-context.js
|
|
21
|
+
<!-- Generated by SigMap gen-context.js v5.1.0 -->
|
|
22
22
|
<!-- DO NOT EDIT below the marker line — run gen-context.js to regenerate -->
|
|
23
23
|
|
|
24
24
|
# Code signatures
|
|
25
25
|
|
|
26
|
-
## changes (last 5 commits —
|
|
26
|
+
## changes (last 5 commits — 16 minutes ago)
|
|
27
27
|
```
|
|
28
|
-
src/
|
|
28
|
+
src/config/loader.js +loadBaseConfig ~loadConfig ~deepClone
|
|
29
|
+
src/format/dashboard.js ~computeExtractorCoverage ~readBenchmarkTrend
|
|
30
|
+
src/judge/judge-engine.js +tokenize +groundedness +judge
|
|
31
|
+
src/retrieval/ranker.js +detectIntent ~formatRankJSON
|
|
29
32
|
```
|
|
30
33
|
|
|
31
34
|
## packages
|
|
@@ -146,9 +149,41 @@ function adapt(context, adapterName, opts = {}) → string
|
|
|
146
149
|
|
|
147
150
|
## src
|
|
148
151
|
|
|
149
|
-
### src/config/
|
|
152
|
+
### src/config/loader.js
|
|
150
153
|
```
|
|
151
|
-
module.exports = {
|
|
154
|
+
module.exports = { loadConfig, loadBaseConfig }
|
|
155
|
+
function loadBaseConfig(extendsVal, cwd)
|
|
156
|
+
function detectAutoSrcDirs(cwd, excludeList) → string[]
|
|
157
|
+
function loadConfig(cwd) → object
|
|
158
|
+
function deepClone(obj)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### src/format/dashboard.js
|
|
162
|
+
```
|
|
163
|
+
module.exports = { generateDashboardHtml, renderHistoryCharts, computeExtractorCoverage, percentile, overBudgetStreak }
|
|
164
|
+
function toNumber(v)
|
|
165
|
+
function percentile(values, p)
|
|
166
|
+
function overBudgetStreak(entries)
|
|
167
|
+
function loadConfig(cwd)
|
|
168
|
+
function shouldExclude(rel, excludeSet)
|
|
169
|
+
function detectLanguage(filePath)
|
|
170
|
+
function walkFiles(dir, maxDepth, depth, out, excludeSet)
|
|
171
|
+
function computeExtractorCoverage(cwd)
|
|
172
|
+
function readBenchmarkTrend(cwd)
|
|
173
|
+
function lineChartSvg(values, title, ySuffix)
|
|
174
|
+
function barChartSvg(perLanguage)
|
|
175
|
+
function sparkline(values)
|
|
176
|
+
function buildDashboardData(cwd, health)
|
|
177
|
+
function generateDashboardHtml(cwd, health)
|
|
178
|
+
function renderHistoryCharts(cwd, health)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### src/judge/judge-engine.js
|
|
182
|
+
```
|
|
183
|
+
module.exports = { groundedness, judge }
|
|
184
|
+
function tokenize(text)
|
|
185
|
+
function groundedness(response, context)
|
|
186
|
+
function judge(response, context, opts = {})
|
|
152
187
|
```
|
|
153
188
|
|
|
154
189
|
### src/mcp/server.js
|
|
@@ -162,13 +197,14 @@ function start(cwd)
|
|
|
162
197
|
|
|
163
198
|
### src/retrieval/ranker.js
|
|
164
199
|
```
|
|
165
|
-
module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS }
|
|
200
|
+
module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS, detectIntent }
|
|
166
201
|
function scoreFile(filePath, sigs, queryTokens, weights) → number
|
|
167
202
|
function rank(query, sigIndex, opts) → { file: string, score: nu
|
|
168
203
|
function _parseContextFile(contextPath) → Map<string, string[]>
|
|
169
204
|
function buildSigIndex(cwd, opts) → Map<string, string[]>
|
|
170
205
|
function formatRankTable(results, query) → string
|
|
171
206
|
function formatRankJSON(results, query) → object
|
|
207
|
+
function detectIntent(query)
|
|
172
208
|
```
|
|
173
209
|
|
|
174
210
|
### src/analysis/coverage-score.js
|
|
@@ -178,12 +214,9 @@ function coverageScore(cwd, fileEntries, config) → { * score: number, * grad
|
|
|
178
214
|
function _walk(dir, excludeSet, out)
|
|
179
215
|
```
|
|
180
216
|
|
|
181
|
-
### src/config/
|
|
217
|
+
### src/config/defaults.js
|
|
182
218
|
```
|
|
183
|
-
module.exports = {
|
|
184
|
-
function detectAutoSrcDirs(cwd, excludeList) → string[]
|
|
185
|
-
function loadConfig(cwd) → object
|
|
186
|
-
function deepClone(obj)
|
|
219
|
+
module.exports = { DEFAULTS }
|
|
187
220
|
```
|
|
188
221
|
|
|
189
222
|
### src/eval/analyzer.js
|
|
@@ -527,26 +560,6 @@ function formatCache(content) → string
|
|
|
527
560
|
function formatCachePayload(content, model) → string
|
|
528
561
|
```
|
|
529
562
|
|
|
530
|
-
### src/format/dashboard.js
|
|
531
|
-
```
|
|
532
|
-
module.exports = { generateDashboardHtml, renderHistoryCharts, computeExtractorCoverage, percentile, overBudgetStreak }
|
|
533
|
-
function toNumber(v)
|
|
534
|
-
function percentile(values, p)
|
|
535
|
-
function overBudgetStreak(entries)
|
|
536
|
-
function loadConfig(cwd)
|
|
537
|
-
function shouldExclude(rel, excludeSet)
|
|
538
|
-
function detectLanguage(filePath)
|
|
539
|
-
function walkFiles(dir, maxDepth, depth, out, excludeSet)
|
|
540
|
-
function computeExtractorCoverage(cwd)
|
|
541
|
-
function readBenchmarkTrend(cwd)
|
|
542
|
-
function lineChartSvg(values, title, ySuffix)
|
|
543
|
-
function barChartSvg(perLanguage)
|
|
544
|
-
function sparkline(values)
|
|
545
|
-
function buildDashboardData(cwd, health)
|
|
546
|
-
function generateDashboardHtml(cwd, health)
|
|
547
|
-
function renderHistoryCharts(cwd, health)
|
|
548
|
-
```
|
|
549
|
-
|
|
550
563
|
### src/format/llm-txt.js
|
|
551
564
|
```
|
|
552
565
|
module.exports = { format, outputPath }
|
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,26 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [5.1.0] — 2026-04-16
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- **Benchmark history tracking** — all three benchmark scripts (`run-retrieval-benchmark.mjs`, `run-benchmark.mjs`, `run-task-benchmark.mjs`) now append a structured NDJSON entry to `.context/benchmark-history.ndjson` after each run (`type: "retrieval" | "token-reduction" | "task"`).
|
|
18
|
+
- **`sigmap history` benchmark trend rows** — when `.context/benchmark-history.ndjson` exists, `sigmap history` prints a retrieval `hit@5` sparkline row and a token-reduction sparkline row below the usage table. The command no longer exits early when the usage log is empty.
|
|
19
|
+
- **Dashboard `readBenchmarkTrend` uses local history** — `src/format/dashboard.js` now prefers `.context/benchmark-history.ndjson` over the CI-only `benchmarks/results/` directory, so the dashboard hit@5 trend chart populates for all users after running any benchmark locally.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## [5.0.0] — 2026-04-16
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- **`sigmap judge --response <file> --context <file>`** — rule-based groundedness scoring engine (`src/judge/judge-engine.js`). Computes a 0–1 score from token overlap between an LLM response and its source context. Exits 0 when verdict is `pass`, exits 1 on `fail`. Supports `--json` (emits `{ score, verdict, reasons }`) and `--threshold` override.
|
|
28
|
+
- **Config `extends`** — `gen-context.config.json` now accepts an `"extends"` key pointing to a local JSON file path or an HTTP(S) URL. The base config is deep-merged (DEFAULTS → base → local), with remote responses cached for 1 hour in `.context/config-cache/`.
|
|
29
|
+
- **`sigmap history [--last N] [--json]`** — displays last N usage log entries as a table with a Unicode sparkline (▁▂▃▄▅▆▇█) for the token trend. Reads from `.context/usage.ndjson` (requires `tracking: true` in config).
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
13
33
|
## [4.3.0] — 2026-04-16
|
|
14
34
|
|
|
15
35
|
### Added
|
package/gen-context.js
CHANGED
|
@@ -221,6 +221,47 @@ __factories["./src/config/loader"] = function(module, exports) {
|
|
|
221
221
|
});
|
|
222
222
|
}
|
|
223
223
|
|
|
224
|
+
// How long a cached remote base config is considered fresh (1 hour).
const BASE_CONFIG_TTL_MS = 60 * 60 * 1000;

/**
 * Resolve the base configuration referenced by a config file's `extends` value.
 *
 * `extendsVal` is either an `http://` / `https://` URL or a file path resolved
 * against `cwd`. Remote configs are cached on disk under
 * `<cwd>/.context/config-cache/` and reused while younger than
 * BASE_CONFIG_TTL_MS; a stale cache entry is still used as a fallback when the
 * fetch fails.
 *
 * This function never throws for load failures: it reports them on stderr and
 * returns `{}` so the caller can continue with defaults.
 *
 * @param {*} extendsVal - Value of the `extends` key; anything that is not a
 *   non-empty string yields `{}`.
 * @param {string} cwd - Directory used to resolve relative paths and to locate
 *   the cache directory.
 * @returns {object} The parsed base config (always a plain object), or `{}`.
 */
function loadBaseConfig(extendsVal, cwd) {
  if (!extendsVal || typeof extendsVal !== 'string') return {};

  // A base config must be a JSON object: callers enumerate its keys, so a
  // top-level `null`, array or scalar is treated as a load failure.
  const parseConfig = (text) => {
    const value = JSON.parse(text);
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      throw new Error('base config must be a JSON object');
    }
    return value;
  };
  const readConfigFile = (file) => parseConfig(fs.readFileSync(file, 'utf8'));

  if (extendsVal.startsWith('https://') || extendsVal.startsWith('http://')) {
    const cacheDir = path.join(cwd, '.context', 'config-cache');
    const cacheKey = Buffer.from(extendsVal).toString('base64').replace(/[^a-zA-Z0-9_-]/g, '_');
    const cachePath = path.join(cacheDir, `${cacheKey}.json`);

    if (fs.existsSync(cachePath)) {
      const age = Date.now() - fs.statSync(cachePath).mtimeMs;
      if (age < BASE_CONFIG_TTL_MS) {
        try { return readConfigFile(cachePath); } catch (_) { /* corrupt cache entry: refetch below */ }
      }
    }

    try {
      const { execFileSync } = require('child_process');
      // The fetch runs in a child Node process so this function can stay
      // synchronous. The URL travels as an argv entry (process.argv[1] under
      // `node -e`) and no shell is involved: interpolating it into a
      // `node -e "…"` command line breaks the quoting and would let shell
      // metacharacters inside the URL execute.
      const fetchScript = [
        'const u=process.argv[1];',
        "const h=require(u.startsWith('https')?'https':'http');",
        "let d='';",
        "h.get(u,r=>{r.on('data',c=>d+=c);r.on('end',()=>process.stdout.write(d))})",
        ".on('error',()=>process.exit(1))",
      ].join('');
      const out = execFileSync(process.execPath, ['-e', fetchScript, extendsVal], {
        timeout: 10000,
        encoding: 'utf8',
      });
      const parsed = parseConfig(out);
      fs.mkdirSync(cacheDir, { recursive: true });
      fs.writeFileSync(cachePath, JSON.stringify(parsed), 'utf8');
      return parsed;
    } catch (err) {
      process.stderr.write(`[sigmap] config extends: could not fetch ${extendsVal}: ${err.message}\n`);
      // Prefer a stale cached copy over silently dropping the base config.
      if (fs.existsSync(cachePath)) {
        try { return readConfigFile(cachePath); } catch (_) { /* unusable cache: fall through */ }
      }
      return {};
    }
  }

  const absPath = path.resolve(cwd, extendsVal);
  try {
    return readConfigFile(absPath);
  } catch (err) {
    process.stderr.write(`[sigmap] config extends: could not load ${absPath}: ${err.message}\n`);
    return {};
  }
}
|
|
264
|
+
|
|
224
265
|
/**
|
|
225
266
|
* Load and merge configuration for a given working directory.
|
|
226
267
|
*
|
|
@@ -250,18 +291,31 @@ __factories["./src/config/loader"] = function(module, exports) {
|
|
|
250
291
|
|
|
251
292
|
// Warn on unknown keys (helps catch typos)
|
|
252
293
|
for (const key of Object.keys(userConfig)) {
|
|
253
|
-
if (key.startsWith('_')
|
|
294
|
+
if (key.startsWith('_') || key === 'extends') continue;
|
|
254
295
|
if (!KNOWN_KEYS.has(key)) {
|
|
255
296
|
console.warn(`[sigmap] unknown config key: "${key}" (ignored)`);
|
|
256
297
|
}
|
|
257
298
|
}
|
|
258
299
|
|
|
259
|
-
// Deep merge:
|
|
260
|
-
|
|
300
|
+
// Deep merge: DEFAULTS → base (extends) → user config
|
|
301
|
+
const baseConfig = loadBaseConfig(userConfig.extends, cwd);
|
|
261
302
|
const merged = deepClone(DEFAULTS);
|
|
303
|
+
|
|
304
|
+
for (const key of Object.keys(baseConfig)) {
|
|
305
|
+
if (key.startsWith('_') || key === 'extends') continue;
|
|
306
|
+
if (!KNOWN_KEYS.has(key)) continue;
|
|
307
|
+
const val = baseConfig[key];
|
|
308
|
+
if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
|
|
309
|
+
typeof merged[key] === 'object' && !Array.isArray(merged[key])) {
|
|
310
|
+
merged[key] = Object.assign({}, merged[key], val);
|
|
311
|
+
} else {
|
|
312
|
+
merged[key] = val;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
262
316
|
for (const key of Object.keys(userConfig)) {
|
|
263
|
-
if (key.startsWith('_')) continue;
|
|
264
|
-
if (!KNOWN_KEYS.has(key)) continue;
|
|
317
|
+
if (key.startsWith('_') || key === 'extends') continue;
|
|
318
|
+
if (!KNOWN_KEYS.has(key)) continue;
|
|
265
319
|
const val = userConfig[key];
|
|
266
320
|
if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
|
|
267
321
|
typeof merged[key] === 'object' && !Array.isArray(merged[key])) {
|
|
@@ -292,7 +346,7 @@ __factories["./src/config/loader"] = function(module, exports) {
|
|
|
292
346
|
return JSON.parse(JSON.stringify(obj));
|
|
293
347
|
}
|
|
294
348
|
|
|
295
|
-
module.exports = { loadConfig, detectAutoSrcDirs };
|
|
349
|
+
module.exports = { loadConfig, loadBaseConfig, detectAutoSrcDirs };
|
|
296
350
|
|
|
297
351
|
};
|
|
298
352
|
|
|
@@ -3098,6 +3152,25 @@ __factories["./src/format/cache"] = function(module, exports) {
|
|
|
3098
3152
|
}
|
|
3099
3153
|
|
|
3100
3154
|
function readBenchmarkTrend(cwd) {
|
|
3155
|
+
// Prefer per-user history file written by benchmark scripts
|
|
3156
|
+
const histPath = path.join(cwd, '.context', 'benchmark-history.ndjson');
|
|
3157
|
+
if (fs.existsSync(histPath)) {
|
|
3158
|
+
const histValues = [];
|
|
3159
|
+
try {
|
|
3160
|
+
for (const line of fs.readFileSync(histPath, 'utf8').trim().split('\n').filter(Boolean)) {
|
|
3161
|
+
try {
|
|
3162
|
+
const obj = JSON.parse(line);
|
|
3163
|
+
if (obj.type === 'retrieval') {
|
|
3164
|
+
const v = toNumber(obj.hitAt5Pct);
|
|
3165
|
+
if (v !== null) histValues.push(v);
|
|
3166
|
+
}
|
|
3167
|
+
} catch (_) {}
|
|
3168
|
+
}
|
|
3169
|
+
} catch (_) {}
|
|
3170
|
+
if (histValues.length > 0) return histValues.slice(-30);
|
|
3171
|
+
}
|
|
3172
|
+
|
|
3173
|
+
// Fallback: legacy benchmarks/results directory (CI artifacts)
|
|
3101
3174
|
const resultDir = path.join(cwd, 'benchmarks', 'results');
|
|
3102
3175
|
if (!fs.existsSync(resultDir)) return [];
|
|
3103
3176
|
const files = [];
|
|
@@ -4654,7 +4727,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
4654
4727
|
|
|
4655
4728
|
const SERVER_INFO = {
|
|
4656
4729
|
name: 'sigmap',
|
|
4657
|
-
version: '
|
|
4730
|
+
version: '5.1.0',
|
|
4658
4731
|
description: 'SigMap MCP server — code signatures on demand',
|
|
4659
4732
|
};
|
|
4660
4733
|
|
|
@@ -5252,6 +5325,61 @@ __factories["./src/security/scanner"] = function(module, exports) {
|
|
|
5252
5325
|
|
|
5253
5326
|
};
|
|
5254
5327
|
|
|
5328
|
+
// ── ./src/judge/judge-engine ──
|
|
5329
|
+
__factories["./src/judge/judge-engine"] = function(module, exports) {
|
|
5330
|
+
'use strict';
|
|
5331
|
+
|
|
5332
|
+
// Function words ignored when measuring overlap between response and context.
const STOP = new Set([
  'the','a','an','in','on','at','to','of','for','and','or','but',
  'is','are','was','were','be','been','being','have','has','had',
  'do','does','did','will','would','could','should','may','might',
  'shall','can','not','with','from','by','as','this','that','it',
]);

/**
 * Split text into lowercase identifier-like tokens.
 *
 * A token starts with a letter, continues with letters, digits or underscores,
 * and is at least three characters long.
 *
 * @param {*} text - Text to tokenize; non-string values yield no tokens.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  if (typeof text !== 'string') return [];
  return text.toLowerCase().match(/\b[a-z][a-z0-9_]{2,}\b/g) || [];
}

/**
 * Fraction of the response's non-stop-word tokens that also occur in the context.
 *
 * @param {string} response - Text being judged.
 * @param {string} context - Source text the response should be grounded in.
 * @returns {number} Score in [0, 1] rounded to three decimals; 0 when either
 *   side has no usable tokens.
 */
function groundedness(response, context) {
  if (!response || !context) return 0;
  const ctxTokens = new Set(tokenize(context).filter((t) => !STOP.has(t)));
  if (ctxTokens.size === 0) return 0;
  const respTokens = tokenize(response).filter((t) => !STOP.has(t));
  if (respTokens.length === 0) return 0;
  const matched = respTokens.filter((t) => ctxTokens.has(t));
  return parseFloat((matched.length / respTokens.length).toFixed(3));
}

// Lowercase phrases whose presence in a response is reported as a failure reason.
const GENERIC_MARKERS = [
  'however, based on my knowledge',
  'generally speaking',
  'in general',
  'typically,',
  'usually,',
  'as a general rule',
];

/**
 * Score a response against its context and produce a pass/fail verdict.
 *
 * The verdict is `pass` only when the groundedness score reaches the threshold
 * and the response contains none of GENERIC_MARKERS.
 *
 * @param {string} response - Text being judged.
 * @param {string} context - Source text the response should be grounded in.
 * @param {{threshold?: number}} [opts] - Optional settings. `threshold` is the
 *   minimum passing score; a missing, null or non-finite value falls back to 0.25.
 * @returns {{score: number, verdict: string, reasons: string[]}}
 */
function judge(response, context, opts = {}) {
  const score = groundedness(response, context);

  // Previously a NaN threshold produced verdict 'fail' with no reason (every
  // comparison against NaN is false) and null silently behaved like 0.
  const requested = opts?.threshold;
  const numeric = requested == null ? NaN : Number(requested);
  const threshold = Number.isFinite(numeric) ? numeric : 0.25;

  const reasons = [];
  if (score < threshold) {
    reasons.push(`score ${score} is below threshold ${threshold} — response may not be grounded in context`);
  }
  if (typeof response === 'string' && response) {
    const lower = response.toLowerCase();
    for (const m of GENERIC_MARKERS) {
      if (lower.includes(m)) reasons.push(`response contains generic phrase: "${m}"`);
    }
  }

  // With a finite threshold, "no reasons" already implies score >= threshold.
  const verdict = reasons.length === 0 ? 'pass' : 'fail';
  return { score, verdict, reasons };
}
|
|
5379
|
+
|
|
5380
|
+
module.exports = { groundedness, judge };
|
|
5381
|
+
};
|
|
5382
|
+
|
|
5255
5383
|
// ── ./src/tracking/logger ──
|
|
5256
5384
|
__factories["./src/tracking/logger"] = function(module, exports) {
|
|
5257
5385
|
|
|
@@ -6262,7 +6390,7 @@ const path = require('path');
|
|
|
6262
6390
|
const os = require('os');
|
|
6263
6391
|
const { execSync } = require('child_process');
|
|
6264
6392
|
|
|
6265
|
-
const VERSION = '
|
|
6393
|
+
const VERSION = '5.1.0';
|
|
6266
6394
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
6267
6395
|
|
|
6268
6396
|
function requireSourceOrBundled(key) {
|
|
@@ -8313,6 +8441,124 @@ function main() {
|
|
|
8313
8441
|
process.exit(0);
|
|
8314
8442
|
}
|
|
8315
8443
|
|
|
8444
|
+
// v5.0: `sigmap judge --response <file> --context <file>` — groundedness scoring
|
|
8445
|
+
// CLI branch for `sigmap judge`: reads the response and context files, scores
// the response with the judge engine, prints the result (JSON or a small
// report) and exits 0 on `pass`, 1 otherwise.
if (args[0] === 'judge') {
  const respIdx = args.indexOf('--response');
  const ctxIdx = args.indexOf('--context');

  if (respIdx < 0 || ctxIdx < 0) {
    console.error('[sigmap] Usage: sigmap judge --response <file> --context <file> [--json] [--threshold 0.25]');
    process.exit(1);
  }

  const respFile = (args[respIdx + 1] || '').trim();
  const ctxFile = (args[ctxIdx + 1] || '').trim();

  // A following token that starts with `--` is another flag, not a file path.
  if (!respFile || respFile.startsWith('--') || !ctxFile || ctxFile.startsWith('--')) {
    console.error('[sigmap] --response and --context require file paths');
    process.exit(1);
  }

  let responseText = '';
  let contextText = '';
  try { responseText = fs.readFileSync(path.resolve(cwd, respFile), 'utf8'); }
  catch (e) { console.error(`[sigmap] cannot read --response file: ${e.message}`); process.exit(1); }
  try { contextText = fs.readFileSync(path.resolve(cwd, ctxFile), 'utf8'); }
  catch (e) { console.error(`[sigmap] cannot read --context file: ${e.message}`); process.exit(1); }

  const thrIdx = args.indexOf('--threshold');
  const judgeOpts = {};
  if (thrIdx >= 0) {
    // `parseFloat(x) || 0.25` treated an explicit `--threshold 0` as missing;
    // only a missing or non-numeric value falls back to the default now.
    const requested = Number.parseFloat(args[thrIdx + 1]);
    judgeOpts.threshold = Number.isFinite(requested) ? requested : 0.25;
  }

  const { judge: runJudge } = requireSourceOrBundled('./src/judge/judge-engine');
  const result = runJudge(responseText, contextText, judgeOpts);

  if (args.includes('--json')) {
    process.stdout.write(JSON.stringify(result) + '\n');
  } else {
    const bar = '─'.repeat(44);
    console.log([
      bar,
      ` sigmap judge`,
      ` Score : ${result.score}`,
      ` Verdict : ${result.verdict}`,
      result.reasons.length ? ` Reasons :\n ${result.reasons.join('\n ')}` : ` Reasons : none`,
      bar,
    ].join('\n'));
  }
  process.exit(result.verdict === 'pass' ? 0 : 1);
}
|
|
8489
|
+
|
|
8490
|
+
// v5.0: `sigmap history` — show last N usage log entries with sparkline
|
|
8491
|
+
// CLI branch for `sigmap history`: prints the last N usage-log entries as a
// table with a token sparkline, followed by benchmark trend rows when
// `.context/benchmark-history.ndjson` exists. With `--json` it prints only the
// selected usage entries as JSON.
if (args[0] === 'history') {
  const { readLog } = requireSourceOrBundled('./src/tracking/logger');
  const entries = readLog(cwd);

  const nIdx = args.indexOf('--last');
  // Only a positive count is meaningful: a negative value would make
  // `slice(-n)` drop the oldest entries instead of keeping the newest ones.
  const requestedCount = nIdx >= 0 ? Number.parseInt(args[nIdx + 1], 10) : NaN;
  const n = requestedCount > 0 ? requestedCount : 10;
  const last = entries.slice(-n);

  if (args.includes('--json')) {
    process.stdout.write(JSON.stringify(last) + '\n');
    process.exit(0);
  }

  const SPARK_CHARS = '▁▂▃▄▅▆▇█';
  // Map each value onto one of the eight bar glyphs, scaled between the
  // series' min and max (a flat series renders as the lowest bar).
  const sparkline = (values) => {
    if (values.length === 0) return '';
    const min = Math.min(...values);
    const max = Math.max(...values);
    const range = max - min || 1;
    return values.map((v) => {
      const idx = Math.round(((v - min) / range) * (SPARK_CHARS.length - 1));
      return SPARK_CHARS[idx];
    }).join('');
  };

  const bar = '─'.repeat(62);
  console.log(bar);
  console.log(` sigmap history (last ${Math.max(last.length, 1)} runs)`);
  console.log(bar);

  if (last.length === 0) {
    console.log(' No usage log entries. Enable tracking: true in config to start recording runs.');
  } else {
    console.log(` ${'Date'.padEnd(24)} ${'Files'.padStart(5)} ${'Tokens'.padStart(7)} ${'Reduction'.padStart(9)} ${'Budget?'.padStart(7)}`);
    console.log(` ${'─'.repeat(24)} ${'─'.repeat(5)} ${'─'.repeat(7)} ${'─'.repeat(9)} ${'─'.repeat(7)}`);
    for (const e of last) {
      const date = (e.ts || '').slice(0, 19).replace('T', ' ');
      const files = String(e.fileCount || 0).padStart(5);
      const tok = String(e.finalTokens || 0).padStart(7);
      const red = `${e.reductionPct || 0}%`.padStart(9);
      const over = (e.overBudget ? ' ⚠ yes' : ' no').padStart(7);
      console.log(` ${date.padEnd(24)} ${files} ${tok} ${red} ${over}`);
    }
    console.log(bar);
    const tokens = last.map((e) => e.finalTokens || 0);
    console.log(` Token trend: ${sparkline(tokens)}`);
  }

  // Show benchmark trend rows if .context/benchmark-history.ndjson exists
  const benchHistPath = path.join(cwd, '.context', 'benchmark-history.ndjson');
  if (fs.existsSync(benchHistPath)) {
    try {
      // Unparseable lines are skipped rather than aborting the whole report.
      const benchEntries = fs.readFileSync(benchHistPath, 'utf8').trim().split('\n')
        .map((l) => { try { return JSON.parse(l); } catch (_) { return null; } }).filter(Boolean);
      const retrieval = benchEntries.filter((e) => e.type === 'retrieval').slice(-n);
      if (retrieval.length > 0) {
        const hits = retrieval.map((e) => e.hitAt5Pct || 0);
        console.log(` hit@5 trend: ${sparkline(hits)} ${hits.at(-1)}% (latest)`);
      }
      const tokenBench = benchEntries.filter((e) => e.type === 'token-reduction').slice(-n);
      if (tokenBench.length > 0) {
        const reds = tokenBench.map((e) => e.reduction || e.avgReductionPct || 0);
        console.log(` tok reduce : ${sparkline(reds)} ${reds.at(-1)}% (latest)`);
      }
    } catch (_) { /* best-effort: the trend rows are optional */ }
  }

  console.log(bar);
  process.exit(0);
}
|
|
8561
|
+
|
|
8316
8562
|
// Feature 6: `sigmap sync` — write all outputs + llms.txt + print compact diff
|
|
8317
8563
|
if (args[0] === 'sync') {
|
|
8318
8564
|
try {
|
package/package.json
CHANGED
package/src/config/loader.js
CHANGED
|
@@ -4,6 +4,65 @@ const fs = require('fs');
|
|
|
4
4
|
const path = require('path');
|
|
5
5
|
const { DEFAULTS } = require('./defaults');
|
|
6
6
|
|
|
7
|
+
const BASE_CONFIG_TTL_MS = 60 * 60 * 1000; // 1 hour

/**
 * Resolve the base configuration referenced by a config file's `extends` value.
 *
 * `extendsVal` is either an `http://` / `https://` URL or a file path resolved
 * against `cwd`. Remote configs are cached on disk under
 * `<cwd>/.context/config-cache/` and reused while younger than
 * BASE_CONFIG_TTL_MS; a stale cache entry is still used as a fallback when the
 * fetch fails.
 *
 * This function never throws for load failures: it reports them on stderr and
 * returns `{}` so the caller can continue with defaults.
 *
 * @param {*} extendsVal - Value of the `extends` key; anything that is not a
 *   non-empty string yields `{}`.
 * @param {string} cwd - Directory used to resolve relative paths and to locate
 *   the cache directory.
 * @returns {object} The parsed base config (always a plain object), or `{}`.
 */
function loadBaseConfig(extendsVal, cwd) {
  if (!extendsVal || typeof extendsVal !== 'string') return {};

  // A base config must be a JSON object: callers enumerate its keys, so a
  // top-level `null`, array or scalar is treated as a load failure.
  const parseConfig = (text) => {
    const value = JSON.parse(text);
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      throw new Error('base config must be a JSON object');
    }
    return value;
  };
  const readConfigFile = (file) => parseConfig(fs.readFileSync(file, 'utf8'));

  if (extendsVal.startsWith('https://') || extendsVal.startsWith('http://')) {
    const cacheDir = path.join(cwd, '.context', 'config-cache');
    const cacheKey = Buffer.from(extendsVal).toString('base64url').replace(/[^a-zA-Z0-9_-]/g, '_');
    const cachePath = path.join(cacheDir, `${cacheKey}.json`);

    if (fs.existsSync(cachePath)) {
      const age = Date.now() - fs.statSync(cachePath).mtimeMs;
      if (age < BASE_CONFIG_TTL_MS) {
        try { return readConfigFile(cachePath); } catch (_) { /* corrupt cache entry: refetch below */ }
      }
    }

    try {
      const { execFileSync } = require('child_process');
      // The fetch runs in a child Node process so this function can stay
      // synchronous; an in-process request could not be awaited here and would
      // only issue a second, unobserved download. The URL travels as an argv
      // entry (process.argv[1] under `node -e`) and no shell is involved:
      // interpolating it into a `node -e "…"` command line breaks the quoting
      // and would let shell metacharacters inside the URL execute.
      const fetchScript = [
        'const u=process.argv[1];',
        "const h=require(u.startsWith('https')?'https':'http');",
        "let d='';",
        "h.get(u,r=>{r.on('data',c=>d+=c);r.on('end',()=>process.stdout.write(d))})",
        ".on('error',()=>process.exit(1))",
      ].join('');
      const out = execFileSync(process.execPath, ['-e', fetchScript, extendsVal], {
        timeout: 10000,
        encoding: 'utf8',
      });
      const parsed = parseConfig(out);
      fs.mkdirSync(cacheDir, { recursive: true });
      fs.writeFileSync(cachePath, JSON.stringify(parsed), 'utf8');
      return parsed;
    } catch (err) {
      process.stderr.write(`[sigmap] config extends: could not fetch ${extendsVal}: ${err.message}\n`);
      // Prefer a stale cached copy over silently dropping the base config.
      if (fs.existsSync(cachePath)) {
        try { return readConfigFile(cachePath); } catch (_) { /* unusable cache: fall through */ }
      }
      return {};
    }
  }

  // Local file path
  const absPath = path.resolve(cwd, extendsVal);
  try {
    return readConfigFile(absPath);
  } catch (err) {
    process.stderr.write(`[sigmap] config extends: could not load ${absPath}: ${err.message}\n`);
    return {};
  }
}
|
|
65
|
+
|
|
7
66
|
// Keys that are valid in gen-context.config.json
|
|
8
67
|
const KNOWN_KEYS = new Set(Object.keys(DEFAULTS));
|
|
9
68
|
|
|
@@ -173,17 +232,30 @@ function loadConfig(cwd) {
|
|
|
173
232
|
|
|
174
233
|
// Warn on unknown keys (helps catch typos)
|
|
175
234
|
for (const key of Object.keys(userConfig)) {
|
|
176
|
-
if (key.startsWith('_')
|
|
235
|
+
if (key.startsWith('_') || key === 'extends') continue;
|
|
177
236
|
if (!KNOWN_KEYS.has(key)) {
|
|
178
237
|
console.warn(`[sigmap] unknown config key: "${key}" (ignored)`);
|
|
179
238
|
}
|
|
180
239
|
}
|
|
181
240
|
|
|
182
|
-
// Deep merge:
|
|
183
|
-
|
|
241
|
+
// Deep merge: DEFAULTS → base (extends) → user config
|
|
242
|
+
const baseConfig = loadBaseConfig(userConfig.extends, cwd);
|
|
184
243
|
const merged = deepClone(DEFAULTS);
|
|
244
|
+
|
|
245
|
+
for (const key of Object.keys(baseConfig)) {
|
|
246
|
+
if (key.startsWith('_') || key === 'extends') continue;
|
|
247
|
+
if (!KNOWN_KEYS.has(key)) continue;
|
|
248
|
+
const val = baseConfig[key];
|
|
249
|
+
if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
|
|
250
|
+
typeof merged[key] === 'object' && !Array.isArray(merged[key])) {
|
|
251
|
+
merged[key] = Object.assign({}, merged[key], val);
|
|
252
|
+
} else {
|
|
253
|
+
merged[key] = val;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
185
257
|
for (const key of Object.keys(userConfig)) {
|
|
186
|
-
if (key.startsWith('_')) continue;
|
|
258
|
+
if (key.startsWith('_') || key === 'extends') continue;
|
|
187
259
|
if (!KNOWN_KEYS.has(key)) continue; // skip unknown keys
|
|
188
260
|
const val = userConfig[key];
|
|
189
261
|
if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
|
|
@@ -214,4 +286,4 @@ function deepClone(obj) {
|
|
|
214
286
|
return JSON.parse(JSON.stringify(obj));
|
|
215
287
|
}
|
|
216
288
|
|
|
217
|
-
module.exports = { loadConfig };
|
|
289
|
+
module.exports = { loadConfig, loadBaseConfig };
|
package/src/format/dashboard.js
CHANGED
|
@@ -140,6 +140,26 @@ function computeExtractorCoverage(cwd) {
|
|
|
140
140
|
}
|
|
141
141
|
|
|
142
142
|
function readBenchmarkTrend(cwd) {
|
|
143
|
+
// Prefer per-user history file written by benchmark scripts
|
|
144
|
+
const histPath = path.join(cwd, '.context', 'benchmark-history.ndjson');
|
|
145
|
+
if (fs.existsSync(histPath)) {
|
|
146
|
+
const values = [];
|
|
147
|
+
try {
|
|
148
|
+
const lines = fs.readFileSync(histPath, 'utf8').trim().split('\n').filter(Boolean);
|
|
149
|
+
for (const line of lines) {
|
|
150
|
+
try {
|
|
151
|
+
const obj = JSON.parse(line);
|
|
152
|
+
if (obj.type === 'retrieval') {
|
|
153
|
+
const v = toNumber(obj.hitAt5Pct);
|
|
154
|
+
if (v !== null) values.push(v);
|
|
155
|
+
}
|
|
156
|
+
} catch (_) {}
|
|
157
|
+
}
|
|
158
|
+
} catch (_) {}
|
|
159
|
+
if (values.length > 0) return values.slice(-30);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Fallback: legacy benchmarks/results directory (CI artifacts)
|
|
143
163
|
const resultDir = path.join(cwd, 'benchmarks', 'results');
|
|
144
164
|
if (!fs.existsSync(resultDir)) return [];
|
|
145
165
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Function words ignored when measuring overlap between response and context.
const STOP = new Set([
  'the','a','an','in','on','at','to','of','for','and','or','but',
  'is','are','was','were','be','been','being','have','has','had',
  'do','does','did','will','would','could','should','may','might',
  'shall','can','not','with','from','by','as','this','that','it',
]);

/**
 * Split text into lowercase identifier-like tokens.
 *
 * A token starts with a letter, continues with letters, digits or underscores,
 * and is at least three characters long.
 *
 * @param {*} text - Text to tokenize; non-string values yield no tokens.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  if (typeof text !== 'string') return [];
  return text.toLowerCase().match(/\b[a-z][a-z0-9_]{2,}\b/g) || [];
}

/**
 * Fraction of the response's non-stop-word tokens that also occur in the context.
 *
 * @param {string} response - Text being judged.
 * @param {string} context - Source text the response should be grounded in.
 * @returns {number} Score in [0, 1] rounded to three decimals; 0 when either
 *   side has no usable tokens.
 */
function groundedness(response, context) {
  if (!response || !context) return 0;
  const ctxTokens = new Set(tokenize(context).filter((t) => !STOP.has(t)));
  if (ctxTokens.size === 0) return 0;
  const respTokens = tokenize(response).filter((t) => !STOP.has(t));
  if (respTokens.length === 0) return 0;
  const matched = respTokens.filter((t) => ctxTokens.has(t));
  return parseFloat((matched.length / respTokens.length).toFixed(3));
}

// Lowercase phrases whose presence in a response is reported as a failure reason.
const GENERIC_MARKERS = [
  'however, based on my knowledge',
  'generally speaking',
  'in general',
  'typically,',
  'usually,',
  'as a general rule',
];

/**
 * Score a response against its context and produce a pass/fail verdict.
 *
 * The verdict is `pass` only when the groundedness score reaches the threshold
 * and the response contains none of GENERIC_MARKERS.
 *
 * @param {string} response - Text being judged.
 * @param {string} context - Source text the response should be grounded in.
 * @param {{threshold?: number}} [opts] - Optional settings. `threshold` is the
 *   minimum passing score; a missing, null or non-finite value falls back to 0.25.
 * @returns {{score: number, verdict: string, reasons: string[]}}
 */
function judge(response, context, opts = {}) {
  const score = groundedness(response, context);

  // Previously a NaN threshold produced verdict 'fail' with no reason (every
  // comparison against NaN is false) and null silently behaved like 0.
  const requested = opts?.threshold;
  const numeric = requested == null ? NaN : Number(requested);
  const threshold = Number.isFinite(numeric) ? numeric : 0.25;

  const reasons = [];

  if (score < threshold) {
    reasons.push(`score ${score} is below threshold ${threshold} — response may not be grounded in context`);
  }

  if (typeof response === 'string' && response) {
    const lower = response.toLowerCase();
    for (const m of GENERIC_MARKERS) {
      if (lower.includes(m)) {
        reasons.push(`response contains generic phrase: "${m}"`);
      }
    }
  }

  // With a finite threshold, "no reasons" already implies score >= threshold.
  const verdict = reasons.length === 0 ? 'pass' : 'fail';
  return { score, verdict, reasons };
}
|
|
54
|
+
|
|
55
|
+
module.exports = { groundedness, judge };
|
package/src/mcp/server.js
CHANGED