sigmap 7.14.0 → 7.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/gen-context.js +199 -2
- package/llms-full.txt +1 -1
- package/llms.txt +1 -1
- package/package.json +3 -2
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/conventions/ci.js +48 -0
- package/src/eval/llm-ablation.js +113 -0
- package/src/mcp/server.js +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,24 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [7.16.0] — 2026-06-18
|
|
14
|
+
|
|
15
|
+
Minor release — LLM A/B hallucination ablation harness (grounded codegen, IMPL §9).
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- **LLM A/B hallucination ablation harness (#325):** the honest measurement behind the grounded-codegen plan (IMPL §9). Runs a model twice per task — (A) no SigMap context, (B) with SigMap grounding — pipes both outputs through the hallucination guard, and reports the measured delta in flagged codebase-fact errors. New zero-dependency, bundle-safe `src/eval/llm-ablation.js` (`buildGrounding`, `scoreAnswer`, `runAblation`) keeps the model call **injected**, so the harness is fully offline-testable; the live runner `scripts/run-llm-ablation.mjs` wires Anthropic via `ANTHROPIC_API_KEY` and prints the A/B table + delta (`npm run benchmark:llm-ablation`), degrading to a graceful skip (exit 0) when no key is set. The network fetch is confined to `scripts/`, never the published library surface. Starter corpus in `benchmarks/llm-ablation-tasks.json`. This turns §9 from an offline coverage proxy into a ready-to-run real A/B — the moment a key is present, it produces the measured hallucination delta.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## [7.15.0] — 2026-06-18
|
|
23
|
+
|
|
24
|
+
Minor release — `sigmap conventions --ci` (grounded codegen, Layer 3 polish).
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
- **`sigmap conventions --ci` — gate CI on convention consistency (#322):** completes the consistency-tracking story started by `--report` (v7.14.0). A CI gate that fails when a repo's overall convention consistency falls below a threshold (`--min`, default 0.70), and — with `--no-regress` — also fails when the score dropped vs the last recorded snapshot (best-effort). New zero-dependency, bundle-safe `src/conventions/ci.js` (`ciGate`) reuses `overallScore`; the command is read-only (reads the last `.context/conventions-history.ndjson` snapshot for `--no-regress`, never appends) and exits non-zero on failure, so it drops straight into CI. `--json` for machine output. The remaining `conventions` flags (`--fix`, `--update`) and the §9 LLM A/B benchmark are follow-ups.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
13
31
|
## [7.14.0] — 2026-06-17
|
|
14
32
|
|
|
15
33
|
Minor release — `sigmap conventions --report` (grounded codegen, Layer 3 polish).
|
package/gen-context.js
CHANGED
|
@@ -30,6 +30,175 @@ function __require(key) {
|
|
|
30
30
|
// ── ./src/review/review-pr ──
|
|
31
31
|
// ── ./src/create/orchestrate ──
|
|
32
32
|
// ── ./src/conventions/report ──
|
|
33
|
+
// ── ./src/conventions/ci ──
|
|
34
|
+
// ── ./src/eval/llm-ablation ──
|
|
35
|
+
__factories["./src/eval/llm-ablation"] = function(module, exports) {
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
|
|
39
|
+
*
|
|
40
|
+
* Runs a model twice per task — (A) no SigMap context, (B) with SigMap
|
|
41
|
+
* grounding — pipes both outputs through the hallucination guard, and reports
|
|
42
|
+
* the measured delta in flagged codebase-fact errors. The model call is
|
|
43
|
+
* INJECTED (`complete(prompt) → text`), so the harness itself is pure and
|
|
44
|
+
* offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
|
|
45
|
+
* Zero-dependency, bundle-safe (no network here).
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
const { verify } = __require('./src/verify/hallucination-guard');
|
|
49
|
+
|
|
50
|
+
/**
 * Build the SigMap grounding block for a repo — the text prepended to a task
 * prompt in arm B of the ablation. It has up to two sections, joined by a
 * blank line:
 *   1. the rendered conventions block (the house style), and
 *   2. a "Known symbols" list, so the model can reference real names instead
 *      of guessing.
 *
 * Every section is best-effort: if a section's modules are unavailable or
 * throw, that section is omitted rather than failing the whole call. Sections
 * that render to nothing are skipped, so the result never starts with blank
 * lines.
 *
 * @param {string} cwd repository root the sections are built from
 * @param {object} [opts]
 * @param {number} [opts.maxSymbols=80] upper bound on listed symbols; values
 *   that are not a positive number list no symbols at all
 * @returns {string} the grounding text, or '' when no section could be built
 */
function buildGrounding(cwd, opts = {}) {
  // Normalise the cap: a negative `end` passed to slice() would silently drop
  // symbols from the END of the list instead of limiting it.
  const requested = opts.maxSymbols != null ? Number(opts.maxSymbols) : 80;
  const maxSymbols = requested > 0 ? Math.floor(requested) : 0;
  const parts = [];

  // Section 1: conventions.
  try {
    const { extractConventions } = __require('./src/conventions/extract');
    const { renderConventionsBlock } = __require('./src/conventions/inject');
    let files = [];
    try {
      const { buildSigIndex } = __require('./src/retrieval/ranker');
      files = [...buildSigIndex(cwd).keys()];
    } catch (_) {
      // No signature index available: extract conventions from an empty list.
    }
    const block = renderConventionsBlock(extractConventions(cwd, files));
    if (block && String(block).trim()) parts.push(block);
  } catch (_) {
    // Conventions unavailable: omit the section (best-effort by design).
  }

  // Section 2: known symbols, capped at `maxSymbols`.
  try {
    const { buildSymbolSet } = __require('./src/verify/hallucination-guard');
    const names = [...buildSymbolSet(cwd).set].slice(0, maxSymbols);
    if (names.length) parts.push(`## Known symbols (reference these exactly)\n${names.join(', ')}`);
  } catch (_) {
    // Symbol set unavailable: omit the section (best-effort by design).
  }

  return parts.join('\n\n');
}
|
|
87
|
+
|
|
88
|
+
/**
 * Score one model answer: the number of codebase-fact errors the
 * hallucination guard flags in it (the §9 metric).
 *
 * The answer is coerced to a string (falsy answers become '') and handed to
 * `verify`; the guard's `summary.total` is the score. Any failure while
 * verifying is treated as a score of 0 rather than propagated.
 *
 * @param {string} answerText the model output to check
 * @param {string} cwd repository root the answer is verified against
 * @returns {number} flagged-error count, or 0 when nothing was flagged or
 *   verification failed
 */
function scoreAnswer(answerText, cwd) {
  let flagged = 0;
  try {
    const text = answerText ? String(answerText) : String('');
    const report = verify(text, cwd);
    const total = report.summary.total;
    if (total) flagged = total;
  } catch (_) {
    // Verification failed: fall back to a zero score.
    flagged = 0;
  }
  return flagged;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Run the A/B ablation over a task corpus.
 *
 * For every task the injected model call is made twice — arm A with the bare
 * prompt, arm B with the grounding block prepended (separated by a `---`
 * rule) — and both outputs are scored with `scoreAnswer`.
 *
 * `complete` must be SYNCHRONOUS. A Promise coerces to the literal text
 * "[object Promise]", which would be scored as if it were the model's answer
 * and silently corrupt the measurement, so a thenable return value is
 * rejected with a TypeError. Resolve model output before calling this
 * function (for example, pre-fetch the answers and pass a lookup).
 *
 * @param {Array<{id:string, prompt:string}>} tasks
 * @param {string} cwd repository root used for grounding and scoring
 * @param {(prompt:string, meta:object)=>string} complete injected model call;
 *   `meta` is `{ id, grounded }`
 * @param {object} [opts]
 * @param {string} [opts.grounding] precomputed grounding (else built from cwd);
 *   an empty string disables grounding, so arm B gets the bare prompt
 * @returns {{ tasks: object[], aggregate: object }} per-task
 *   `{ id, aFlagged, bFlagged }` rows, plus totals: `n`, `withoutFlagged`,
 *   `withFlagged`, `delta` (A minus B) and the flagged counts per 100 tasks
 * @throws {TypeError} when `complete` returns a Promise/thenable
 */
function runAblation(tasks, cwd, complete, opts = {}) {
  const grounding = opts.grounding != null ? opts.grounding : buildGrounding(cwd);
  const rows = [];
  let sumA = 0;
  let sumB = 0;

  // Call the injected model and coerce its answer to a string, refusing
  // asynchronous results instead of scoring "[object Promise]".
  const ask = (prompt, meta) => {
    const out = complete(prompt, meta);
    if (out != null && typeof out.then === 'function') {
      // Mark the promise as handled so a later rejection is not also
      // reported as unhandled on top of the TypeError below.
      out.then(null, () => {});
      throw new TypeError(
        'runAblation: `complete` must return a string synchronously, but returned a Promise — ' +
          'resolve the model output before calling runAblation'
      );
    }
    return String(out || '');
  };

  for (const task of tasks || []) {
    const basePrompt = task.prompt || '';
    const groundedPrompt = grounding ? `${grounding}\n\n---\n\n${basePrompt}` : basePrompt;

    const outA = ask(basePrompt, { id: task.id, grounded: false });
    const outB = ask(groundedPrompt, { id: task.id, grounded: true });

    const aFlagged = scoreAnswer(outA, cwd);
    const bFlagged = scoreAnswer(outB, cwd);
    sumA += aFlagged;
    sumB += bFlagged;
    rows.push({ id: task.id, aFlagged, bFlagged });
  }

  const n = rows.length;
  // Flagged errors per 100 tasks; 0 for an empty corpus to avoid dividing by zero.
  const per100 = (sum) => (n > 0 ? (sum / n) * 100 : 0);
  return {
    tasks: rows,
    aggregate: {
      n,
      withoutFlagged: sumA,
      withFlagged: sumB,
      delta: sumA - sumB,
      withoutPer100: per100(sumA),
      withPer100: per100(sumB),
    },
  };
}
|
|
146
|
+
|
|
147
|
+
module.exports = { buildGrounding, scoreAnswer, runAblation };
|
|
148
|
+
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
__factories["./src/conventions/ci"] = function(module, exports) {
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Convention CI gate (IMPL.md §4 — `conventions --ci`).
|
|
155
|
+
*
|
|
156
|
+
* Fails CI when a repo's overall convention consistency is below a threshold,
|
|
157
|
+
* and optionally when it regresses vs the last recorded run. Builds on the
|
|
158
|
+
* `--report` score. Pure, zero-dependency, bundle-safe.
|
|
159
|
+
*/
|
|
160
|
+
|
|
161
|
+
const { overallScore } = __require('./src/conventions/report');
|
|
162
|
+
|
|
163
|
+
const DEFAULT_MIN = 0.7;
|
|
164
|
+
const EPS = 1e-9;
|
|
165
|
+
|
|
166
|
+
/**
 * Evaluate the consistency gate.
 *
 * The gate fails when the overall score is below `min`, and — with
 * `opts.noRegress` and a prior snapshot carrying a numeric `score` — when the
 * score dropped versus that snapshot (a tolerance of `EPS` absorbs
 * floating-point noise).
 *
 * The gate fails CLOSED on an unusable threshold: a `min` that is not a
 * finite number (for example NaN from an unparsable `--min` value) would make
 * `score < min` always false and let every run pass, so it is reported as a
 * failure with its own reason instead.
 *
 * @param {object} result an `extractConventions` result
 * @param {object} [opts]
 * @param {number} [opts.min=0.7] minimum overall consistency (0–1)
 * @param {boolean} [opts.noRegress=false] also fail if the score dropped vs prior
 * @param {object|null} [prior] the previous snapshot (from `report.snapshot`)
 * @returns {{ score:number, min:number, ok:boolean, regressed:boolean, reasons:string[] }}
 */
function ciGate(result, opts = {}, prior = null) {
  const min = opts.min != null ? opts.min : DEFAULT_MIN;
  const score = overallScore(result);
  const reasons = [];
  let ok = true;

  if (!Number.isFinite(Number(min))) {
    // Comparisons against NaN are always false: without this branch an
    // invalid threshold would silently disable the gate.
    ok = false;
    reasons.push(`invalid min threshold ${String(min)} (expected a number between 0 and 1)`);
  } else if (score < min) {
    ok = false;
    reasons.push(`consistency ${(score * 100).toFixed(0)}% below min ${(min * 100).toFixed(0)}%`);
  }

  let regressed = false;
  if (opts.noRegress && prior && typeof prior.score === 'number') {
    if (score < prior.score - EPS) {
      regressed = true;
      ok = false;
      reasons.push(`consistency dropped ${(prior.score * 100).toFixed(0)}% → ${(score * 100).toFixed(0)}%`);
    }
  }

  return { score, min, ok, regressed, reasons };
}
|
|
197
|
+
|
|
198
|
+
module.exports = { ciGate, DEFAULT_MIN };
|
|
199
|
+
|
|
200
|
+
};
|
|
201
|
+
|
|
33
202
|
__factories["./src/conventions/report"] = function(module, exports) {
|
|
34
203
|
|
|
35
204
|
/**
|
|
@@ -7538,7 +7707,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
7538
7707
|
|
|
7539
7708
|
const SERVER_INFO = {
|
|
7540
7709
|
name: 'sigmap',
|
|
7541
|
-
version: '7.14.0',
|
|
7710
|
+
version: '7.16.0',
|
|
7542
7711
|
description: 'SigMap MCP server — code signatures on demand',
|
|
7543
7712
|
};
|
|
7544
7713
|
|
|
@@ -13216,7 +13385,7 @@ function __tryGit(args, opts = {}) {
|
|
|
13216
13385
|
catch (_) { return ''; }
|
|
13217
13386
|
}
|
|
13218
13387
|
|
|
13219
|
-
const VERSION = '7.14.0';
|
|
13388
|
+
const VERSION = '7.16.0';
|
|
13220
13389
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
13221
13390
|
|
|
13222
13391
|
function requireSourceOrBundled(key) {
|
|
@@ -16411,6 +16580,34 @@ function main() {
|
|
|
16411
16580
|
process.exit(0);
|
|
16412
16581
|
}
|
|
16413
16582
|
|
|
16583
|
+
// `--ci`: gate — fail when overall consistency is below a threshold (or regresses).
// Exits 0 on pass and 1 on failure. This branch only reads the history file;
// it never writes to it.
if (args.includes('--ci')) {
  const { ciGate } = requireSourceOrBundled('./src/conventions/ci');

  // `--min <n>` is optional: with no value, `min` stays undefined and the gate
  // applies its own default.
  const minIdx = args.indexOf('--min');
  const minRaw = minIdx !== -1 ? args[minIdx + 1] : undefined;
  const min = minRaw ? parseFloat(minRaw) : undefined;
  if (min !== undefined && !Number.isFinite(min)) {
    // An unparsable value (e.g. `--min abc`, or `--min --json` swallowing the
    // next flag) yields NaN; comparing against NaN is always false, so the
    // gate would pass unconditionally. Reject it instead of failing open.
    console.error(`[sigmap] conventions --ci: invalid --min value "${minRaw}" (expected a number between 0 and 1)`);
    process.exit(1);
  }

  // `--no-regress`: compare against the last recorded snapshot, best-effort.
  // A missing or unreadable history file simply disables the comparison.
  const noRegress = args.includes('--no-regress');
  let prior = null;
  if (noRegress) {
    try {
      const lines = fs.readFileSync(path.join(cwd, '.context', 'conventions-history.ndjson'), 'utf8').split('\n').filter(Boolean);
      if (lines.length) prior = JSON.parse(lines[lines.length - 1]);
    } catch (_) {
      // No usable history: leave `prior` as null.
    }
  }

  const gate = ciGate(result, { min, noRegress }, prior);

  // Machine output: the raw gate object on one line.
  if (jsonOut) {
    process.stdout.write(JSON.stringify(gate) + '\n');
    process.exit(gate.ok ? 0 : 1);
  }

  // Human output: a PASS/FAIL line, followed by one bullet per failure reason.
  const pctC = (n) => `${(n * 100).toFixed(0)}%`;
  if (gate.ok) {
    console.log(`[sigmap] conventions --ci ✓ PASS — consistency ${pctC(gate.score)} (min ${pctC(gate.min)})`);
    process.exit(0);
  }
  console.log(`[sigmap] conventions --ci ✗ FAIL — consistency ${pctC(gate.score)} (min ${pctC(gate.min)})`);
  for (const r of gate.reasons) console.log(`  • ${r}`);
  process.exit(1);
}
|
|
16610
|
+
|
|
16414
16611
|
// `--report`: consistency audit + score + trend vs the last run.
|
|
16415
16612
|
if (args.includes('--report')) {
|
|
16416
16613
|
const { scoreReport, snapshot } = requireSourceOrBundled('./src/conventions/report');
|
package/llms-full.txt
CHANGED
|
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
|
|
|
9
9
|
grounded. Deterministic, offline, no embeddings or vector database. Works with
|
|
10
10
|
Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
11
11
|
|
|
12
|
-
# Version: 7.14.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
|
|
12
|
+
# Version: 7.16.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
|
|
13
13
|
# Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
|
|
14
14
|
# Regenerate: npm run generate:llms | Validate: npm run validate:llms
|
|
15
15
|
|
package/llms.txt
CHANGED
|
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
|
|
|
9
9
|
grounded. Deterministic, offline, no embeddings or vector database. Works with
|
|
10
10
|
Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
11
11
|
|
|
12
|
-
# Version: 7.14.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
|
|
12
|
+
# Version: 7.16.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
|
|
13
13
|
# Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
|
|
14
14
|
# Regenerate: npm run generate:llms | Validate: npm run validate:llms
|
|
15
15
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sigmap",
|
|
3
|
-
"version": "7.14.0",
|
|
3
|
+
"version": "7.16.0",
|
|
4
4
|
"description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
|
|
5
5
|
"main": "packages/core/index.js",
|
|
6
6
|
"exports": {
|
|
@@ -39,7 +39,8 @@
|
|
|
39
39
|
"generate:llms": "node scripts/generate-llms.mjs",
|
|
40
40
|
"validate:llms": "node scripts/validate-llms.mjs",
|
|
41
41
|
"prepublishOnly": "node scripts/check-bundle.mjs && node scripts/check-version-meta.mjs && node scripts/generate-llms.mjs",
|
|
42
|
-
"benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs"
|
|
42
|
+
"benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs",
|
|
43
|
+
"benchmark:llm-ablation": "node scripts/run-llm-ablation.mjs"
|
|
43
44
|
},
|
|
44
45
|
"files": [
|
|
45
46
|
"gen-context.js",
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Convention CI gate (IMPL.md §4 — `conventions --ci`).
|
|
5
|
+
*
|
|
6
|
+
* Fails CI when a repo's overall convention consistency is below a threshold,
|
|
7
|
+
* and optionally when it regresses vs the last recorded run. Builds on the
|
|
8
|
+
* `--report` score. Pure, zero-dependency, bundle-safe.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const { overallScore } = require('./report');
|
|
12
|
+
|
|
13
|
+
const DEFAULT_MIN = 0.7;
|
|
14
|
+
const EPS = 1e-9;
|
|
15
|
+
|
|
16
|
+
/**
 * Evaluate the consistency gate.
 *
 * The gate fails when the overall score is below `min`, and — with
 * `opts.noRegress` and a prior snapshot carrying a numeric `score` — when the
 * score dropped versus that snapshot (a tolerance of `EPS` absorbs
 * floating-point noise).
 *
 * The gate fails CLOSED on an unusable threshold: a `min` that is not a
 * finite number (for example NaN from an unparsable `--min` value) would make
 * `score < min` always false and let every run pass, so it is reported as a
 * failure with its own reason instead.
 *
 * @param {object} result an `extractConventions` result
 * @param {object} [opts]
 * @param {number} [opts.min=0.7] minimum overall consistency (0–1)
 * @param {boolean} [opts.noRegress=false] also fail if the score dropped vs prior
 * @param {object|null} [prior] the previous snapshot (from `report.snapshot`)
 * @returns {{ score:number, min:number, ok:boolean, regressed:boolean, reasons:string[] }}
 */
function ciGate(result, opts = {}, prior = null) {
  const min = opts.min != null ? opts.min : DEFAULT_MIN;
  const score = overallScore(result);
  const reasons = [];
  let ok = true;

  if (!Number.isFinite(Number(min))) {
    // Comparisons against NaN are always false: without this branch an
    // invalid threshold would silently disable the gate.
    ok = false;
    reasons.push(`invalid min threshold ${String(min)} (expected a number between 0 and 1)`);
  } else if (score < min) {
    ok = false;
    reasons.push(`consistency ${(score * 100).toFixed(0)}% below min ${(min * 100).toFixed(0)}%`);
  }

  let regressed = false;
  if (opts.noRegress && prior && typeof prior.score === 'number') {
    if (score < prior.score - EPS) {
      regressed = true;
      ok = false;
      reasons.push(`consistency dropped ${(prior.score * 100).toFixed(0)}% → ${(score * 100).toFixed(0)}%`);
    }
  }

  return { score, min, ok, regressed, reasons };
}
|
|
47
|
+
|
|
48
|
+
module.exports = { ciGate, DEFAULT_MIN };
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
|
|
5
|
+
*
|
|
6
|
+
* Runs a model twice per task — (A) no SigMap context, (B) with SigMap
|
|
7
|
+
* grounding — pipes both outputs through the hallucination guard, and reports
|
|
8
|
+
* the measured delta in flagged codebase-fact errors. The model call is
|
|
9
|
+
* INJECTED (`complete(prompt) → text`), so the harness itself is pure and
|
|
10
|
+
* offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
|
|
11
|
+
* Zero-dependency, bundle-safe (no network here).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const { verify } = require('../verify/hallucination-guard');
|
|
15
|
+
|
|
16
|
+
/**
 * Build the SigMap grounding block for a repo — the text prepended to a task
 * prompt in arm B of the ablation. It has up to two sections, joined by a
 * blank line:
 *   1. the rendered conventions block (the house style), and
 *   2. a "Known symbols" list, so the model can reference real names instead
 *      of guessing.
 *
 * Every section is best-effort: if a section's modules are unavailable or
 * throw, that section is omitted rather than failing the whole call. Sections
 * that render to nothing are skipped, so the result never starts with blank
 * lines.
 *
 * @param {string} cwd repository root the sections are built from
 * @param {object} [opts]
 * @param {number} [opts.maxSymbols=80] upper bound on listed symbols; values
 *   that are not a positive number list no symbols at all
 * @returns {string} the grounding text, or '' when no section could be built
 */
function buildGrounding(cwd, opts = {}) {
  // Normalise the cap: a negative `end` passed to slice() would silently drop
  // symbols from the END of the list instead of limiting it.
  const requested = opts.maxSymbols != null ? Number(opts.maxSymbols) : 80;
  const maxSymbols = requested > 0 ? Math.floor(requested) : 0;
  const parts = [];

  // Section 1: conventions.
  try {
    const { extractConventions } = require('../conventions/extract');
    const { renderConventionsBlock } = require('../conventions/inject');
    let files = [];
    try {
      const { buildSigIndex } = require('../retrieval/ranker');
      files = [...buildSigIndex(cwd).keys()];
    } catch (_) {
      // No signature index available: extract conventions from an empty list.
    }
    const block = renderConventionsBlock(extractConventions(cwd, files));
    if (block && String(block).trim()) parts.push(block);
  } catch (_) {
    // Conventions unavailable: omit the section (best-effort by design).
  }

  // Section 2: known symbols, capped at `maxSymbols`.
  try {
    const { buildSymbolSet } = require('../verify/hallucination-guard');
    const names = [...buildSymbolSet(cwd).set].slice(0, maxSymbols);
    if (names.length) parts.push(`## Known symbols (reference these exactly)\n${names.join(', ')}`);
  } catch (_) {
    // Symbol set unavailable: omit the section (best-effort by design).
  }

  return parts.join('\n\n');
}
|
|
53
|
+
|
|
54
|
+
/**
 * Score one model answer: the number of codebase-fact errors the
 * hallucination guard flags in it (the §9 metric).
 *
 * The answer is coerced to a string (falsy answers become '') and handed to
 * `verify`; the guard's `summary.total` is the score. Any failure while
 * verifying is treated as a score of 0 rather than propagated.
 *
 * @param {string} answerText the model output to check
 * @param {string} cwd repository root the answer is verified against
 * @returns {number} flagged-error count, or 0 when nothing was flagged or
 *   verification failed
 */
function scoreAnswer(answerText, cwd) {
  let flagged = 0;
  try {
    const text = answerText ? String(answerText) : String('');
    const report = verify(text, cwd);
    const total = report.summary.total;
    if (total) flagged = total;
  } catch (_) {
    // Verification failed: fall back to a zero score.
    flagged = 0;
  }
  return flagged;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Run the A/B ablation over a task corpus.
 *
 * For every task the injected model call is made twice — arm A with the bare
 * prompt, arm B with the grounding block prepended (separated by a `---`
 * rule) — and both outputs are scored with `scoreAnswer`.
 *
 * `complete` must be SYNCHRONOUS. A Promise coerces to the literal text
 * "[object Promise]", which would be scored as if it were the model's answer
 * and silently corrupt the measurement, so a thenable return value is
 * rejected with a TypeError. Resolve model output before calling this
 * function (for example, pre-fetch the answers and pass a lookup).
 *
 * @param {Array<{id:string, prompt:string}>} tasks
 * @param {string} cwd repository root used for grounding and scoring
 * @param {(prompt:string, meta:object)=>string} complete injected model call;
 *   `meta` is `{ id, grounded }`
 * @param {object} [opts]
 * @param {string} [opts.grounding] precomputed grounding (else built from cwd);
 *   an empty string disables grounding, so arm B gets the bare prompt
 * @returns {{ tasks: object[], aggregate: object }} per-task
 *   `{ id, aFlagged, bFlagged }` rows, plus totals: `n`, `withoutFlagged`,
 *   `withFlagged`, `delta` (A minus B) and the flagged counts per 100 tasks
 * @throws {TypeError} when `complete` returns a Promise/thenable
 */
function runAblation(tasks, cwd, complete, opts = {}) {
  const grounding = opts.grounding != null ? opts.grounding : buildGrounding(cwd);
  const rows = [];
  let sumA = 0;
  let sumB = 0;

  // Call the injected model and coerce its answer to a string, refusing
  // asynchronous results instead of scoring "[object Promise]".
  const ask = (prompt, meta) => {
    const out = complete(prompt, meta);
    if (out != null && typeof out.then === 'function') {
      // Mark the promise as handled so a later rejection is not also
      // reported as unhandled on top of the TypeError below.
      out.then(null, () => {});
      throw new TypeError(
        'runAblation: `complete` must return a string synchronously, but returned a Promise — ' +
          'resolve the model output before calling runAblation'
      );
    }
    return String(out || '');
  };

  for (const task of tasks || []) {
    const basePrompt = task.prompt || '';
    const groundedPrompt = grounding ? `${grounding}\n\n---\n\n${basePrompt}` : basePrompt;

    const outA = ask(basePrompt, { id: task.id, grounded: false });
    const outB = ask(groundedPrompt, { id: task.id, grounded: true });

    const aFlagged = scoreAnswer(outA, cwd);
    const bFlagged = scoreAnswer(outB, cwd);
    sumA += aFlagged;
    sumB += bFlagged;
    rows.push({ id: task.id, aFlagged, bFlagged });
  }

  const n = rows.length;
  // Flagged errors per 100 tasks; 0 for an empty corpus to avoid dividing by zero.
  const per100 = (sum) => (n > 0 ? (sum / n) * 100 : 0);
  return {
    tasks: rows,
    aggregate: {
      n,
      withoutFlagged: sumA,
      withFlagged: sumB,
      delta: sumA - sumB,
      withoutPer100: per100(sumA),
      withPer100: per100(sumB),
    },
  };
}
|
|
112
|
+
|
|
113
|
+
module.exports = { buildGrounding, scoreAnswer, runAblation };
|
package/src/mcp/server.js
CHANGED