sigmap 7.14.0 → 7.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,24 @@ Format: [Semantic Versioning](https://semver.org/)
10
10
 
11
11
  ---
12
12
 
13
+ ## [7.16.0] — 2026-06-18
14
+
15
+ Minor release — LLM A/B hallucination ablation harness (grounded codegen, IMPL §9).
16
+
17
+ ### Added
18
+ - **LLM A/B hallucination ablation harness (#325):** the honest measurement behind the grounded-codegen plan (IMPL §9). Runs a model twice per task — (A) no SigMap context, (B) with SigMap grounding — pipes both outputs through the hallucination guard, and reports the measured delta in flagged codebase-fact errors. New zero-dependency, bundle-safe `src/eval/llm-ablation.js` (`buildGrounding`, `scoreAnswer`, `runAblation`) keeps the model call **injected**, so the harness is fully offline-testable; the live runner `scripts/run-llm-ablation.mjs` wires Anthropic via `ANTHROPIC_API_KEY` and prints the A/B table + delta (`npm run benchmark:llm-ablation`), degrading to a graceful skip (exit 0) when no key is set. The network fetch is confined to `scripts/`, never the published library surface. Starter corpus in `benchmarks/llm-ablation-tasks.json`. This turns §9 from an offline coverage proxy into a ready-to-run real A/B — the moment a key is present, it produces the measured hallucination delta.
19
+
20
+ ---
21
+
22
+ ## [7.15.0] — 2026-06-18
23
+
24
+ Minor release — `sigmap conventions --ci` (grounded codegen, Layer 3 polish).
25
+
26
+ ### Added
27
+ - **`sigmap conventions --ci` — gate CI on convention consistency (#322):** completes the consistency-tracking story started by `--report` (v7.14.0). A CI gate that fails when a repo's overall convention consistency falls below a threshold (`--min`, default 0.70), and — with `--no-regress` — also fails when the score dropped vs the last recorded snapshot (best-effort). New zero-dependency, bundle-safe `src/conventions/ci.js` (`ciGate`) reuses `overallScore`; the command is read-only (reads the last `.context/conventions-history.ndjson` snapshot for `--no-regress`, never appends) and exits non-zero on failure, so it drops straight into CI. `--json` for machine output. The remaining `conventions` flags (`--fix`, `--update`) and the §9 LLM A/B benchmark are follow-ups.
28
+
29
+ ---
30
+
13
31
  ## [7.14.0] — 2026-06-17
14
32
 
15
33
  Minor release — `sigmap conventions --report` (grounded codegen, Layer 3 polish).
package/gen-context.js CHANGED
@@ -30,6 +30,175 @@ function __require(key) {
30
30
  // ── ./src/review/review-pr ──
31
31
  // ── ./src/create/orchestrate ──
32
32
  // ── ./src/conventions/report ──
33
+ // ── ./src/conventions/ci ──
34
+ // ── ./src/eval/llm-ablation ──
35
+ __factories["./src/eval/llm-ablation"] = function(module, exports) {
36
+
37
+ /**
38
+ * LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
39
+ *
40
+ * Runs a model twice per task — (A) no SigMap context, (B) with SigMap
41
+ * grounding — pipes both outputs through the hallucination guard, and reports
42
+ * the measured delta in flagged codebase-fact errors. The model call is
43
+ * INJECTED (`complete(prompt) → text`), so the harness itself is pure and
44
+ * offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
45
+ * Zero-dependency, bundle-safe (no network here).
46
+ */
47
+
48
+ const { verify } = __require('./src/verify/hallucination-guard');
49
+
50
/**
 * Build the SigMap grounding block for a repo — the text prepended to a task
 * prompt in arm B: the rendered conventions block followed by a list of known
 * symbol names (so the model can reference real names instead of guessing).
 *
 * Both sections are best-effort: when a section's modules cannot be loaded or
 * throw, that section is simply omitted. Returns '' when nothing could be built.
 *
 * @param {string} cwd repository root handed to the extractors
 * @param {object} [opts]
 * @param {number} [opts.maxSymbols=80] cap on listed symbol names; negative
 *   values are treated as 0 and non-numeric values fall back to 80
 * @returns {string} the non-empty sections joined by a blank line, or ''
 */
function buildGrounding(cwd, opts = {}) {
  // `slice(0, -n)` would drop symbols from the END and `slice(0, NaN)` would
  // list none, so normalise the cap before using it.
  const requested = opts.maxSymbols != null ? Number(opts.maxSymbols) : 80;
  const maxSymbols = Number.isNaN(requested) ? 80 : Math.max(0, requested);
  const parts = [];

  // Section 1: conventions block (best-effort).
  try {
    const { extractConventions } = __require('./src/conventions/extract');
    const { renderConventionsBlock } = __require('./src/conventions/inject');
    let files = [];
    try {
      const { buildSigIndex } = __require('./src/retrieval/ranker');
      files = [...buildSigIndex(cwd).keys()];
    } catch (_) {
      // No signature index available — extract conventions with an empty file list.
    }
    const block = renderConventionsBlock(extractConventions(cwd, files));
    // Skip an empty block so the grounding never starts with a blank separator.
    if (typeof block === 'string' && block.trim() !== '') parts.push(block);
  } catch (_) {
    // Conventions unavailable — omit the section rather than fail the harness.
  }

  // Section 2: known-symbol list (best-effort).
  try {
    const { buildSymbolSet } = __require('./src/verify/hallucination-guard');
    const { set } = buildSymbolSet(cwd);
    const names = [...set].slice(0, maxSymbols);
    if (names.length) parts.push(`## Known symbols (reference these exactly)\n${names.join(', ')}`);
  } catch (_) {
    // Symbol set unavailable — omit the section.
  }

  return parts.join('\n\n');
}
87
+
88
/**
 * Score one model answer for the §9 metric: the `summary.total` count that the
 * hallucination guard's `verify` reports for the text.
 *
 * Never throws — any failure inside the guard (or a missing summary) is
 * reported as 0.
 *
 * @param {string} answerText model output; falsy values are scored as ''
 * @param {string} cwd repository root passed through to `verify`
 * @returns {number} flagged-error count, or 0 when it cannot be determined
 */
function scoreAnswer(answerText, cwd) {
  let flagged = 0;
  try {
    const text = String(answerText || '');
    const report = verify(text, cwd);
    flagged = report.summary.total || 0;
  } catch (_) {
    flagged = 0;
  }
  return flagged;
}
102
+
103
/**
 * Run the A/B ablation over a task corpus.
 *
 * For every task the injected `complete` is called twice, in this order:
 * first with the bare prompt (arm A, `meta.grounded === false`), then with the
 * grounding block prepended (arm B, `meta.grounded === true`). Both outputs
 * are scored with `scoreAnswer` and the counts are summed per arm.
 *
 * `complete` must be SYNCHRONOUS. A Promise/thenable return value would
 * otherwise be stringified to "[object Promise]" and scored as if it were the
 * model's answer, so it is rejected with a TypeError instead.
 *
 * @param {Array<{id:string, prompt:string}>} tasks task corpus; null/undefined is treated as empty
 * @param {string} cwd repository root used for grounding and scoring
 * @param {(prompt:string, meta:object)=>string} complete injected model call
 * @param {object} [opts]
 * @param {string} [opts.grounding] precomputed grounding (else built from cwd);
 *   an empty string disables grounding, so arm B receives the bare prompt
 * @returns {{ tasks: object[], aggregate: object }} per-task rows
 *   (`{ id, aFlagged, bFlagged }`) plus totals, `delta` (A minus B) and the
 *   per-100-task rates
 * @throws {TypeError} when `complete` returns a Promise/thenable
 */
function runAblation(tasks, cwd, complete, opts = {}) {
  const grounding = opts.grounding != null ? opts.grounding : buildGrounding(cwd);
  const rows = [];
  let sumA = 0;
  let sumB = 0;

  // Normalise one model response to text, refusing async results up front.
  const toText = (out, id) => {
    if (out != null && typeof out.then === 'function') {
      throw new TypeError(
        `runAblation: complete() returned a Promise for task "${id}"; the injected model call must be synchronous`
      );
    }
    return String(out || '');
  };

  for (const task of tasks || []) {
    const basePrompt = task.prompt || '';
    const groundedPrompt = grounding ? `${grounding}\n\n---\n\n${basePrompt}` : basePrompt;

    const outA = toText(complete(basePrompt, { id: task.id, grounded: false }), task.id);
    const outB = toText(complete(groundedPrompt, { id: task.id, grounded: true }), task.id);

    const aFlagged = scoreAnswer(outA, cwd);
    const bFlagged = scoreAnswer(outB, cwd);
    sumA += aFlagged;
    sumB += bFlagged;
    rows.push({ id: task.id, aFlagged, bFlagged });
  }

  const n = rows.length;
  // Flagged errors per 100 tasks; 0 for an empty corpus to avoid dividing by zero.
  const per100 = (sum) => (n > 0 ? (sum / n) * 100 : 0);
  return {
    tasks: rows,
    aggregate: {
      n,
      withoutFlagged: sumA,
      withFlagged: sumB,
      delta: sumA - sumB,
      withoutPer100: per100(sumA),
      withPer100: per100(sumB),
    },
  };
}
146
+
147
+ module.exports = { buildGrounding, scoreAnswer, runAblation };
148
+
149
+ };
150
+
151
+ __factories["./src/conventions/ci"] = function(module, exports) {
152
+
153
+ /**
154
+ * Convention CI gate (IMPL.md §4 — `conventions --ci`).
155
+ *
156
+ * Fails CI when a repo's overall convention consistency is below a threshold,
157
+ * and optionally when it regresses vs the last recorded run. Builds on the
158
+ * `--report` score. Pure, zero-dependency, bundle-safe.
159
+ */
160
+
161
+ const { overallScore } = __require('./src/conventions/report');
162
+
163
+ const DEFAULT_MIN = 0.7;
164
+ const EPS = 1e-9;
165
+
166
/**
 * Evaluate the consistency gate.
 *
 * The gate fails when the overall score is below `min`, and — only when
 * `opts.noRegress` is set and `prior.score` is a number — when the score is
 * lower than the prior score by more than EPS.
 *
 * @param {object} result an `extractConventions` result (scored via `overallScore`)
 * @param {object} [opts]
 * @param {number} [opts.min=0.7] minimum overall consistency (0–1). A value
 *   that is not a number (e.g. NaN from parsing a malformed `--min`) falls
 *   back to the default, because `score < NaN` is always false and would
 *   silently pass the gate.
 * @param {boolean} [opts.noRegress=false] also fail if the score dropped vs prior
 * @param {object|null} [prior] the previous snapshot (from `report.snapshot`)
 * @returns {{ score:number, min:number, ok:boolean, regressed:boolean, reasons:string[] }}
 */
function ciGate(result, opts = {}, prior = null) {
  const requestedMin = opts.min;
  const min = requestedMin == null || Number.isNaN(Number(requestedMin)) ? DEFAULT_MIN : requestedMin;
  const score = overallScore(result);
  const reasons = [];
  let ok = true;

  if (score < min) {
    ok = false;
    reasons.push(`consistency ${(score * 100).toFixed(0)}% below min ${(min * 100).toFixed(0)}%`);
  }

  let regressed = false;
  if (opts.noRegress && prior && typeof prior.score === 'number') {
    // EPS absorbs floating-point noise so an unchanged score is not a regression.
    if (score < prior.score - EPS) {
      regressed = true;
      ok = false;
      reasons.push(`consistency dropped ${(prior.score * 100).toFixed(0)}% → ${(score * 100).toFixed(0)}%`);
    }
  }

  return { score, min, ok, regressed, reasons };
}
197
+
198
+ module.exports = { ciGate, DEFAULT_MIN };
199
+
200
+ };
201
+
33
202
  __factories["./src/conventions/report"] = function(module, exports) {
34
203
 
35
204
  /**
@@ -7538,7 +7707,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
7538
7707
 
7539
7708
  const SERVER_INFO = {
7540
7709
  name: 'sigmap',
7541
- version: '7.14.0',
7710
+ version: '7.16.0',
7542
7711
  description: 'SigMap MCP server — code signatures on demand',
7543
7712
  };
7544
7713
 
@@ -13216,7 +13385,7 @@ function __tryGit(args, opts = {}) {
13216
13385
  catch (_) { return ''; }
13217
13386
  }
13218
13387
 
13219
- const VERSION = '7.14.0';
13388
+ const VERSION = '7.16.0';
13220
13389
  const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
13221
13390
 
13222
13391
  function requireSourceOrBundled(key) {
@@ -16411,6 +16580,34 @@ function main() {
16411
16580
  process.exit(0);
16412
16581
  }
16413
16582
 
16583
+ // `--ci`: gate — fail when overall consistency is below a threshold (or regresses).
16584
+ if (args.includes('--ci')) {
16585
+ const { ciGate } = requireSourceOrBundled('./src/conventions/ci');
16586
+ const minIdx = args.indexOf('--min');
16587
+ const min = minIdx !== -1 && args[minIdx + 1] ? parseFloat(args[minIdx + 1]) : undefined;
16588
+ const noRegress = args.includes('--no-regress');
16589
+ let prior = null;
16590
+ if (noRegress) {
16591
+ try {
16592
+ const lines = fs.readFileSync(path.join(cwd, '.context', 'conventions-history.ndjson'), 'utf8').split('\n').filter(Boolean);
16593
+ if (lines.length) prior = JSON.parse(lines[lines.length - 1]);
16594
+ } catch (_) {}
16595
+ }
16596
+ const gate = ciGate(result, { min, noRegress }, prior);
16597
+ if (jsonOut) {
16598
+ process.stdout.write(JSON.stringify(gate) + '\n');
16599
+ process.exit(gate.ok ? 0 : 1);
16600
+ }
16601
+ const pctC = (n) => `${(n * 100).toFixed(0)}%`;
16602
+ if (gate.ok) {
16603
+ console.log(`[sigmap] conventions --ci ✓ PASS — consistency ${pctC(gate.score)} (min ${pctC(gate.min)})`);
16604
+ process.exit(0);
16605
+ }
16606
+ console.log(`[sigmap] conventions --ci ✗ FAIL — consistency ${pctC(gate.score)} (min ${pctC(gate.min)})`);
16607
+ for (const r of gate.reasons) console.log(` • ${r}`);
16608
+ process.exit(1);
16609
+ }
16610
+
16414
16611
  // `--report`: consistency audit + score + trend vs the last run.
16415
16612
  if (args.includes('--report')) {
16416
16613
  const { scoreReport, snapshot } = requireSourceOrBundled('./src/conventions/report');
package/llms-full.txt CHANGED
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.14.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
12
+ # Version: 7.16.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
13
13
  # Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
package/llms.txt CHANGED
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.14.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
12
+ # Version: 7.16.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
13
13
  # Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "7.14.0",
3
+ "version": "7.16.0",
4
4
  "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
5
5
  "main": "packages/core/index.js",
6
6
  "exports": {
@@ -39,7 +39,8 @@
39
39
  "generate:llms": "node scripts/generate-llms.mjs",
40
40
  "validate:llms": "node scripts/validate-llms.mjs",
41
41
  "prepublishOnly": "node scripts/check-bundle.mjs && node scripts/check-version-meta.mjs && node scripts/generate-llms.mjs",
42
- "benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs"
42
+ "benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs",
43
+ "benchmark:llm-ablation": "node scripts/run-llm-ablation.mjs"
43
44
  },
44
45
  "files": [
45
46
  "gen-context.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-cli",
3
- "version": "7.14.0",
3
+ "version": "7.16.0",
4
4
  "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-core",
3
- "version": "7.14.0",
3
+ "version": "7.16.0",
4
4
  "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -0,0 +1,48 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Convention CI gate (IMPL.md §4 — `conventions --ci`).
5
+ *
6
+ * Fails CI when a repo's overall convention consistency is below a threshold,
7
+ * and optionally when it regresses vs the last recorded run. Builds on the
8
+ * `--report` score. Pure, zero-dependency, bundle-safe.
9
+ */
10
+
11
+ const { overallScore } = require('./report');
12
+
13
+ const DEFAULT_MIN = 0.7;
14
+ const EPS = 1e-9;
15
+
16
/**
 * Evaluate the consistency gate.
 *
 * The gate fails when the overall score is below `min`, and — only when
 * `opts.noRegress` is set and `prior.score` is a number — when the score is
 * lower than the prior score by more than EPS.
 *
 * @param {object} result an `extractConventions` result (scored via `overallScore`)
 * @param {object} [opts]
 * @param {number} [opts.min=0.7] minimum overall consistency (0–1). A value
 *   that is not a number (e.g. NaN from parsing a malformed `--min`) falls
 *   back to the default, because `score < NaN` is always false and would
 *   silently pass the gate.
 * @param {boolean} [opts.noRegress=false] also fail if the score dropped vs prior
 * @param {object|null} [prior] the previous snapshot (from `report.snapshot`)
 * @returns {{ score:number, min:number, ok:boolean, regressed:boolean, reasons:string[] }}
 */
function ciGate(result, opts = {}, prior = null) {
  const requestedMin = opts.min;
  const min = requestedMin == null || Number.isNaN(Number(requestedMin)) ? DEFAULT_MIN : requestedMin;
  const score = overallScore(result);
  const reasons = [];
  let ok = true;

  if (score < min) {
    ok = false;
    reasons.push(`consistency ${(score * 100).toFixed(0)}% below min ${(min * 100).toFixed(0)}%`);
  }

  let regressed = false;
  if (opts.noRegress && prior && typeof prior.score === 'number') {
    // EPS absorbs floating-point noise so an unchanged score is not a regression.
    if (score < prior.score - EPS) {
      regressed = true;
      ok = false;
      reasons.push(`consistency dropped ${(prior.score * 100).toFixed(0)}% → ${(score * 100).toFixed(0)}%`);
    }
  }

  return { score, min, ok, regressed, reasons };
}
47
+
48
+ module.exports = { ciGate, DEFAULT_MIN };
@@ -0,0 +1,113 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
5
+ *
6
+ * Runs a model twice per task — (A) no SigMap context, (B) with SigMap
7
+ * grounding — pipes both outputs through the hallucination guard, and reports
8
+ * the measured delta in flagged codebase-fact errors. The model call is
9
+ * INJECTED (`complete(prompt) → text`), so the harness itself is pure and
10
+ * offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
11
+ * Zero-dependency, bundle-safe (no network here).
12
+ */
13
+
14
+ const { verify } = require('../verify/hallucination-guard');
15
+
16
+ /**
17
+ * Build the SigMap grounding block for a repo — what we prepend to a task
18
+ * prompt in arm B. Conventions (the house style) + the known-symbol list
19
+ * (so the model can reference real names instead of guessing).
20
+ * @param {string} cwd
21
+ * @param {object} [opts]
22
+ * @param {number} [opts.maxSymbols=80]
23
+ * @returns {string}
24
+ */
25
+ function buildGrounding(cwd, opts = {}) {
26
+ const maxSymbols = opts.maxSymbols != null ? opts.maxSymbols : 80;
27
+ const parts = [];
28
+
29
+ try {
30
+ const { extractConventions } = require('../conventions/extract');
31
+ const { renderConventionsBlock } = require('../conventions/inject');
32
+ const { loadConfig } = require('../config/loader');
33
+ let files = [];
34
+ try {
35
+ const cfg = loadConfig(cwd);
36
+ const { buildSigIndex } = require('../retrieval/ranker');
37
+ files = [...buildSigIndex(cwd).keys()];
38
+ void cfg;
39
+ } catch (_) {}
40
+ const conv = extractConventions(cwd, files);
41
+ parts.push(renderConventionsBlock(conv));
42
+ } catch (_) {}
43
+
44
+ try {
45
+ const { buildSymbolSet } = require('../verify/hallucination-guard');
46
+ const { set } = buildSymbolSet(cwd);
47
+ const names = [...set].slice(0, maxSymbols);
48
+ if (names.length) parts.push(`## Known symbols (reference these exactly)\n${names.join(', ')}`);
49
+ } catch (_) {}
50
+
51
+ return parts.join('\n\n');
52
+ }
53
+
54
/**
 * Score one model answer for the §9 metric: the `summary.total` count that the
 * hallucination guard's `verify` reports for the text.
 *
 * Never throws — any failure inside the guard (or a missing summary) is
 * reported as 0.
 *
 * @param {string} answerText model output; falsy values are scored as ''
 * @param {string} cwd repository root passed through to `verify`
 * @returns {number} flagged-error count, or 0 when it cannot be determined
 */
function scoreAnswer(answerText, cwd) {
  let flagged = 0;
  try {
    const text = String(answerText || '');
    const report = verify(text, cwd);
    flagged = report.summary.total || 0;
  } catch (_) {
    flagged = 0;
  }
  return flagged;
}
68
+
69
/**
 * Run the A/B ablation over a task corpus.
 *
 * For every task the injected `complete` is called twice, in this order:
 * first with the bare prompt (arm A, `meta.grounded === false`), then with the
 * grounding block prepended (arm B, `meta.grounded === true`). Both outputs
 * are scored with `scoreAnswer` and the counts are summed per arm.
 *
 * `complete` must be SYNCHRONOUS. A Promise/thenable return value would
 * otherwise be stringified to "[object Promise]" and scored as if it were the
 * model's answer, so it is rejected with a TypeError instead.
 *
 * @param {Array<{id:string, prompt:string}>} tasks task corpus; null/undefined is treated as empty
 * @param {string} cwd repository root used for grounding and scoring
 * @param {(prompt:string, meta:object)=>string} complete injected model call
 * @param {object} [opts]
 * @param {string} [opts.grounding] precomputed grounding (else built from cwd);
 *   an empty string disables grounding, so arm B receives the bare prompt
 * @returns {{ tasks: object[], aggregate: object }} per-task rows
 *   (`{ id, aFlagged, bFlagged }`) plus totals, `delta` (A minus B) and the
 *   per-100-task rates
 * @throws {TypeError} when `complete` returns a Promise/thenable
 */
function runAblation(tasks, cwd, complete, opts = {}) {
  const grounding = opts.grounding != null ? opts.grounding : buildGrounding(cwd);
  const rows = [];
  let sumA = 0;
  let sumB = 0;

  // Normalise one model response to text, refusing async results up front.
  const toText = (out, id) => {
    if (out != null && typeof out.then === 'function') {
      throw new TypeError(
        `runAblation: complete() returned a Promise for task "${id}"; the injected model call must be synchronous`
      );
    }
    return String(out || '');
  };

  for (const task of tasks || []) {
    const basePrompt = task.prompt || '';
    const groundedPrompt = grounding ? `${grounding}\n\n---\n\n${basePrompt}` : basePrompt;

    const outA = toText(complete(basePrompt, { id: task.id, grounded: false }), task.id);
    const outB = toText(complete(groundedPrompt, { id: task.id, grounded: true }), task.id);

    const aFlagged = scoreAnswer(outA, cwd);
    const bFlagged = scoreAnswer(outB, cwd);
    sumA += aFlagged;
    sumB += bFlagged;
    rows.push({ id: task.id, aFlagged, bFlagged });
  }

  const n = rows.length;
  // Flagged errors per 100 tasks; 0 for an empty corpus to avoid dividing by zero.
  const per100 = (sum) => (n > 0 ? (sum / n) * 100 : 0);
  return {
    tasks: rows,
    aggregate: {
      n,
      withoutFlagged: sumA,
      withFlagged: sumB,
      delta: sumA - sumB,
      withoutPer100: per100(sumA),
      withPer100: per100(sumB),
    },
  };
}
112
+
113
+ module.exports = { buildGrounding, scoreAnswer, runAblation };
package/src/mcp/server.js CHANGED
@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
18
18
 
19
19
  const SERVER_INFO = {
20
20
  name: 'sigmap',
21
- version: '7.14.0',
21
+ version: '7.16.0',
22
22
  description: 'SigMap MCP server — code signatures on demand',
23
23
  };
24
24