sigmap 7.22.2 → 7.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,15 @@ Format: [Semantic Versioning](https://semver.org/)
10
10
 
11
11
  ---
12
12
 
13
+ ## [7.23.0] — 2026-06-19
14
+
15
+ Minor release — make the §9 LLM ablation produce a statistically stable number.
16
+
17
+ ### Added
18
+ - **§9 ablation: `--runs N` averaging + 100-task corpus (#353):** the cleaned-guard §9 result is directionally clear (grounding cuts flagged codebase-fact errors ~13 → 3 per 100) but at N=40 with single-digit raw counts a single pass is noisy. `scripts/run-llm-ablation.mjs` gains `--runs N` (default 1) that runs the full task set N times with **fresh model calls per pass** and prints a mean ± [min–max] summary; `src/eval/llm-ablation.js` adds a pure, unit-tested `aggregateRuns(aggregates[])` (mean/min/max of without/with per-100 and delta). The committed corpus (`benchmarks/llm-ablation-tasks.json`) expands from 40 to **100** real-symbol tasks (`gen-ablation-corpus.mjs` default 40 → 100) for a tighter single-run estimate. The network touch stays confined to `scripts/`; the offline harness is unchanged. Run the robust headline with `npm run benchmark:llm-ablation -- --runs 5 --save`.
19
+
20
+ ---
21
+
13
22
  ## [7.22.2] — 2026-06-19
14
23
 
15
24
  Patch release — clears the two remaining `verify-ai-output` false-positive classes surfaced by the §9 ablation.
package/gen-context.js CHANGED
@@ -32,6 +32,177 @@ function __require(key) {
32
32
  // ── ./src/conventions/report ──
33
33
  // ── ./src/conventions/ci ──
34
34
  // ── ./src/eval/llm-ablation ──
35
+ __factories["./src/eval/llm-ablation"] = function(module, exports) {
36
+
37
+ /**
38
+ * LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
39
+ *
40
+ * Runs a model twice per task — (A) no SigMap context, (B) with SigMap
41
+ * grounding — pipes both outputs through the hallucination guard, and reports
42
+ * the measured delta in flagged codebase-fact errors. The model call is
43
+ * INJECTED (`complete(prompt) → text`), so the harness itself is pure and
44
+ * offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
45
+ * Zero-dependency, bundle-safe (no network here).
46
+ */
47
+
48
+ const { verify } = __require('./src/verify/hallucination-guard');
49
+
50
+ const path = require('path');
51
+
52
+ /** Strip a signature's trailing line anchor (` :12-20` or ` :12`, with any whitespace around it) and trim the result, for prompt cleanliness; the input is coerced with String() first. */
53
+ function _cleanSig(sig) {
54
+ return String(sig).replace(/\s*:\d+(?:-\d+)?\s*$/, '').trim();
55
+ }
56
+
57
+ /**
58
+ * Build the SigMap grounding block for a repo — what we prepend to a task
59
+ * prompt in arm B. Conventions (the house style) + **exact signatures** grouped
60
+ * by file (what `get_callee_signatures` returns), so the model references the
61
+ * real surface instead of guessing — the actual product behavior, not a flat
62
+ * name dump. Both sections are best-effort: when a helper throws, that section is silently left out.
63
+ * @param {string} cwd repo directory passed to the index/conventions helpers and used to relativize file paths
64
+ * @param {object} [opts] optional settings
65
+ * @param {number} [opts.maxSignatures=150] cap on signature lines across all files (bounds prompt size); file headings are not counted, and files with no usable signatures are skipped
66
+ * @returns {string} the built sections joined by a blank line; an empty string when no section was built
67
+ */
68
+ function buildGrounding(cwd, opts = {}) {
69
+ const maxSignatures = opts.maxSignatures != null ? opts.maxSignatures : 150;
70
+ const parts = [];
71
+
72
+ let index = null;
73
+ try {
74
+ const { buildSigIndex } = __require('./src/retrieval/ranker');
75
+ index = buildSigIndex(cwd);
76
+ } catch (_) {}
77
+
78
+ try {
79
+ const { extractConventions } = __require('./src/conventions/extract');
80
+ const { renderConventionsBlock } = __require('./src/conventions/inject');
81
+ const files = index ? [...index.keys()] : [];
82
+ parts.push(renderConventionsBlock(extractConventions(cwd, files)));
83
+ } catch (_) {}
84
+
85
+ if (index) {
86
+ const lines = ['## Exact signatures (use these — do not invent symbols or paths)'];
87
+ let count = 0;
88
+ for (const [file, sigs] of index) {
89
+ if (count >= maxSignatures) break;
90
+ const rel = path.relative(cwd, file).replace(/\\/g, '/');
91
+ const clean = (sigs || []).map(_cleanSig).filter(Boolean);
92
+ if (!clean.length) continue;
93
+ lines.push(`### ${rel}`);
94
+ for (const s of clean) {
95
+ if (count >= maxSignatures) break;
96
+ lines.push(s);
97
+ count++;
98
+ }
99
+ }
100
+ if (count > 0) parts.push(lines.join('\n'));
101
+ }
102
+
103
+ return parts.join('\n\n');
104
+ }
105
+
106
+ /**
107
+ * Score an answer with the hallucination guard's verify(): flagged codebase-fact errors + the issue list (the §9 metric).
108
+ * @param {string} answerText model output to check; null, undefined or empty input is scored as an empty string
109
+ * @param {string} cwd directory handed to verify() as its second argument
110
+ * @returns {{ total: number, issues: object[] }} the guard's total and issues; `{ total: 0, issues: [] }` when verify() throws, so a guard failure reads as zero flags
111
+ */
112
+ function scoreAnswerDetail(answerText, cwd) {
113
+ try {
114
+ const { issues, summary } = verify(String(answerText || ''), cwd);
115
+ return { total: summary.total || 0, issues: issues || [] };
116
+ } catch (_) {
117
+ return { total: 0, issues: [] };
118
+ }
119
+ }
120
+
121
+ /** Count flagged codebase-fact errors in an answer (the §9 metric): the `total` field of scoreAnswerDetail(answerText, cwd). */
122
+ function scoreAnswer(answerText, cwd) {
123
+ return scoreAnswerDetail(answerText, cwd).total;
124
+ }
125
+
126
+ /**
127
+ * Run the A/B ablation over a task corpus: each task is completed twice (A = bare prompt, B = grounding + separator + prompt) and both outputs are scored.
128
+ * @param {Array<{id:string, prompt:string}>} tasks task list; a missing list is treated as empty and a missing prompt as an empty string
129
+ * @param {string} cwd directory used for scoring and, when no grounding is supplied, for building it
130
+ * @param {(prompt:string, meta:object)=>string} complete injected model call, invoked with meta `{ id, grounded }`; the return value is stringified directly (NOTE(review): a Promise would not be awaited — confirm callers pass a synchronous adapter)
131
+ * @param {object} [opts] optional settings
132
+ * @param {string} [opts.grounding] precomputed grounding (else built from cwd); an empty string leaves arm B's prompt identical to arm A's
133
+ * @param {boolean} [opts.collectIssues] attach `aIssues`/`bIssues` per task
134
+ * @returns {{ tasks: object[], aggregate: object }} per-task rows `{ id, aFlagged, bFlagged }` plus an aggregate `{ n, withoutFlagged, withFlagged, delta, withoutPer100, withPer100 }`; the per-100 rates are 0 when there are no tasks
135
+ */
136
+ function runAblation(tasks, cwd, complete, opts = {}) {
137
+ const grounding = opts.grounding != null ? opts.grounding : buildGrounding(cwd);
138
+ const rows = [];
139
+ let sumA = 0;
140
+ let sumB = 0;
141
+
142
+ for (const task of tasks || []) {
143
+ const basePrompt = task.prompt || '';
144
+ const groundedPrompt = grounding ? `${grounding}\n\n---\n\n${basePrompt}` : basePrompt;
145
+
146
+ const outA = String(complete(basePrompt, { id: task.id, grounded: false }) || '');
147
+ const outB = String(complete(groundedPrompt, { id: task.id, grounded: true }) || '');
148
+
149
+ const a = scoreAnswerDetail(outA, cwd);
150
+ const b = scoreAnswerDetail(outB, cwd);
151
+ sumA += a.total;
152
+ sumB += b.total;
153
+ const row = { id: task.id, aFlagged: a.total, bFlagged: b.total };
154
+ if (opts.collectIssues) { row.aIssues = a.issues; row.bIssues = b.issues; }
155
+ rows.push(row);
156
+ }
157
+
158
+ const n = rows.length;
159
+ const per100 = (sum) => (n > 0 ? (sum / n) * 100 : 0);
160
+ return {
161
+ tasks: rows,
162
+ aggregate: {
163
+ n,
164
+ withoutFlagged: sumA,
165
+ withFlagged: sumB,
166
+ delta: sumA - sumB,
167
+ withoutPer100: per100(sumA),
168
+ withPer100: per100(sumB),
169
+ },
170
+ };
171
+ }
172
+
173
+ /** Return `{ mean, min, max }` for a number list; an empty list yields all zeros (the guard avoids a NaN mean and infinite min/max). */
174
+ function _stats(nums) {
175
+ if (!nums.length) return { mean: 0, min: 0, max: 0 };
176
+ const sum = nums.reduce((a, b) => a + b, 0);
177
+ return { mean: sum / nums.length, min: Math.min(...nums), max: Math.max(...nums) };
178
+ }
179
+
180
+ /**
181
+ * Aggregate several `runAblation` passes into a stable estimate — mean ± range
182
+ * of the without/with per-100 flag rates and their delta. At N=40 with tiny raw
183
+ * counts a single pass is noisy; averaging repeated passes gives a publishable
184
+ * number with an honest spread. The delta is taken per pass (without minus with) before the stats are computed.
185
+ * @param {object[]} aggregates the `.aggregate` object from each runAblation pass; falsy entries are skipped and a missing list counts as no passes
186
+ * @returns {{ runs:number, n:number, withoutPer100:object, withPer100:object, deltaPer100:object }} `runs` is the number of passes kept, `n` is the task count of the first kept pass (0 when none), and the three per-100 fields are `{ mean, min, max }` objects
187
+ */
188
+ function aggregateRuns(aggregates) {
189
+ const runs = (aggregates || []).filter(Boolean);
190
+ const without = runs.map((a) => a.withoutPer100);
191
+ const withG = runs.map((a) => a.withPer100);
192
+ const delta = runs.map((a) => a.withoutPer100 - a.withPer100);
193
+ return {
194
+ runs: runs.length,
195
+ n: runs.length ? runs[0].n : 0,
196
+ withoutPer100: _stats(without),
197
+ withPer100: _stats(withG),
198
+ deltaPer100: _stats(delta),
199
+ };
200
+ }
201
+
202
+ module.exports = { buildGrounding, scoreAnswer, scoreAnswerDetail, runAblation, aggregateRuns };
203
+
204
+ };
205
+
35
206
  // ── ./src/conventions/fix ──
36
207
  // ── ./src/conventions/update ──
37
208
  // ── ./src/scaffold/persist ──
@@ -7931,7 +8102,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
7931
8102
 
7932
8103
  const SERVER_INFO = {
7933
8104
  name: 'sigmap',
7934
- version: '7.22.2',
8105
+ version: '7.23.0',
7935
8106
  description: 'SigMap MCP server — code signatures on demand',
7936
8107
  };
7937
8108
 
@@ -13634,7 +13805,7 @@ function __tryGit(args, opts = {}) {
13634
13805
  catch (_) { return ''; }
13635
13806
  }
13636
13807
 
13637
- const VERSION = '7.22.2';
13808
+ const VERSION = '7.23.0';
13638
13809
  const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
13639
13810
 
13640
13811
  function requireSourceOrBundled(key) {
package/llms-full.txt CHANGED
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.22.2 | Benchmark: sigmap-v7.0-main (2026-06-19)
12
+ # Version: 7.23.0 | Benchmark: sigmap-v7.0-main (2026-06-19)
13
13
  # Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
package/llms.txt CHANGED
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.22.2 | Benchmark: sigmap-v7.0-main (2026-06-19)
12
+ # Version: 7.23.0 | Benchmark: sigmap-v7.0-main (2026-06-19)
13
13
  # Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "7.22.2",
3
+ "version": "7.23.0",
4
4
  "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
5
5
  "main": "packages/core/index.js",
6
6
  "exports": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-cli",
3
- "version": "7.22.2",
3
+ "version": "7.23.0",
4
4
  "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-core",
3
- "version": "7.22.2",
3
+ "version": "7.23.0",
4
4
  "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -136,4 +136,33 @@ function runAblation(tasks, cwd, complete, opts = {}) {
136
136
  };
137
137
  }
138
138
 
139
- module.exports = { buildGrounding, scoreAnswer, scoreAnswerDetail, runAblation };
139
+ /** Return `{ mean, min, max }` for a number list; an empty list yields all zeros (the guard avoids a NaN mean and infinite min/max). */
140
+ function _stats(nums) {
141
+ if (!nums.length) return { mean: 0, min: 0, max: 0 };
142
+ const sum = nums.reduce((a, b) => a + b, 0);
143
+ return { mean: sum / nums.length, min: Math.min(...nums), max: Math.max(...nums) };
144
+ }
145
+
146
+ /**
147
+ * Aggregate several `runAblation` passes into a stable estimate — mean ± range
148
+ * of the without/with per-100 flag rates and their delta. At N=40 with tiny raw
149
+ * counts a single pass is noisy; averaging repeated passes gives a publishable
150
+ * number with an honest spread. The delta is taken per pass (without minus with) before the stats are computed.
151
+ * @param {object[]} aggregates the `.aggregate` object from each runAblation pass; falsy entries are skipped and a missing list counts as no passes
152
+ * @returns {{ runs:number, n:number, withoutPer100:object, withPer100:object, deltaPer100:object }} `runs` is the number of passes kept, `n` is the task count of the first kept pass (0 when none), and the three per-100 fields are `{ mean, min, max }` objects
153
+ */
154
+ function aggregateRuns(aggregates) {
155
+ const runs = (aggregates || []).filter(Boolean);
156
+ const without = runs.map((a) => a.withoutPer100);
157
+ const withG = runs.map((a) => a.withPer100);
158
+ const delta = runs.map((a) => a.withoutPer100 - a.withPer100);
159
+ return {
160
+ runs: runs.length,
161
+ n: runs.length ? runs[0].n : 0,
162
+ withoutPer100: _stats(without),
163
+ withPer100: _stats(withG),
164
+ deltaPer100: _stats(delta),
165
+ };
166
+ }
167
+
168
+ module.exports = { buildGrounding, scoreAnswer, scoreAnswerDetail, runAblation, aggregateRuns };
package/src/mcp/server.js CHANGED
@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
18
18
 
19
19
  const SERVER_INFO = {
20
20
  name: 'sigmap',
21
- version: '7.22.2',
21
+ version: '7.23.0',
22
22
  description: 'SigMap MCP server — code signatures on demand',
23
23
  };
24
24