sigmap 7.25.1 → 7.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/llms-full.txt CHANGED
@@ -9,13 +9,13 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.25.1 | Benchmark: sigmap-v7.25-main (2026-06-21)
12
+ # Version: 7.26.0 | Benchmark: sigmap-v7.26-main (2026-06-22)
13
13
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
16
16
  ---
17
17
 
18
- ## Core metrics (benchmark: sigmap-v7.25-main, 2026-06-21)
18
+ ## Core metrics (benchmark: sigmap-v7.26-main, 2026-06-22)
19
19
 
20
20
  | Metric | Without SigMap | With SigMap |
21
21
  |--------|----------------|-------------|
@@ -109,6 +109,9 @@ sigmap squeeze <file|-> Minimize a pasted stacktrace/CI-log/JSO
109
109
  sigmap ask "<query>" --squeeze Auto-accept input minimization (no prompt; for scripts/CI)
110
110
  sigmap ask "<query>" --no-squeeze Disable input minimization entirely
111
111
  sigmap ask "<query>" --squeeze-threshold N Min reduction % to prompt (default 30)
112
+ sigmap evidence "<query>" Build a deterministic Evidence Pack (JSON) → .context/evidence-pack.json
113
+ sigmap evidence "<query>" --markdown Emit the Markdown handoff rendering to stdout
114
+ sigmap evidence "<query>" --top <n> --budget <n> --out <path> Tune ranked files / token budget / write rendered output
112
115
  sigmap note "<text>" Append a note to the cross-session decision log
113
116
  sigmap note List recent notes (also: note --list <N>)
114
117
  sigmap status Show repo state — branch, dirty files, index freshness, notes
package/llms.txt CHANGED
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.25.1 | Benchmark: sigmap-v7.25-main (2026-06-21)
12
+ # Version: 7.26.0 | Benchmark: sigmap-v7.26-main (2026-06-22)
13
13
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
@@ -21,7 +21,7 @@ Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
21
21
  - No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
22
22
  - Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
23
23
 
24
- ## Core metrics (benchmark: sigmap-v7.25-main, 2026-06-21)
24
+ ## Core metrics (benchmark: sigmap-v7.26-main, 2026-06-22)
25
25
 
26
26
  - hit@5 retrieval: 75.6% vs 13.6% random baseline (5.6× lift)
27
27
  - Token reduction: 97.0% average across benchmark repos
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "7.25.1",
3
+ "version": "7.26.0",
4
4
  "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
5
5
  "main": "packages/core/index.js",
6
6
  "exports": {
@@ -33,6 +33,8 @@
33
33
  "mcp": "node gen-context.js --mcp",
34
34
  "check:bundle": "node scripts/check-bundle.mjs",
35
35
  "check:version-meta": "node scripts/check-version-meta.mjs",
36
+ "build:bundle": "node scripts/build-bundle.mjs",
37
+ "check:bundle:repro": "node scripts/build-bundle.mjs --check",
36
38
  "build:binary": "node scripts/build-binary.mjs",
37
39
  "verify:binary": "node scripts/verify-binary.mjs",
38
40
  "version:sync": "node scripts/sync-versions.mjs",
@@ -41,7 +43,7 @@
41
43
  "metrics:latest": "node scripts/gen-benchmark-latest.mjs",
42
44
  "metrics:sync": "node scripts/gen-benchmark-latest.mjs && node scripts/check-version-meta.mjs --fix && node scripts/sync-metrics.mjs && node scripts/generate-llms.mjs",
43
45
  "check:metrics": "node scripts/gen-benchmark-latest.mjs --check && node scripts/check-version-meta.mjs && node scripts/sync-metrics.mjs --check",
44
- "prepublishOnly": "node scripts/check-bundle.mjs && node scripts/gen-benchmark-latest.mjs --check && node scripts/check-version-meta.mjs && node scripts/sync-metrics.mjs --check && node scripts/generate-llms.mjs",
46
+ "prepublishOnly": "node scripts/check-bundle.mjs && node scripts/build-bundle.mjs --check && node scripts/gen-benchmark-latest.mjs --check && node scripts/check-version-meta.mjs && node scripts/sync-metrics.mjs --check && node scripts/generate-llms.mjs",
45
47
  "benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs",
46
48
  "benchmark:llm-ablation": "node scripts/run-llm-ablation.mjs"
47
49
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-cli",
3
- "version": "7.25.1",
3
+ "version": "7.26.0",
4
4
  "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-core",
3
- "version": "7.25.1",
3
+ "version": "7.26.0",
4
4
  "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -0,0 +1,267 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Evidence Pack v1 (v8.0 E1).
5
+ *
6
+ * A deterministic, machine-consumable signature-and-evidence map. Replaces the
7
+ * "paste this into your prompt" workflow with a byte-stable JSON artifact that
8
+ * an agent or CI can ingest directly — every entry anchored to a real file,
9
+ * symbol, and line range.
10
+ *
11
+ * Composed entirely from shipped zero-dep modules:
12
+ * - retrieval/ranker → ranked files, scores, signals
13
+ * - extractors/line-anchor → `:start-end` suffix parsing (sourceLines)
14
+ * - security/scanner → secret redaction of symbols
15
+ * - crypto (node builtin) → sha256 grounding hash
16
+ *
17
+ * Determinism: the pack carries NO wall-clock timestamp. Given an unchanged
18
+ * repository, `buildEvidencePack` returns a byte-identical object, and
19
+ * `grounding.contextHash` is stable. This is the point — the pack is auditable.
20
+ */
21
+
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const crypto = require('crypto');
25
+
26
+ const { buildSigIndex, rank, detectIntent } = require('../retrieval/ranker');
27
+ const { scan } = require('../security/scanner');
28
+
29
// Version tag written into every pack as `schemaVersion`.
const SCHEMA_VERSION = '1.0';
// Fallback token budget when `opts.budget` is not a finite number.
const DEFAULT_BUDGET = 6000;
// Fallback number of ranked files when `opts.top` is not a finite number.
const DEFAULT_TOP = 12;

// Path heuristics used by riskLabelFor(). All patterns expect forward-slash
// separators and are deliberately non-global so `.test()` stays stateless.

// Build output / vendored directories, `.generated.` / `.min.` / `.bundle.`
// artifacts, and protobuf output: `name.pb.<ext>` (covers `.pb.go`),
// `name_pb.<ext>` (e.g. `user_pb.js`), and `name_pb2.py`.
// The `_pb.` arm previously required a leading dot (`._pb.`) and therefore
// never matched `*_pb.js`-style generated files.
const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|(\.pb|_pb)\.|_pb2\.py$/;
// Test directories, `.test.` / `.spec.` infixes, `test_*.py`, and `*_test.(go|py|rb)`.
const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
// Config-like extensions, `*rc` dotfiles, and `*.config.<ext>` files (case-insensitive).
const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
// Security-sensitive keywords appearing as a whole path segment or a
// `.`/`_`/`-` delimited word (case-insensitive).
const SECURITY_RE = /(^|\/|[._-])(auth|authn|authz|login|password|passwd|secret|credential|token|session|crypto|cipher|payment|billing|checkout|oauth|jwt|permission|acl|rbac)([._-]|\/|$)/i;
37
+
38
/**
 * Separate a trailing `:start-end` line anchor from the text of a signature.
 *
 * When the signature does not end in `:<digits>-<digits>` (optionally
 * surrounded by whitespace) the trimmed signature is returned with both
 * bounds set to `null`.
 *
 * @param {string} sig - signature text, optionally suffixed with ` :start-end`
 * @returns {{ symbol: string, start: number|null, end: number|null }}
 */
function parseAnchor(sig) {
  const anchorPattern = /\s*:(\d+)-(\d+)\s*$/;
  const found = anchorPattern.exec(sig);

  if (found === null) {
    return { symbol: sig.trim(), start: null, end: null };
  }

  const [, startDigits, endDigits] = found;
  // Everything before the anchor (and its leading whitespace) is the symbol.
  const symbol = sig.slice(0, found.index).trim();
  const start = parseInt(startDigits, 10);
  const end = parseInt(endDigits, 10);
  return { symbol, start, end };
}
52
+
53
/**
 * Assign a coarse risk label to a file from its path alone.
 *
 * Backslashes are converted to forward slashes first, then the patterns are
 * tried in priority order — generated, test, security, config — and the first
 * one that matches decides the label. Paths matching none are 'source'.
 *
 * @param {string} relPath
 * @returns {'generated'|'test'|'config'|'security'|'source'}
 */
function riskLabelFor(relPath) {
  const normalized = relPath.replace(/\\/g, '/');
  // Order matters: earlier rules win over later ones.
  const rules = [
    [GENERATED_RE, 'generated'],
    [TEST_RE, 'test'],
    [SECURITY_RE, 'security'],
    [CONFIG_RE, 'config'],
  ];
  const winner = rules.find(([pattern]) => pattern.test(normalized));
  return winner ? winner[1] : 'source';
}
67
+
68
/**
 * Filename stem used to pair implementation files with their tests.
 *
 * Takes the basename, drops the final extension, then removes the test
 * markers that TEST_RE recognises so a test file and its implementation
 * reduce to the same stem:
 *   - a trailing `.test` / `.spec` infix  (`foo.test.js`  → `foo`)
 *   - a `_test` suffix on go/py/rb files  (`foo_test.go`  → `foo`)
 *   - a `test_` prefix on Python files    (`test_foo.py`  → `foo`)
 *
 * Backslashes are normalised to `/` first, mirroring riskLabelFor(), so
 * Windows-style relative paths yield the same stem on every platform.
 *
 * @param {string} relPath
 * @returns {string} stem; may be empty (e.g. for dotfiles such as `.env`)
 */
function stemOf(relPath) {
  const base = path.posix.basename(relPath.replace(/\\/g, '/'));
  let stem = base.replace(/\.[^.]+$/, '').replace(/\.(test|spec)$/i, '');
  // Only strip underscore markers for the exact shapes TEST_RE treats as
  // tests, so ordinary files like `unit_test.js` keep their full stem.
  if (/_test\.(go|py|rb)$/.test(base)) stem = stem.replace(/_test$/, '');
  if (/^test_.+\.py$/.test(base)) stem = stem.replace(/^test_/, '');
  return stem;
}
73
+
74
/**
 * Best-effort lookup of the test files that belong to an implementation file.
 *
 * A candidate qualifies when it is a different path, riskLabelFor() labels it
 * 'test', and its lower-cased stem equals the lower-cased stem of `relPath`.
 * Files that are themselves tests, or whose stem is empty, have no related
 * tests. The result is sorted so the output does not depend on index order.
 *
 * @param {string} relPath
 * @param {string[]} allFiles - universe of indexed files (relative paths)
 * @returns {string[]}
 */
function findRelatedTests(relPath, allFiles) {
  if (riskLabelFor(relPath) === 'test') {
    return [];
  }

  const wantedStem = stemOf(relPath).toLowerCase();
  if (!wantedStem) {
    return [];
  }

  const isMatchingTest = (candidate) =>
    candidate !== relPath &&
    riskLabelFor(candidate) === 'test' &&
    stemOf(candidate).toLowerCase() === wantedStem;

  return Array.from(allFiles).filter(isMatchingTest).sort();
}
94
+
95
/**
 * Turn a `signals` object into a short, human-readable explanation.
 *
 * Each signal that fired contributes one phrase; phrases are joined with
 * `'; '` in a fixed order. A missing `signals` value, or one where nothing
 * fired, yields the generic 'ranked match'.
 *
 * @param {object} [signals]
 * @returns {string}
 */
function reasonFor(signals) {
  const FALLBACK = 'ranked match';
  if (!signals) return FALLBACK;

  // [phrase, did-the-signal-fire] pairs, in display order.
  const candidates = [
    ['symbol-name match', signals.symbolMatch > 0],
    ['exact token match', signals.exactToken > 0],
    ['prefix match', signals.prefixMatch > 0],
    ['path match', signals.pathMatch > 0],
    ['dependency-graph neighbor', signals.graphBoost > 0],
    // Boosts are compared against 1 rather than 0.
    ['recently changed', signals.recencyBoost > 1],
    ['learned weight', signals.learnedWeights && signals.learnedWeights !== 1],
  ];

  const phrases = candidates.filter(([, fired]) => fired).map(([phrase]) => phrase);
  return phrases.length === 0 ? FALLBACK : phrases.join('; ');
}
108
+
109
/**
 * Estimate the token cost of a signature block: the signatures are joined
 * with newlines and the character count is divided by four, rounded up.
 *
 * @param {string[]} sigs
 * @returns {number}
 */
function sigTokens(sigs) {
  const block = sigs.join('\n');
  const charCount = block.length;
  return Math.ceil(charCount / 4);
}
113
+
114
/**
 * Serialize a value to compact JSON after passing it through sortKeys(),
 * producing the string that is fed to the hash.
 *
 * @param {*} value
 * @returns {string}
 */
function canonicalize(value) {
  const ordered = sortKeys(value);
  return JSON.stringify(ordered);
}
122
+
123
/**
 * Recursively rebuild a value so every object's keys are inserted in sorted
 * order. Arrays keep their element order (each element is processed);
 * primitives and `null` are returned unchanged.
 *
 * @param {*} value
 * @returns {*}
 */
function sortKeys(value) {
  if (Array.isArray(value)) {
    return value.map((item) => sortKeys(item));
  }

  const isObject = Boolean(value) && typeof value === 'object';
  if (!isObject) {
    return value;
  }

  return Object.keys(value)
    .sort()
    .reduce((ordered, key) => {
      ordered[key] = sortKeys(value[key]);
      return ordered;
    }, {});
}
132
+
133
/**
 * Assemble an Evidence Pack for a query.
 *
 * Steps, in order:
 *   1. Resolve `budget` / `top` (non-finite values fall back to the defaults)
 *      and obtain the signature index (a supplied Map, else buildSigIndex(cwd)).
 *   2. Rank the index for the query and drop zero-score entries unless the
 *      query is empty/whitespace.
 *   3. Walk the ranked entries in order, admitting each one whose estimated
 *      token cost still fits the budget. The first admitted file is never
 *      rejected for size; entries that do not fit are recorded in
 *      `droppedFiles` and the walk continues with the next entry.
 *   4. For admitted files, pass the signatures through scan() and split each
 *      surviving signature into symbol text and optional line anchor.
 *   5. Compute grounding totals and a sha256 over the canonicalized pack
 *      (with `contextHash` left out of the hashed content).
 *
 * No timestamp is written into the pack.
 *
 * @param {string} query
 * @param {string} cwd
 * @param {object} [opts]
 * @param {number} [opts.budget=6000] - token budget for included files
 * @param {number} [opts.top=12] - max ranked files to consider
 * @param {Map<string,string[]>} [opts.sigIndex] - pre-built index (else built from cwd)
 * @returns {object} Evidence Pack v1
 */
function buildEvidencePack(query, cwd, opts = {}) {
  const budget = Number.isFinite(opts.budget) ? opts.budget : DEFAULT_BUDGET;
  const top = Number.isFinite(opts.top) ? opts.top : DEFAULT_TOP;

  const sigIndex = opts.sigIndex instanceof Map ? opts.sigIndex : buildSigIndex(cwd);
  const intent = detectIntent(query);
  const allFiles = [...sigIndex.keys()];

  const candidates = rank(query, sigIndex, { topK: top, cwd });
  const ranked = candidates.filter((entry) => entry.score > 0 || ranked0Empty(query));

  // Highest score among the kept entries; confidence is expressed relative to it.
  let maxScore = 0;
  for (const entry of ranked) {
    maxScore = Math.max(maxScore, entry.score);
  }

  const files = [];
  const droppedFiles = [];
  let used = 0;
  let symbolCount = 0;
  let anchoredSymbols = 0;

  for (const entry of ranked) {
    const cost = sigTokens(entry.sigs);
    const overBudget = used + cost > budget;
    // `files.length > 0` guarantees at least one file is always admitted.
    if (overBudget && files.length > 0) {
      droppedFiles.push({ path: entry.file, reason: `budget: would exceed ${budget}-token limit` });
      continue;
    }
    used += cost;

    // Only signatures that scan() reports under `.safe` are emitted.
    const parsed = Array.from(scan(entry.sigs, entry.file).safe, (sig) => parseAnchor(sig));
    const symbols = parsed.map((item) => item.symbol);
    const sourceLines = parsed
      .filter((item) => item.start !== null)
      .map((item) => ({ symbol: item.symbol, start: item.start, end: item.end }));

    symbolCount += symbols.length;
    anchoredSymbols += sourceLines.length;

    files.push({
      path: entry.file,
      symbols,
      reason: reasonFor(entry.signals),
      // Score relative to the best score, rounded to two decimals.
      confidence: maxScore > 0 ? Math.round((entry.score / maxScore) * 100) / 100 : 0,
      sourceLines,
      relatedTests: findRelatedTests(entry.file, allFiles),
      riskLabel: riskLabelFor(entry.file),
    });
  }

  // Share of symbols that carry a line anchor, rounded to three decimals.
  const anchorCoverage = symbolCount > 0 ? Math.round((anchoredSymbols / symbolCount) * 1000) / 1000 : 0;

  const grounding = {
    symbolCount,
    anchoredSymbols,
    anchorCoverage,
    contextHash: null,
    deterministic: true,
  };

  const pack = {
    schemaVersion: SCHEMA_VERSION,
    query,
    intent,
    files,
    tokenBudget: { limit: budget, used, remaining: Math.max(0, budget - used) },
    droppedFiles,
    grounding,
  };

  // The hash covers the whole pack except the hash field itself: an
  // `undefined` value is omitted by JSON.stringify inside canonicalize().
  const hashInput = Object.assign({}, pack, {
    grounding: Object.assign({}, grounding, { contextHash: undefined }),
  });
  const digest = crypto.createHash('sha256').update(canonicalize(hashInput)).digest('hex');
  grounding.contextHash = 'sha256:' + digest;

  return pack;
}
216
+
217
/**
 * True when the query is falsy or contains only whitespace. Used by
 * buildEvidencePack() to decide whether zero-score ranked entries are kept.
 *
 * @param {string} query
 * @returns {boolean}
 */
function ranked0Empty(query) {
  if (!query) {
    return true;
  }
  return !query.trim();
}
221
+
222
/**
 * Render a pack as pretty-printed JSON with two-space indentation. Keys are
 * emitted in the order the pack object already holds them.
 *
 * @param {object} pack
 * @returns {string}
 */
function formatJSON(pack) {
  const indentWidth = 2;
  const rendered = JSON.stringify(pack, null, indentWidth);
  return rendered;
}
226
+
227
/**
 * Render a pack as a Markdown handoff document.
 *
 * Layout: a title with the query, a bullet summary (schema, intent, budget,
 * grounding, hash), one section per included file with its symbols in a
 * fenced block, and — only when something was dropped — a closing
 * "Dropped (over budget)" list.
 *
 * @param {object} pack - an Evidence Pack as returned by buildEvidencePack()
 * @returns {string}
 */
function formatMarkdown(pack) {
  const { tokenBudget, grounding } = pack;
  const coveragePercent = Math.round(grounding.anchorCoverage * 100);

  const lines = [
    `# Evidence Pack — \`${pack.query}\``,
    '',
    `- **Schema:** v${pack.schemaVersion}`,
    `- **Intent:** ${pack.intent}`,
    `- **Budget:** ${tokenBudget.used} / ${tokenBudget.limit} tokens used (${tokenBudget.remaining} remaining)`,
    `- **Grounding:** ${grounding.anchoredSymbols}/${grounding.symbolCount} symbols anchored (${coveragePercent}%)`,
    `- **Hash:** \`${grounding.contextHash}\``,
    '',
  ];

  for (const file of pack.files) {
    lines.push(`## \`${file.path}\` _(${file.riskLabel}, confidence ${file.confidence})_`);
    lines.push(`_${file.reason}_`);
    if (file.relatedTests.length) {
      const quotedTests = file.relatedTests.map((testPath) => `\`${testPath}\``);
      lines.push(`Related tests: ${quotedTests.join(', ')}`);
    }
    lines.push('', '```');
    for (const symbol of file.symbols) {
      lines.push(symbol);
    }
    lines.push('```', '');
  }

  if (pack.droppedFiles.length) {
    lines.push('## Dropped (over budget)');
    for (const dropped of pack.droppedFiles) {
      lines.push(`- \`${dropped.path}\` — ${dropped.reason}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
258
+
259
+ module.exports = {
260
+ buildEvidencePack,
261
+ formatJSON,
262
+ formatMarkdown,
263
+ parseAnchor,
264
+ riskLabelFor,
265
+ findRelatedTests,
266
+ SCHEMA_VERSION,
267
+ };
package/src/mcp/server.js CHANGED
@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
18
18
 
19
19
  const SERVER_INFO = {
20
20
  name: 'sigmap',
21
- version: '7.25.1',
21
+ version: '7.26.0',
22
22
  description: 'SigMap MCP server — code signatures on demand',
23
23
  };
24
24