sigmap 7.25.2 → 7.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,15 @@ Format: [Semantic Versioning](https://semver.org/)
10
10
 
11
11
  ---
12
12
 
13
+ ## [7.26.0] — 2026-06-22
14
+
15
+ Minor release — **v8.0 "The Evidence Pack & the Pivot" (E1):** the keystone artifact that makes SigMap consumable by machines instead of copy-paste.
16
+
17
+ ### Added
18
+ - **Evidence Pack JSON v1 (#372):** new `sigmap evidence "<query>"` command emits a deterministic, machine-consumable signature-and-evidence map — a byte-stable JSON artifact (plus a `--markdown`/`--md` handoff rendering) that an agent or CI can ingest directly, every entry anchored to a real file, symbol, and line range. Schema v1: `{ schemaVersion, query, intent, files:[{ path, symbols, reason, confidence, sourceLines, relatedTests, riskLabel }], tokenBudget, droppedFiles, grounding:{ symbolCount, anchoredSymbols, anchorCoverage, contextHash, deterministic } }`. Composed entirely from shipped zero-dep modules (ranker, line-anchor parsing, security scanner, sha256 grounding hash). The pack carries **no wall-clock timestamp** — an unchanged repo yields byte-identical output and a stable `grounding.contextHash`, so the artifact is auditable. CLI flags: `--top`, `--budget`, `--out`; always writes `.context/evidence-pack.json`. `riskLabel` ∈ {generated, test, config, security, source} and `relatedTests` are best-effort v1 (measured test-discovery and richer labels land in v8.5).
19
+
20
+ ---
21
+
13
22
  ## [7.25.2] — 2026-06-22
14
23
 
15
24
  Patch release — **Trust Hygiene (H2):** reproducible bundle build. Completes the v7.25.x "Trust Hygiene" milestone (H1+H2+H3+H4 all shipped).
package/README.md CHANGED
@@ -91,7 +91,7 @@ Ask → Rank → Context → Validate → Judge → Learn
91
91
 
92
92
  <!--SM:benchmarkBlock-->
93
93
  ```
94
- Benchmark : sigmap-v7.25-main (21 repositories, including R language)
94
+ Benchmark : sigmap-v7.26-main (21 repositories, including R language)
95
95
  Date : 2026-06-22
96
96
 
97
97
  Hit@5 : 75.6% (baseline 13.6% — 5.6× lift)
package/gen-context.js CHANGED
@@ -4385,6 +4385,277 @@ __factories["./src/eval/usefulness-scorer"] = function(module, exports) {
4385
4385
 
4386
4386
  };
4387
4387
 
4388
+ // ── ./src/evidence/pack ──
4389
+ __factories["./src/evidence/pack"] = function(module, exports) {
4390
+
4391
+ /**
4392
+ * Evidence Pack v1 (v8.0 E1).
4393
+ *
4394
+ * A deterministic, machine-consumable signature-and-evidence map. Replaces the
4395
+ * "paste this into your prompt" workflow with a byte-stable JSON artifact that
4396
+ * an agent or CI can ingest directly — every entry anchored to a real file,
4397
+ * symbol, and line range.
4398
+ *
4399
+ * Composed entirely from shipped zero-dep modules:
4400
+ * - retrieval/ranker → ranked files, scores, signals
4401
+ * - extractors/line-anchor → `:start-end` suffix format; parsed locally by parseAnchor (sourceLines)
4402
+ * - security/scanner → secret redaction of symbols
4403
+ * - crypto (node builtin) → sha256 grounding hash
4404
+ *
4405
+ * Determinism: the pack carries NO wall-clock timestamp. Given an unchanged
4406
+ * repository, `buildEvidencePack` returns a byte-identical object, and
4407
+ * `grounding.contextHash` is stable. This is the point — the pack is auditable.
4408
+ */
4409
+
4410
+ const fs = require('fs');
4411
+ const path = require('path');
4412
+ const crypto = require('crypto');
4413
+
4414
+ const { buildSigIndex, rank, detectIntent } = __require('./src/retrieval/ranker');
4415
+ const { scan } = __require('./src/security/scanner');
4416
+
4417
+ const SCHEMA_VERSION = '1.0';
4418
+ const DEFAULT_BUDGET = 6000;
4419
+ const DEFAULT_TOP = 12;
4420
+
4421
+ const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|\.(pb|_pb)\.|\.pb\.go$|_pb2\.py$/;
4422
+ const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
4423
+ const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
4424
+ const SECURITY_RE = /(^|\/|[._-])(auth|authn|authz|login|password|passwd|secret|credential|token|session|crypto|cipher|payment|billing|checkout|oauth|jwt|permission|acl|rbac)([._-]|\/|$)/i;
4425
+
4426
/**
 * Split a signature's trailing ` :start-end` line anchor from its symbol text.
 *
 * Non-string input is tolerated: `null`/`undefined` are treated as an empty
 * signature and any other value is stringified, so a malformed index entry
 * yields an un-anchored result instead of a TypeError.
 *
 * @param {string} sig - signature text, optionally ending in `:<start>-<end>`
 * @returns {{ symbol: string, start: number|null, end: number|null }}
 *   `start`/`end` are `null` when the signature carries no anchor.
 */
function parseAnchor(sig) {
  let text = '';
  if (typeof sig === 'string') text = sig;
  else if (sig != null) text = String(sig);

  // Match from the anchor's colon; whitespace in front of it is removed by
  // trim() on the symbol slice, so the pattern needs no leading `\s*`.
  const m = /:(\d+)-(\d+)\s*$/.exec(text);
  if (!m) return { symbol: text.trim(), start: null, end: null };
  return {
    symbol: text.slice(0, m.index).trim(),
    start: parseInt(m[1], 10),
    end: parseInt(m[2], 10),
  };
}
4440
+
4441
/**
 * Assign a coarse risk label to a file using only its path (v1 heuristic).
 *
 * Backslashes are normalised to forward slashes, then the path is tested
 * against the module-level patterns in a fixed priority order: generated,
 * test, security, config. The first pattern that matches decides the label;
 * anything unmatched is plain 'source'.
 *
 * @param {string} relPath - file path (either separator style)
 * @returns {'generated'|'test'|'config'|'security'|'source'}
 */
function riskLabelFor(relPath) {
  const normalized = relPath.replace(/\\/g, '/');
  // Order matters: earlier entries win when several patterns match.
  const priority = [
    [GENERATED_RE, 'generated'],
    [TEST_RE, 'test'],
    [SECURITY_RE, 'security'],
    [CONFIG_RE, 'config'],
  ];
  for (const [pattern, label] of priority) {
    if (pattern.test(normalized)) return label;
  }
  return 'source';
}
4455
+
4456
/**
 * Filename stem: the basename with its final extension removed, followed by
 * removal of a trailing `.test` / `.spec` marker (case-insensitive).
 * A dotfile such as `.bashrc` yields an empty stem.
 */
function stemOf(relPath) {
  const fileName = path.basename(relPath);
  const withoutExtension = fileName.replace(/\.[^.]+$/, '');
  return withoutExtension.replace(/\.(test|spec)$/i, '');
}
4461
+
4462
/**
 * Best-effort impl→test discovery (v1). Deterministic.
 *
 * A candidate is related when it is labelled 'test' by riskLabelFor and its
 * stem equals the implementation file's stem (case-insensitive). Besides the
 * `.test`/`.spec` markers already removed by stemOf, a test file's stem is also
 * compared with a leading `test_` or trailing `_test`/`_spec` removed, so
 * `test_foo.py`, `foo_test.go` and `foo_spec.rb` are found for `foo.*`.
 * Only test-file stems are stripped this way; the implementation stem is used
 * as-is.
 *
 * @param {string} relPath - implementation file to find tests for
 * @param {string[]} allFiles - universe of indexed files (relative paths)
 * @returns {string[]} matching test paths, sorted; empty when `relPath` is
 *   itself a test file or has an empty stem
 */
function findRelatedTests(relPath, allFiles) {
  if (riskLabelFor(relPath) === 'test') return [];
  const stem = stemOf(relPath).toLowerCase();
  if (!stem) return [];

  const related = [];
  for (const candidate of allFiles) {
    if (candidate === relPath) continue;
    if (riskLabelFor(candidate) !== 'test') continue;
    const testStem = stemOf(candidate).toLowerCase();
    // Underscore conventions (pytest, Go, RSpec) keep the marker inside the stem.
    const bareStem = testStem.replace(/^test_/, '').replace(/_(test|spec)$/, '');
    if (testStem === stem || bareStem === stem) related.push(candidate);
  }
  return related.sort();
}
4482
+
4483
/**
 * Turn a ranker `signals` object into a short human-readable reason string.
 * Each signal that fired contributes one phrase; phrases are joined with '; '.
 * Falls back to 'ranked match' when `signals` is missing or nothing fired.
 */
function reasonFor(signals) {
  if (!signals) return 'ranked match';

  // [fired?, phrase] pairs, kept in the order they should appear in the output.
  const candidates = [
    [signals.symbolMatch > 0, 'symbol-name match'],
    [signals.exactToken > 0, 'exact token match'],
    [signals.prefixMatch > 0, 'prefix match'],
    [signals.pathMatch > 0, 'path match'],
    [signals.graphBoost > 0, 'dependency-graph neighbor'],
    [signals.recencyBoost > 1, 'recently changed'],
    [Boolean(signals.learnedWeights) && signals.learnedWeights !== 1, 'learned weight'],
  ];

  const phrases = candidates.filter(([fired]) => fired).map(([, phrase]) => phrase);
  return phrases.length === 0 ? 'ranked match' : phrases.join('; ');
}
4496
+
4497
/**
 * Rough token estimate for a list of signatures: the character length of the
 * newline-joined block divided by four, rounded up.
 */
function sigTokens(sigs) {
  const charCount = sigs.join('\n').length;
  return Math.ceil(charCount / 4);
}
4501
+
4502
/**
 * Serialise a value to compact JSON with object keys sorted at every depth,
 * so that logically equal values always produce the same string for hashing.
 * @param {*} value
 * @returns {string}
 */
function canonicalize(value) {
  const ordered = sortKeys(value);
  return JSON.stringify(ordered);
}

/**
 * Return a copy of `value` in which every object's keys are inserted in sorted
 * order. Arrays keep their element order; non-object values are returned as-is.
 */
function sortKeys(value) {
  if (Array.isArray(value)) {
    return value.map((item) => sortKeys(item));
  }
  if (!value || typeof value !== 'object') {
    return value;
  }
  const sorted = {};
  Object.keys(value).sort().forEach((key) => {
    sorted[key] = sortKeys(value[key]);
  });
  return sorted;
}
4520
+
4521
/**
 * Build an Evidence Pack (schema v1) for a query.
 *
 * Steps, in order:
 *   1. Resolve the budget / top limits (non-finite values fall back to defaults).
 *   2. Rank the signature index for the query; zero-score hits are discarded
 *      unless the query is empty/whitespace.
 *   3. Walk the ranked hits, admitting a file while it fits the token budget.
 *      The first admitted file is never rejected for size; later files that
 *      would overflow are recorded in `droppedFiles` and skipped.
 *   4. For each admitted file, pass its signatures through the security
 *      scanner and split every resulting signature into symbol + line anchor.
 *   5. Assemble the pack and stamp `grounding.contextHash` with a sha256 of the
 *      canonicalised pack (the hash field itself excluded).
 *
 * The token cost of a file is measured on the ranker's signatures, i.e. before
 * the scanner's output is taken.
 *
 * @param {string} query
 * @param {string} cwd
 * @param {object} [opts]
 * @param {number} [opts.budget=6000] - token budget for included files
 * @param {number} [opts.top=12] - max ranked files to consider
 * @param {Map<string,string[]>} [opts.sigIndex] - pre-built index (else built from cwd)
 * @returns {object} Evidence Pack v1
 */
function buildEvidencePack(query, cwd, opts = {}) {
  const tokenLimit = Number.isFinite(opts.budget) ? opts.budget : DEFAULT_BUDGET;
  const maxFiles = Number.isFinite(opts.top) ? opts.top : DEFAULT_TOP;

  const index = opts.sigIndex instanceof Map ? opts.sigIndex : buildSigIndex(cwd);
  const intent = detectIntent(query);
  const indexedPaths = Array.from(index.keys());

  const hits = rank(query, index, { topK: maxFiles, cwd })
    .filter((hit) => hit.score > 0 || ranked0Empty(query));

  // Highest score among the kept hits; confidence is expressed relative to it.
  let topScore = 0;
  for (const hit of hits) topScore = Math.max(topScore, hit.score);

  // Greedy fill in rank order; whatever does not fit is reported as dropped.
  const files = [];
  const droppedFiles = [];
  let used = 0;

  for (const hit of hits) {
    const cost = sigTokens(hit.sigs);
    const overBudget = files.length > 0 && used + cost > tokenLimit;
    if (overBudget) {
      droppedFiles.push({ path: hit.file, reason: `budget: would exceed ${tokenLimit}-token limit` });
      continue;
    }
    used += cost;

    const symbols = [];
    const sourceLines = [];
    for (const sig of scan(hit.sigs, hit.file).safe) {
      const anchor = parseAnchor(sig);
      symbols.push(anchor.symbol);
      if (anchor.start !== null) {
        sourceLines.push({ symbol: anchor.symbol, start: anchor.start, end: anchor.end });
      }
    }

    const confidence = topScore > 0 ? Math.round((hit.score / topScore) * 100) / 100 : 0;
    files.push({
      path: hit.file,
      symbols,
      reason: reasonFor(hit.signals),
      confidence,
      sourceLines,
      relatedTests: findRelatedTests(hit.file, indexedPaths),
      riskLabel: riskLabelFor(hit.file),
    });
  }

  let symbolCount = 0;
  let anchoredSymbols = 0;
  for (const entry of files) {
    symbolCount += entry.symbols.length;
    anchoredSymbols += entry.sourceLines.length;
  }
  const anchorCoverage = symbolCount > 0 ? Math.round((anchoredSymbols / symbolCount) * 1000) / 1000 : 0;

  const pack = {
    schemaVersion: SCHEMA_VERSION,
    query,
    intent,
    files,
    tokenBudget: { limit: tokenLimit, used, remaining: Math.max(0, tokenLimit - used) },
    droppedFiles,
    grounding: {
      symbolCount,
      anchoredSymbols,
      anchorCoverage,
      contextHash: null,
      deterministic: true,
    },
  };

  // Hash a shallow copy of the pack whose grounding omits the hash field itself.
  const groundingForHash = Object.assign({}, pack.grounding);
  delete groundingForHash.contextHash;
  const hashInput = Object.assign({}, pack, { grounding: groundingForHash });
  const digest = crypto.createHash('sha256').update(canonicalize(hashInput)).digest('hex');
  pack.grounding.contextHash = 'sha256:' + digest;

  return pack;
}
4604
+
4605
// True when the query is missing, empty, or whitespace-only. Used by the
// ranked-hit filter so zero-score hits are kept only for such queries.
function ranked0Empty(query) {
  if (!query) return true;
  return query.trim().length === 0;
}
4609
+
4610
/**
 * Pretty-printed JSON rendering of a pack (2-space indent). Keys appear in the
 * pack's own insertion order; no key sorting is applied here.
 */
function formatJSON(pack) {
  const indent = 2;
  return JSON.stringify(pack, null, indent);
}
4614
+
4615
/**
 * Markdown handoff rendering of a pack.
 *
 * Layout: a title with the query, a bullet summary (schema, intent, budget,
 * grounding, hash), one section per included file with its symbols in a
 * fenced block, and a closing "Dropped" section when any file was dropped.
 * Lines are joined with '\n'.
 */
function formatMarkdown(pack) {
  const budget = pack.tokenBudget;
  const grounding = pack.grounding;
  const coveragePct = Math.round(grounding.anchorCoverage * 100);

  const lines = [
    `# Evidence Pack — \`${pack.query}\``,
    '',
    `- **Schema:** v${pack.schemaVersion}`,
    `- **Intent:** ${pack.intent}`,
    `- **Budget:** ${budget.used} / ${budget.limit} tokens used (${budget.remaining} remaining)`,
    `- **Grounding:** ${grounding.anchoredSymbols}/${grounding.symbolCount} symbols anchored (${coveragePct}%)`,
    `- **Hash:** \`${grounding.contextHash}\``,
    '',
  ];

  for (const file of pack.files) {
    lines.push(`## \`${file.path}\` _(${file.riskLabel}, confidence ${file.confidence})_`);
    lines.push(`_${file.reason}_`);
    if (file.relatedTests.length) {
      const quoted = file.relatedTests.map((t) => `\`${t}\``).join(', ');
      lines.push(`Related tests: ${quoted}`);
    }
    lines.push('');
    lines.push('```');
    for (const symbol of file.symbols) lines.push(symbol);
    lines.push('```');
    lines.push('');
  }

  if (pack.droppedFiles.length) {
    lines.push('## Dropped (over budget)');
    for (const dropped of pack.droppedFiles) {
      lines.push(`- \`${dropped.path}\` — ${dropped.reason}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
4646
+
4647
+ module.exports = {
4648
+ buildEvidencePack,
4649
+ formatJSON,
4650
+ formatMarkdown,
4651
+ parseAnchor,
4652
+ riskLabelFor,
4653
+ findRelatedTests,
4654
+ SCHEMA_VERSION,
4655
+ };
4656
+
4657
+ };
4658
+
4388
4659
  // ── ./src/extractors/coverage ──
4389
4660
  __factories["./src/extractors/coverage"] = function(module, exports) {
4390
4661
 
@@ -11863,7 +12134,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
11863
12134
 
11864
12135
  const SERVER_INFO = {
11865
12136
  name: 'sigmap',
11866
- version: '7.25.2',
12137
+ version: '7.26.0',
11867
12138
  description: 'SigMap MCP server — code signatures on demand',
11868
12139
  };
11869
12140
 
@@ -15652,7 +15923,7 @@ function __tryGit(args, opts = {}) {
15652
15923
  catch (_) { return ''; }
15653
15924
  }
15654
15925
 
15655
- const VERSION = '7.25.2';
15926
+ const VERSION = '7.26.0';
15656
15927
  const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
15657
15928
 
15658
15929
  function requireSourceOrBundled(key) {
@@ -17459,6 +17730,9 @@ Usage:
17459
17730
  ${cmd} ask "<query>" --squeeze Auto-accept input minimization (no prompt; for scripts/CI)
17460
17731
  ${cmd} ask "<query>" --no-squeeze Disable input minimization entirely
17461
17732
  ${cmd} ask "<query>" --squeeze-threshold N Min reduction %% to prompt (default 30)
17733
+ ${cmd} evidence "<query>" Build a deterministic Evidence Pack (JSON) → .context/evidence-pack.json
17734
+ ${cmd} evidence "<query>" --markdown Emit the Markdown handoff rendering to stdout
17735
+ ${cmd} evidence "<query>" --top <n> --budget <n> --out <path> Tune ranked files / token budget / write rendered output
17462
17736
  ${cmd} note "<text>" Append a note to the cross-session decision log
17463
17737
  ${cmd} note List recent notes (also: note --list <N>)
17464
17738
  ${cmd} status Show repo state — branch, dirty files, index freshness, notes
@@ -18031,6 +18305,66 @@ function main() {
18031
18305
  process.exit(0);
18032
18306
  }
18033
18307
 
18308
+ // `sigmap evidence "<query>"` — Evidence Pack v1 (v8.0 E1).
18309
+ // Deterministic, machine-consumable signature+evidence map. Always writes the
18310
+ // JSON artifact to .context/evidence-pack.json; stdout carries the requested
18311
+ // mode (JSON default, or Markdown handoff with --markdown/--md).
18312
+ if (args[0] === 'evidence') {
18313
+ const query = args[1];
18314
+ if (!query || query.startsWith('--')) {
18315
+ console.error('[sigmap] Usage: sigmap evidence "<query>" [--markdown] [--top <n>] [--budget <n>] [--out <path>]');
18316
+ console.error(' Example: sigmap evidence "how does auth work" --markdown');
18317
+ process.exit(1);
18318
+ }
18319
+
18320
+ const { buildEvidencePack, formatJSON, formatMarkdown } = requireSourceOrBundled('./src/evidence/pack');
18321
+
18322
+ const opts = {};
18323
+ const topIdx = args.indexOf('--top');
18324
+ if (topIdx !== -1 && args[topIdx + 1]) opts.top = parseInt(args[topIdx + 1], 10);
18325
+ const budgetIdx = args.indexOf('--budget');
18326
+ if (budgetIdx !== -1 && args[budgetIdx + 1]) opts.budget = parseInt(args[budgetIdx + 1], 10);
18327
+ else opts.budget = (config && config.maxTokens) || 6000;
18328
+
18329
+ let pack;
18330
+ try {
18331
+ pack = buildEvidencePack(query, cwd, opts);
18332
+ } catch (e) {
18333
+ console.error('[sigmap] evidence: ' + e.message);
18334
+ process.exit(1);
18335
+ }
18336
+
18337
+ if (pack.files.length === 0) {
18338
+ process.stderr.write('[sigmap] ⚠ no matching files indexed. Run: sigmap (to generate context first)\n');
18339
+ }
18340
+
18341
+ const jsonText = formatJSON(pack);
18342
+ const artifactPath = path.join(cwd, '.context', 'evidence-pack.json');
18343
+ try {
18344
+ fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
18345
+ fs.writeFileSync(artifactPath, jsonText, 'utf8');
18346
+ process.stderr.write(`[sigmap] evidence pack → ${path.relative(cwd, artifactPath)} (${pack.files.length} files, ${pack.grounding.symbolCount} symbols)\n`);
18347
+ } catch (_) { /* artifact write is best-effort */ }
18348
+
18349
+ const markdown = args.includes('--markdown') || args.includes('--md');
18350
+ const rendered = markdown ? formatMarkdown(pack) : jsonText;
18351
+
18352
+ const outIdx = args.indexOf('--out');
18353
+ if (outIdx !== -1 && args[outIdx + 1]) {
18354
+ const outPath = path.resolve(cwd, args[outIdx + 1]);
18355
+ try {
18356
+ fs.mkdirSync(path.dirname(outPath), { recursive: true });
18357
+ fs.writeFileSync(outPath, rendered + '\n', 'utf8');
18358
+ } catch (e) {
18359
+ console.error('[sigmap] evidence: could not write --out ' + outPath + ': ' + e.message);
18360
+ process.exit(1);
18361
+ }
18362
+ }
18363
+
18364
+ process.stdout.write(rendered + '\n');
18365
+ process.exit(0);
18366
+ }
18367
+
18034
18368
  // `sigmap gain` — token-savings dashboard (totals, by-operation, trends).
18035
18369
  if (args[0] === 'gain') {
18036
18370
  const valOf = (f, d) => { const i = args.indexOf(f); return i >= 0 && args[i + 1] ? args[i + 1] : d; };
package/llms-full.txt CHANGED
@@ -9,13 +9,13 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.25.2 | Benchmark: sigmap-v7.25-main (2026-06-22)
12
+ # Version: 7.26.0 | Benchmark: sigmap-v7.26-main (2026-06-22)
13
13
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
16
16
  ---
17
17
 
18
- ## Core metrics (benchmark: sigmap-v7.25-main, 2026-06-22)
18
+ ## Core metrics (benchmark: sigmap-v7.26-main, 2026-06-22)
19
19
 
20
20
  | Metric | Without SigMap | With SigMap |
21
21
  |--------|----------------|-------------|
@@ -109,6 +109,9 @@ sigmap squeeze <file|-> Minimize a pasted stacktrace/CI-log/JSO
109
109
  sigmap ask "<query>" --squeeze Auto-accept input minimization (no prompt; for scripts/CI)
110
110
  sigmap ask "<query>" --no-squeeze Disable input minimization entirely
111
111
  sigmap ask "<query>" --squeeze-threshold N Min reduction %% to prompt (default 30)
112
+ sigmap evidence "<query>" Build a deterministic Evidence Pack (JSON) → .context/evidence-pack.json
113
+ sigmap evidence "<query>" --markdown Emit the Markdown handoff rendering to stdout
114
+ sigmap evidence "<query>" --top <n> --budget <n> --out <path> Tune ranked files / token budget / write rendered output
112
115
  sigmap note "<text>" Append a note to the cross-session decision log
113
116
  sigmap note List recent notes (also: note --list <N>)
114
117
  sigmap status Show repo state — branch, dirty files, index freshness, notes
package/llms.txt CHANGED
@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
9
9
  grounded. Deterministic, offline, no embeddings or vector database. Works with
10
10
  Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
11
11
 
12
- # Version: 7.25.2 | Benchmark: sigmap-v7.25-main (2026-06-22)
12
+ # Version: 7.26.0 | Benchmark: sigmap-v7.26-main (2026-06-22)
13
13
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
14
14
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
15
15
 
@@ -21,7 +21,7 @@ Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
21
21
  - No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
22
22
  - Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
23
23
 
24
- ## Core metrics (benchmark: sigmap-v7.25-main, 2026-06-22)
24
+ ## Core metrics (benchmark: sigmap-v7.26-main, 2026-06-22)
25
25
 
26
26
  - hit@5 retrieval: 75.6% vs 13.6% random baseline (5.6× lift)
27
27
  - Token reduction: 97.0% average across benchmark repos
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "7.25.2",
3
+ "version": "7.26.0",
4
4
  "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
5
5
  "main": "packages/core/index.js",
6
6
  "exports": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-cli",
3
- "version": "7.25.2",
3
+ "version": "7.26.0",
4
4
  "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-core",
3
- "version": "7.25.2",
3
+ "version": "7.26.0",
4
4
  "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -0,0 +1,267 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Evidence Pack v1 (v8.0 E1).
5
+ *
6
+ * A deterministic, machine-consumable signature-and-evidence map. Replaces the
7
+ * "paste this into your prompt" workflow with a byte-stable JSON artifact that
8
+ * an agent or CI can ingest directly — every entry anchored to a real file,
9
+ * symbol, and line range.
10
+ *
11
+ * Composed entirely from shipped zero-dep modules:
12
+ * - retrieval/ranker → ranked files, scores, signals
13
+ * - extractors/line-anchor → `:start-end` suffix format; parsed locally by parseAnchor (sourceLines)
14
+ * - security/scanner → secret redaction of symbols
15
+ * - crypto (node builtin) → sha256 grounding hash
16
+ *
17
+ * Determinism: the pack carries NO wall-clock timestamp. Given an unchanged
18
+ * repository, `buildEvidencePack` returns a byte-identical object, and
19
+ * `grounding.contextHash` is stable. This is the point — the pack is auditable.
20
+ */
21
+
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const crypto = require('crypto');
25
+
26
+ const { buildSigIndex, rank, detectIntent } = require('../retrieval/ranker');
27
+ const { scan } = require('../security/scanner');
28
+
29
+ const SCHEMA_VERSION = '1.0';
30
+ const DEFAULT_BUDGET = 6000;
31
+ const DEFAULT_TOP = 12;
32
+
33
+ const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|\.(pb|_pb)\.|\.pb\.go$|_pb2\.py$/;
34
+ const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
35
+ const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
36
+ const SECURITY_RE = /(^|\/|[._-])(auth|authn|authz|login|password|passwd|secret|credential|token|session|crypto|cipher|payment|billing|checkout|oauth|jwt|permission|acl|rbac)([._-]|\/|$)/i;
37
+
38
/**
 * Split a signature's trailing ` :start-end` line anchor from its symbol text.
 *
 * Non-string input is tolerated: `null`/`undefined` are treated as an empty
 * signature and any other value is stringified, so a malformed index entry
 * yields an un-anchored result instead of a TypeError.
 *
 * @param {string} sig - signature text, optionally ending in `:<start>-<end>`
 * @returns {{ symbol: string, start: number|null, end: number|null }}
 *   `start`/`end` are `null` when the signature carries no anchor.
 */
function parseAnchor(sig) {
  let text = '';
  if (typeof sig === 'string') text = sig;
  else if (sig != null) text = String(sig);

  // Match from the anchor's colon; whitespace in front of it is removed by
  // trim() on the symbol slice, so the pattern needs no leading `\s*`.
  const m = /:(\d+)-(\d+)\s*$/.exec(text);
  if (!m) return { symbol: text.trim(), start: null, end: null };
  return {
    symbol: text.slice(0, m.index).trim(),
    start: parseInt(m[1], 10),
    end: parseInt(m[2], 10),
  };
}
52
+
53
/**
 * Assign a coarse risk label to a file using only its path (v1 heuristic).
 *
 * Backslashes are normalised to forward slashes, then the path is tested
 * against the module-level patterns in a fixed priority order: generated,
 * test, security, config. The first pattern that matches decides the label;
 * anything unmatched is plain 'source'.
 *
 * @param {string} relPath - file path (either separator style)
 * @returns {'generated'|'test'|'config'|'security'|'source'}
 */
function riskLabelFor(relPath) {
  const normalized = relPath.replace(/\\/g, '/');
  // Order matters: earlier entries win when several patterns match.
  const priority = [
    [GENERATED_RE, 'generated'],
    [TEST_RE, 'test'],
    [SECURITY_RE, 'security'],
    [CONFIG_RE, 'config'],
  ];
  for (const [pattern, label] of priority) {
    if (pattern.test(normalized)) return label;
  }
  return 'source';
}
67
+
68
/**
 * Filename stem: the basename with its final extension removed, followed by
 * removal of a trailing `.test` / `.spec` marker (case-insensitive).
 * A dotfile such as `.bashrc` yields an empty stem.
 */
function stemOf(relPath) {
  const fileName = path.basename(relPath);
  const withoutExtension = fileName.replace(/\.[^.]+$/, '');
  return withoutExtension.replace(/\.(test|spec)$/i, '');
}
73
+
74
/**
 * Best-effort impl→test discovery (v1). Deterministic.
 *
 * A candidate is related when it is labelled 'test' by riskLabelFor and its
 * stem equals the implementation file's stem (case-insensitive). Besides the
 * `.test`/`.spec` markers already removed by stemOf, a test file's stem is also
 * compared with a leading `test_` or trailing `_test`/`_spec` removed, so
 * `test_foo.py`, `foo_test.go` and `foo_spec.rb` are found for `foo.*`.
 * Only test-file stems are stripped this way; the implementation stem is used
 * as-is.
 *
 * @param {string} relPath - implementation file to find tests for
 * @param {string[]} allFiles - universe of indexed files (relative paths)
 * @returns {string[]} matching test paths, sorted; empty when `relPath` is
 *   itself a test file or has an empty stem
 */
function findRelatedTests(relPath, allFiles) {
  if (riskLabelFor(relPath) === 'test') return [];
  const stem = stemOf(relPath).toLowerCase();
  if (!stem) return [];

  const related = [];
  for (const candidate of allFiles) {
    if (candidate === relPath) continue;
    if (riskLabelFor(candidate) !== 'test') continue;
    const testStem = stemOf(candidate).toLowerCase();
    // Underscore conventions (pytest, Go, RSpec) keep the marker inside the stem.
    const bareStem = testStem.replace(/^test_/, '').replace(/_(test|spec)$/, '');
    if (testStem === stem || bareStem === stem) related.push(candidate);
  }
  return related.sort();
}
94
+
95
/**
 * Turn a ranker `signals` object into a short human-readable reason string.
 * Each signal that fired contributes one phrase; phrases are joined with '; '.
 * Falls back to 'ranked match' when `signals` is missing or nothing fired.
 */
function reasonFor(signals) {
  if (!signals) return 'ranked match';

  // [fired?, phrase] pairs, kept in the order they should appear in the output.
  const candidates = [
    [signals.symbolMatch > 0, 'symbol-name match'],
    [signals.exactToken > 0, 'exact token match'],
    [signals.prefixMatch > 0, 'prefix match'],
    [signals.pathMatch > 0, 'path match'],
    [signals.graphBoost > 0, 'dependency-graph neighbor'],
    [signals.recencyBoost > 1, 'recently changed'],
    [Boolean(signals.learnedWeights) && signals.learnedWeights !== 1, 'learned weight'],
  ];

  const phrases = candidates.filter(([fired]) => fired).map(([, phrase]) => phrase);
  return phrases.length === 0 ? 'ranked match' : phrases.join('; ');
}
108
+
109
/**
 * Rough token estimate for a list of signatures: the character length of the
 * newline-joined block divided by four, rounded up.
 */
function sigTokens(sigs) {
  const charCount = sigs.join('\n').length;
  return Math.ceil(charCount / 4);
}
113
+
114
/**
 * Serialise a value to compact JSON with object keys sorted at every depth,
 * so that logically equal values always produce the same string for hashing.
 * @param {*} value
 * @returns {string}
 */
function canonicalize(value) {
  const ordered = sortKeys(value);
  return JSON.stringify(ordered);
}

/**
 * Return a copy of `value` in which every object's keys are inserted in sorted
 * order. Arrays keep their element order; non-object values are returned as-is.
 */
function sortKeys(value) {
  if (Array.isArray(value)) {
    return value.map((item) => sortKeys(item));
  }
  if (!value || typeof value !== 'object') {
    return value;
  }
  const sorted = {};
  Object.keys(value).sort().forEach((key) => {
    sorted[key] = sortKeys(value[key]);
  });
  return sorted;
}
132
+
133
/**
 * Build an Evidence Pack (schema v1) for a query.
 *
 * Steps, in order:
 *   1. Resolve the budget / top limits (non-finite values fall back to defaults).
 *   2. Rank the signature index for the query; zero-score hits are discarded
 *      unless the query is empty/whitespace.
 *   3. Walk the ranked hits, admitting a file while it fits the token budget.
 *      The first admitted file is never rejected for size; later files that
 *      would overflow are recorded in `droppedFiles` and skipped.
 *   4. For each admitted file, pass its signatures through the security
 *      scanner and split every resulting signature into symbol + line anchor.
 *   5. Assemble the pack and stamp `grounding.contextHash` with a sha256 of the
 *      canonicalised pack (the hash field itself excluded).
 *
 * The token cost of a file is measured on the ranker's signatures, i.e. before
 * the scanner's output is taken.
 *
 * @param {string} query
 * @param {string} cwd
 * @param {object} [opts]
 * @param {number} [opts.budget=6000] - token budget for included files
 * @param {number} [opts.top=12] - max ranked files to consider
 * @param {Map<string,string[]>} [opts.sigIndex] - pre-built index (else built from cwd)
 * @returns {object} Evidence Pack v1
 */
function buildEvidencePack(query, cwd, opts = {}) {
  const tokenLimit = Number.isFinite(opts.budget) ? opts.budget : DEFAULT_BUDGET;
  const maxFiles = Number.isFinite(opts.top) ? opts.top : DEFAULT_TOP;

  const index = opts.sigIndex instanceof Map ? opts.sigIndex : buildSigIndex(cwd);
  const intent = detectIntent(query);
  const indexedPaths = Array.from(index.keys());

  const hits = rank(query, index, { topK: maxFiles, cwd })
    .filter((hit) => hit.score > 0 || ranked0Empty(query));

  // Highest score among the kept hits; confidence is expressed relative to it.
  let topScore = 0;
  for (const hit of hits) topScore = Math.max(topScore, hit.score);

  // Greedy fill in rank order; whatever does not fit is reported as dropped.
  const files = [];
  const droppedFiles = [];
  let used = 0;

  for (const hit of hits) {
    const cost = sigTokens(hit.sigs);
    const overBudget = files.length > 0 && used + cost > tokenLimit;
    if (overBudget) {
      droppedFiles.push({ path: hit.file, reason: `budget: would exceed ${tokenLimit}-token limit` });
      continue;
    }
    used += cost;

    const symbols = [];
    const sourceLines = [];
    for (const sig of scan(hit.sigs, hit.file).safe) {
      const anchor = parseAnchor(sig);
      symbols.push(anchor.symbol);
      if (anchor.start !== null) {
        sourceLines.push({ symbol: anchor.symbol, start: anchor.start, end: anchor.end });
      }
    }

    const confidence = topScore > 0 ? Math.round((hit.score / topScore) * 100) / 100 : 0;
    files.push({
      path: hit.file,
      symbols,
      reason: reasonFor(hit.signals),
      confidence,
      sourceLines,
      relatedTests: findRelatedTests(hit.file, indexedPaths),
      riskLabel: riskLabelFor(hit.file),
    });
  }

  let symbolCount = 0;
  let anchoredSymbols = 0;
  for (const entry of files) {
    symbolCount += entry.symbols.length;
    anchoredSymbols += entry.sourceLines.length;
  }
  const anchorCoverage = symbolCount > 0 ? Math.round((anchoredSymbols / symbolCount) * 1000) / 1000 : 0;

  const pack = {
    schemaVersion: SCHEMA_VERSION,
    query,
    intent,
    files,
    tokenBudget: { limit: tokenLimit, used, remaining: Math.max(0, tokenLimit - used) },
    droppedFiles,
    grounding: {
      symbolCount,
      anchoredSymbols,
      anchorCoverage,
      contextHash: null,
      deterministic: true,
    },
  };

  // Hash a shallow copy of the pack whose grounding omits the hash field itself.
  const groundingForHash = Object.assign({}, pack.grounding);
  delete groundingForHash.contextHash;
  const hashInput = Object.assign({}, pack, { grounding: groundingForHash });
  const digest = crypto.createHash('sha256').update(canonicalize(hashInput)).digest('hex');
  pack.grounding.contextHash = 'sha256:' + digest;

  return pack;
}
216
+
217
// True when the query is missing, empty, or whitespace-only. Used by the
// ranked-hit filter so zero-score hits are kept only for such queries.
function ranked0Empty(query) {
  if (!query) return true;
  return query.trim().length === 0;
}
221
+
222
/**
 * Pretty-printed JSON rendering of a pack (2-space indent). Keys appear in the
 * pack's own insertion order; no key sorting is applied here.
 */
function formatJSON(pack) {
  const indent = 2;
  return JSON.stringify(pack, null, indent);
}
226
+
227
/**
 * Markdown handoff rendering of a pack.
 *
 * Layout: a title with the query, a bullet summary (schema, intent, budget,
 * grounding, hash), one section per included file with its symbols in a
 * fenced block, and a closing "Dropped" section when any file was dropped.
 * Lines are joined with '\n'.
 */
function formatMarkdown(pack) {
  const budget = pack.tokenBudget;
  const grounding = pack.grounding;
  const coveragePct = Math.round(grounding.anchorCoverage * 100);

  const lines = [
    `# Evidence Pack — \`${pack.query}\``,
    '',
    `- **Schema:** v${pack.schemaVersion}`,
    `- **Intent:** ${pack.intent}`,
    `- **Budget:** ${budget.used} / ${budget.limit} tokens used (${budget.remaining} remaining)`,
    `- **Grounding:** ${grounding.anchoredSymbols}/${grounding.symbolCount} symbols anchored (${coveragePct}%)`,
    `- **Hash:** \`${grounding.contextHash}\``,
    '',
  ];

  for (const file of pack.files) {
    lines.push(`## \`${file.path}\` _(${file.riskLabel}, confidence ${file.confidence})_`);
    lines.push(`_${file.reason}_`);
    if (file.relatedTests.length) {
      const quoted = file.relatedTests.map((t) => `\`${t}\``).join(', ');
      lines.push(`Related tests: ${quoted}`);
    }
    lines.push('');
    lines.push('```');
    for (const symbol of file.symbols) lines.push(symbol);
    lines.push('```');
    lines.push('');
  }

  if (pack.droppedFiles.length) {
    lines.push('## Dropped (over budget)');
    for (const dropped of pack.droppedFiles) {
      lines.push(`- \`${dropped.path}\` — ${dropped.reason}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
258
+
259
+ module.exports = {
260
+ buildEvidencePack,
261
+ formatJSON,
262
+ formatMarkdown,
263
+ parseAnchor,
264
+ riskLabelFor,
265
+ findRelatedTests,
266
+ SCHEMA_VERSION,
267
+ };
package/src/mcp/server.js CHANGED
@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
18
18
 
19
19
  const SERVER_INFO = {
20
20
  name: 'sigmap',
21
- version: '7.25.2',
21
+ version: '7.26.0',
22
22
  description: 'SigMap MCP server — code signatures on demand',
23
23
  };
24
24