npm - sigmap - Versions diffs - 7.25.2 → 7.26.0 - Mend

sigmap 7.25.2 → 7.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md +9 -0
package/README.md +1 -1
package/gen-context.js +336 -2
package/llms-full.txt +5 -2
package/llms.txt +2 -2
package/package.json +1 -1
package/packages/cli/package.json +1 -1
package/packages/core/package.json +1 -1
package/src/evidence/pack.js +267 -0
package/src/mcp/server.js +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,15 @@ Format: [Semantic Versioning](https://semver.org/)
 ---
+## [7.26.0] — 2026-06-22
+Minor release — **v8.0 "The Evidence Pack & the Pivot" (E1):** the keystone artifact that makes SigMap consumable by machines instead of copy-paste.
+### Added
+- **Evidence Pack JSON v1 (#372):** new `sigmap evidence "<query>"` command emits a deterministic, machine-consumable signature-and-evidence map — a byte-stable JSON artifact (plus a `--markdown`/`--md` handoff rendering) that an agent or CI can ingest directly, every entry anchored to a real file, symbol, and line range. Schema v1: `{ schemaVersion, query, intent, files:[{ path, symbols, reason, confidence, sourceLines, relatedTests, riskLabel }], tokenBudget, droppedFiles, grounding:{ symbolCount, anchoredSymbols, anchorCoverage, contextHash, deterministic } }`. Composed entirely from shipped zero-dep modules (ranker, line-anchor parsing, security scanner, sha256 grounding hash). The pack carries **no wall-clock timestamp** — an unchanged repo yields byte-identical output and a stable `grounding.contextHash`, so the artifact is auditable. CLI flags: `--top`, `--budget`, `--out`; always writes `.context/evidence-pack.json`. `riskLabel` ∈ {generated, test, config, security, source} and `relatedTests` are best-effort v1 (measured test-discovery and richer labels land in v8.5).
+---
 ## [7.25.2] — 2026-06-22
 Patch release — **Trust Hygiene (H2):** reproducible bundle build. Completes the v7.25.x "Trust Hygiene" milestone (H1+H2+H3+H4 all shipped).

package/README.md CHANGED Viewed

@@ -91,7 +91,7 @@ Ask → Rank → Context → Validate → Judge → Learn
 <!--SM:benchmarkBlock-->
 ```
-Benchmark : sigmap-v7.25-main (21 repositories, including R language)
+Benchmark : sigmap-v7.26-main (21 repositories, including R language)
 Date      : 2026-06-22
 Hit@5          : 75.6%   (baseline 13.6%  — 5.6× lift)

package/gen-context.js CHANGED Viewed

@@ -4385,6 +4385,277 @@ __factories["./src/eval/usefulness-scorer"] = function(module, exports) {
 };
+// ── ./src/evidence/pack ──
+__factories["./src/evidence/pack"] = function(module, exports) {
+  /**
+   * Evidence Pack v1 (v8.0 E1).
+   *
+   * A deterministic, machine-consumable signature-and-evidence map. Replaces the
+   * "paste this into your prompt" workflow with a byte-stable JSON artifact that
+   * an agent or CI can ingest directly — every entry anchored to a real file,
+   * symbol, and line range.
+   *
+   * Composed entirely from shipped zero-dep modules:
+   *   - retrieval/ranker        → ranked files, scores, signals
+   *   - extractors/line-anchor  → `:start-end` suffix convention (parsed here by parseAnchor; the module itself is not imported)
+   *   - security/scanner        → secret redaction of symbols
+   *   - crypto (node builtin)    → sha256 grounding hash
+   *
+   * Determinism: the pack carries NO wall-clock timestamp. Given an unchanged
+   * repository, `buildEvidencePack` returns a byte-identical object, and
+   * `grounding.contextHash` is stable. This is the point — the pack is auditable.
+   */
+  const fs = require('fs');
+  const path = require('path');
+  const crypto = require('crypto');
+  const { buildSigIndex, rank, detectIntent } = __require('./src/retrieval/ranker');
+  const { scan } = __require('./src/security/scanner');
+  // Emitted as `schemaVersion` in every pack.
+  const SCHEMA_VERSION = '1.0';
+  // Fallbacks used by buildEvidencePack when opts.budget / opts.top are not finite numbers.
+  const DEFAULT_BUDGET = 6000;
+  const DEFAULT_TOP = 12;
+  // Path heuristics consumed by riskLabelFor (applied to '/'-normalized paths).
+  // GENERATED_RE: dist/build/out/vendor/node_modules directories, *.generated.* / *.min.* /
+  // *.bundle.* files, and protobuf-style suffixes (.pb., .pb.go, _pb2.py).
+  const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|\.(pb|_pb)\.|\.pb\.go$|_pb2\.py$/;
+  // TEST_RE: test/spec directories, *.test.<ext> / *.spec.<ext>, test_*.py, and *_test.(go|py|rb).
+  const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
+  // CONFIG_RE: common config extensions, *rc files, and *.config.<ext> (case-insensitive).
+  const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
+  // SECURITY_RE: a security-related keyword bounded by '/', '.', '_', '-' or the path ends (case-insensitive).
+  const SECURITY_RE = /(^|\/|[._-])(auth|authn|authz|login|password|passwd|secret|credential|token|session|crypto|cipher|payment|billing|checkout|oauth|jwt|permission|acl|rbac)([._-]|\/|$)/i;
+  /**
+   * Split a signature's trailing `:start-end` line anchor from its symbol text.
+   *
+   * The anchor must be the last thing in the string (surrounding whitespace is
+   * tolerated). When no anchor is present, the trimmed input is returned as the
+   * symbol with `start` and `end` set to null.
+   * @param {string} sig - one signature, optionally ending in `:<start>-<end>`
+   * @returns {{ symbol: string, start: number|null, end: number|null }}
+   */
+  function parseAnchor(sig) {
+    const m = /\s*:(\d+)-(\d+)\s*$/.exec(sig);
+    if (!m) return { symbol: sig.trim(), start: null, end: null };
+    return {
+      // Everything before the anchor (and the whitespace leading into it) is the symbol.
+      symbol: sig.slice(0, m.index).trim(),
+      start: parseInt(m[1], 10),
+      end: parseInt(m[2], 10),
+    };
+  }
+  /**
+   * Classify a file into a coarse risk label. Path-based heuristic (v1) — the
+   * richer label set (C3) lands in v8.5.
+   *
+   * Backslashes are normalized to '/' before matching. The first matching rule
+   * wins, in the order generated → test → security → config; anything else is
+   * 'source'.
+   * @param {string} relPath
+   * @returns {'generated'|'test'|'config'|'security'|'source'}
+   */
+  function riskLabelFor(relPath) {
+    const p = relPath.replace(/\\/g, '/');
+    if (GENERATED_RE.test(p)) return 'generated';
+    if (TEST_RE.test(p)) return 'test';
+    // Security is checked before config, so e.g. a path matching both is labelled 'security'.
+    if (SECURITY_RE.test(p)) return 'security';
+    if (CONFIG_RE.test(p)) return 'config';
+    return 'source';
+  }
+  /**
+   * Filename stem: the basename with its last extension removed, then a trailing
+   * `.test` / `.spec` segment removed (so `foo.test.js` and `foo.js` both yield `foo`).
+   * NOTE(review): `_test` suffixes and `test_` prefixes (e.g. `foo_test.go`,
+   * `test_foo.py`) are left in place, so those stems do not equal `foo`.
+   * @param {string} relPath
+   * @returns {string}
+   */
+  function stemOf(relPath) {
+    const base = path.basename(relPath);
+    return base.replace(/\.[^.]+$/, '').replace(/\.(test|spec)$/i, '');
+  }
+  /**
+   * Best-effort impl→test discovery (v1). Matches test files whose stem equals
+   * the implementation file's stem, by common convention. Deterministic. The
+   * accuracy-measured discovery (C2) lands in v8.5.
+   * @param {string} relPath
+   * @param {string[]} allFiles  - universe of indexed files (relative paths)
+   * @returns {string[]}
+   */
+  function findRelatedTests(relPath, allFiles) {
+    if (riskLabelFor(relPath) === 'test') return [];
+    const stem = stemOf(relPath).toLowerCase();
+    if (!stem) return [];
+    const out = [];
+    for (const f of allFiles) {
+      if (f === relPath) continue;
+      if (riskLabelFor(f) !== 'test') continue;
+      if (stemOf(f).toLowerCase() === stem) out.push(f);
+    }
+    return out.sort();
+  }
+  /**
+   * Map a ranker `signals` object into a short human-readable reason string.
+   *
+   * Each contributing signal adds one phrase, in a fixed order, joined with
+   * '; '. A missing `signals` object, or one with no contributing field, yields
+   * 'ranked match'.
+   * @param {object} [signals] - fields read: symbolMatch, exactToken, prefixMatch,
+   *   pathMatch, graphBoost, recencyBoost, learnedWeights
+   * @returns {string}
+   */
+  function reasonFor(signals) {
+    if (!signals) return 'ranked match';
+    const parts = [];
+    if (signals.symbolMatch > 0) parts.push('symbol-name match');
+    if (signals.exactToken > 0) parts.push('exact token match');
+    if (signals.prefixMatch > 0) parts.push('prefix match');
+    if (signals.pathMatch > 0) parts.push('path match');
+    if (signals.graphBoost > 0) parts.push('dependency-graph neighbor');
+    // recencyBoost and learnedWeights are compared against 1 rather than 0.
+    if (signals.recencyBoost > 1) parts.push('recently changed');
+    if (signals.learnedWeights && signals.learnedWeights !== 1) parts.push('learned weight');
+    return parts.length ? parts.join('; ') : 'ranked match';
+  }
+  /**
+   * Token estimate for a signature block: the newline-joined length divided by
+   * four, rounded up. Intended to match the ranker's heuristic — TODO confirm
+   * against the ranker, which is not visible from this module.
+   * @param {string[]} sigs
+   * @returns {number}
+   */
+  function sigTokens(sigs) {
+    return Math.ceil(sigs.join('\n').length / 4);
+  }
+  /**
+   * Stable stringify with recursively sorted object keys, for hashing.
+   * Output is compact (no indentation); array element order is preserved.
+   * @param {*} value
+   * @returns {string}
+   */
+  function canonicalize(value) {
+    return JSON.stringify(sortKeys(value));
+  }
+  /**
+   * Recursively copy `value` so that every object's own enumerable keys are
+   * inserted in default-sort order. Arrays are mapped element by element (their
+   * order is kept); null and non-object values are returned unchanged.
+   * @param {*} value
+   * @returns {*}
+   */
+  function sortKeys(value) {
+    if (Array.isArray(value)) return value.map(sortKeys);
+    if (value && typeof value === 'object') {
+      const out = {};
+      for (const k of Object.keys(value).sort()) out[k] = sortKeys(value[k]);
+      return out;
+    }
+    return value;
+  }
+  /**
+   * Build an Evidence Pack for a query.
+   *
+   * Ranks the signature index against `query`, admits ranked files greedily
+   * against the token budget, passes each admitted file's signatures through
+   * the security scanner, and returns the pack with a sha256
+   * `grounding.contextHash` computed over every other field.
+   *
+   * @param {string} query
+   * @param {string} cwd - passed to buildSigIndex (when no sigIndex is given) and to rank
+   * @param {object} [opts]
+   * @param {number} [opts.budget=6000]      - token budget for included files
+   * @param {number} [opts.top=12]           - max ranked files to consider
+   * @param {Map<string,string[]>} [opts.sigIndex] - pre-built index (else built from cwd)
+   * @returns {object} Evidence Pack v1
+   */
+  function buildEvidencePack(query, cwd, opts = {}) {
+    // Non-finite values (undefined, NaN, ...) fall back to the defaults.
+    const budget = Number.isFinite(opts.budget) ? opts.budget : DEFAULT_BUDGET;
+    const top = Number.isFinite(opts.top) ? opts.top : DEFAULT_TOP;
+    const sigIndex = opts.sigIndex instanceof Map ? opts.sigIndex : buildSigIndex(cwd);
+    const intent = detectIntent(query);
+    // Every indexed path, used as the search universe for related tests.
+    const allFiles = Array.from(sigIndex.keys());
+    // Keep only positively scored results, unless the query is empty/whitespace.
+    const ranked = rank(query, sigIndex, { topK: top, cwd })
+      .filter((r) => r.score > 0 || ranked0Empty(query));
+    // Highest score among the kept results; the denominator for `confidence`.
+    const maxScore = ranked.reduce((m, r) => Math.max(m, r.score), 0);
+    // Greedy budget fill in rank order; the remainder is reported as dropped.
+    const files = [];
+    const droppedFiles = [];
+    let used = 0;
+    for (const r of ranked) {
+      // Cost is estimated from the ranker's signatures, before scanning.
+      const tokens = sigTokens(r.sigs);
+      // The first file is always admitted, even if it alone exceeds the budget.
+      // An over-budget file is skipped rather than ending the loop, so a later,
+      // smaller file can still be admitted.
+      if (files.length > 0 && used + tokens > budget) {
+        droppedFiles.push({ path: r.file, reason: `budget: would exceed ${budget}-token limit` });
+        continue;
+      }
+      used += tokens;
+      // Only the scanner's `safe` signatures are emitted.
+      const safe = scan(r.sigs, r.file).safe;
+      const symbols = [];
+      const sourceLines = [];
+      for (const sig of safe) {
+        const { symbol, start, end } = parseAnchor(sig);
+        symbols.push(symbol);
+        // Only anchored signatures contribute a sourceLines entry.
+        if (start !== null) sourceLines.push({ symbol, start, end });
+      }
+      files.push({
+        path: r.file,
+        symbols,
+        reason: reasonFor(r.signals),
+        // Score relative to the best kept result, rounded to two decimals.
+        confidence: maxScore > 0 ? Math.round((r.score / maxScore) * 100) / 100 : 0,
+        sourceLines,
+        relatedTests: findRelatedTests(r.file, allFiles),
+        riskLabel: riskLabelFor(r.file),
+      });
+    }
+    const symbolCount = files.reduce((n, f) => n + f.symbols.length, 0);
+    const anchoredSymbols = files.reduce((n, f) => n + f.sourceLines.length, 0);
+    const pack = {
+      schemaVersion: SCHEMA_VERSION,
+      query,
+      intent,
+      files,
+      tokenBudget: { limit: budget, used, remaining: Math.max(0, budget - used) },
+      droppedFiles,
+      grounding: {
+        symbolCount,
+        anchoredSymbols,
+        // anchoredSymbols / symbolCount, rounded to three decimals.
+        anchorCoverage: symbolCount > 0 ? Math.round((anchoredSymbols / symbolCount) * 1000) / 1000 : 0,
+        // Placeholder; filled in below once the rest of the pack is final.
+        contextHash: null,
+        deterministic: true,
+      },
+    };
+    // Hash everything except the hash field itself.
+    // contextHash is undefined in the copy, and JSON.stringify omits undefined-valued properties.
+    const forHash = Object.assign({}, pack, {
+      grounding: Object.assign({}, pack.grounding, { contextHash: undefined }),
+    });
+    pack.grounding.contextHash = 'sha256:' + crypto.createHash('sha256').update(canonicalize(forHash)).digest('hex');
+    return pack;
+  }
+  // rank() returns [] for an empty/whitespace query; keep the filter readable.
+  /**
+   * True when `query` is falsy or contains only whitespace.
+   * @param {string} query
+   * @returns {boolean}
+   */
+  function ranked0Empty(query) {
+    return !query || !query.trim();
+  }
+  /**
+   * Pretty-printed (2-space indent) JSON rendering of a pack. Keys appear in
+   * the pack's own insertion order; this does not go through canonicalize.
+   * @param {object} pack
+   * @returns {string}
+   */
+  function formatJSON(pack) {
+    return JSON.stringify(pack, null, 2);
+  }
+  /**
+   * Markdown handoff rendering of a pack: a title with the query, a summary
+   * list (schema, intent, budget, grounding, hash), one section per file with
+   * its symbols in a fenced block, and a final list of dropped files if any.
+   * NOTE(review): the query, paths and symbols are interpolated verbatim; text
+   * containing backticks (or a symbol that is itself a ``` line) would break
+   * the inline-code spans or close the fence early.
+   * @param {object} pack - as returned by buildEvidencePack
+   * @returns {string} lines joined with '\n'
+   */
+  function formatMarkdown(pack) {
+    const L = [];
+    L.push(`# Evidence Pack — \`${pack.query}\``);
+    L.push('');
+    L.push(`- **Schema:** v${pack.schemaVersion}`);
+    L.push(`- **Intent:** ${pack.intent}`);
+    L.push(`- **Budget:** ${pack.tokenBudget.used} / ${pack.tokenBudget.limit} tokens used (${pack.tokenBudget.remaining} remaining)`);
+    L.push(`- **Grounding:** ${pack.grounding.anchoredSymbols}/${pack.grounding.symbolCount} symbols anchored (${Math.round(pack.grounding.anchorCoverage * 100)}%)`);
+    L.push(`- **Hash:** \`${pack.grounding.contextHash}\``);
+    L.push('');
+    for (const f of pack.files) {
+      L.push(`## \`${f.path}\`  _(${f.riskLabel}, confidence ${f.confidence})_`);
+      L.push(`_${f.reason}_`);
+      // The related-tests line is omitted entirely when there are none.
+      if (f.relatedTests.length) L.push(`Related tests: ${f.relatedTests.map((t) => `\`${t}\``).join(', ')}`);
+      L.push('');
+      L.push('```');
+      for (const s of f.symbols) L.push(s);
+      L.push('```');
+      L.push('');
+    }
+    if (pack.droppedFiles.length) {
+      L.push('## Dropped (over budget)');
+      for (const d of pack.droppedFiles) L.push(`- \`${d.path}\` — ${d.reason}`);
+      L.push('');
+    }
+    return L.join('\n');
+  }
+  // Exported surface. stemOf, reasonFor, sigTokens, canonicalize, sortKeys and
+  // ranked0Empty are not exported.
+  module.exports = {
+    buildEvidencePack,
+    formatJSON,
+    formatMarkdown,
+    parseAnchor,
+    riskLabelFor,
+    findRelatedTests,
+    SCHEMA_VERSION,
+  };
+};
 // ── ./src/extractors/coverage ──
 __factories["./src/extractors/coverage"] = function(module, exports) {
@@ -11863,7 +12134,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
   const SERVER_INFO = {
     name: 'sigmap',
-    version: '7.25.2',
+    version: '7.26.0',
     description: 'SigMap MCP server — code signatures on demand',
   };
@@ -15652,7 +15923,7 @@ function __tryGit(args, opts = {}) {
   catch (_) { return ''; }
 }
-const VERSION = '7.25.2';
+const VERSION = '7.26.0';
 const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
 function requireSourceOrBundled(key) {
@@ -17459,6 +17730,9 @@ Usage:
   ${cmd} ask "<query>" --squeeze           Auto-accept input minimization (no prompt; for scripts/CI)
   ${cmd} ask "<query>" --no-squeeze        Disable input minimization entirely
   ${cmd} ask "<query>" --squeeze-threshold N  Min reduction %% to prompt (default 30)
+  ${cmd} evidence "<query>"                Build a deterministic Evidence Pack (JSON) → .context/evidence-pack.json
+  ${cmd} evidence "<query>" --markdown     Emit the Markdown handoff rendering to stdout
+  ${cmd} evidence "<query>" --top <n> --budget <n> --out <path>   Tune ranked files / token budget / write rendered output
   ${cmd} note "<text>"                     Append a note to the cross-session decision log
   ${cmd} note                              List recent notes (also: note --list <N>)
   ${cmd} status                            Show repo state — branch, dirty files, index freshness, notes
@@ -18031,6 +18305,66 @@ function main() {
     process.exit(0);
   }
+  // `sigmap evidence "<query>"` — Evidence Pack v1 (v8.0 E1).
+  // Deterministic, machine-consumable signature+evidence map. Always writes the
+  // JSON artifact to .context/evidence-pack.json; stdout carries the requested
+  // mode (JSON default, or Markdown handoff with --markdown/--md).
+  if (args[0] === 'evidence') {
+    const query = args[1];
+    if (!query || query.startsWith('--')) {
+      console.error('[sigmap] Usage: sigmap evidence "<query>" [--markdown] [--top <n>] [--budget <n>] [--out <path>]');
+      console.error('  Example: sigmap evidence "how does auth work" --markdown');
+      process.exit(1);
+    }
+    const { buildEvidencePack, formatJSON, formatMarkdown } = requireSourceOrBundled('./src/evidence/pack');
+    const opts = {};
+    const topIdx = args.indexOf('--top');
+    if (topIdx !== -1 && args[topIdx + 1]) opts.top = parseInt(args[topIdx + 1], 10);
+    const budgetIdx = args.indexOf('--budget');
+    if (budgetIdx !== -1 && args[budgetIdx + 1]) opts.budget = parseInt(args[budgetIdx + 1], 10);
+    else opts.budget = (config && config.maxTokens) || 6000;
+    let pack;
+    try {
+      pack = buildEvidencePack(query, cwd, opts);
+    } catch (e) {
+      console.error('[sigmap] evidence: ' + e.message);
+      process.exit(1);
+    }
+    if (pack.files.length === 0) {
+      process.stderr.write('[sigmap] ⚠  no matching files indexed. Run: sigmap  (to generate context first)\n');
+    }
+    const jsonText = formatJSON(pack);
+    const artifactPath = path.join(cwd, '.context', 'evidence-pack.json');
+    try {
+      fs.mkdirSync(path.dirname(artifactPath), { recursive: true });
+      fs.writeFileSync(artifactPath, jsonText, 'utf8');
+      process.stderr.write(`[sigmap] evidence pack → ${path.relative(cwd, artifactPath)} (${pack.files.length} files, ${pack.grounding.symbolCount} symbols)\n`);
+    } catch (_) { /* artifact write is best-effort */ }
+    const markdown = args.includes('--markdown') || args.includes('--md');
+    const rendered = markdown ? formatMarkdown(pack) : jsonText;
+    const outIdx = args.indexOf('--out');
+    if (outIdx !== -1 && args[outIdx + 1]) {
+      const outPath = path.resolve(cwd, args[outIdx + 1]);
+      try {
+        fs.mkdirSync(path.dirname(outPath), { recursive: true });
+        fs.writeFileSync(outPath, rendered + '\n', 'utf8');
+      } catch (e) {
+        console.error('[sigmap] evidence: could not write --out ' + outPath + ': ' + e.message);
+        process.exit(1);
+      }
+    }
+    process.stdout.write(rendered + '\n');
+    process.exit(0);
+  }
   // `sigmap gain` — token-savings dashboard (totals, by-operation, trends).
   if (args[0] === 'gain') {
     const valOf = (f, d) => { const i = args.indexOf(f); return i >= 0 && args[i + 1] ? args[i + 1] : d; };

package/llms-full.txt CHANGED Viewed

@@ -9,13 +9,13 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
 grounded. Deterministic, offline, no embeddings or vector database. Works with
 Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
-# Version: 7.25.2 | Benchmark: sigmap-v7.25-main (2026-06-22)
+# Version: 7.26.0 | Benchmark: sigmap-v7.26-main (2026-06-22)
 # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
 # Regenerate: npm run generate:llms   |   Validate: npm run validate:llms
 ---
-## Core metrics (benchmark: sigmap-v7.25-main, 2026-06-22)
+## Core metrics (benchmark: sigmap-v7.26-main, 2026-06-22)
 | Metric | Without SigMap | With SigMap |
 |--------|----------------|-------------|
@@ -109,6 +109,9 @@ sigmap squeeze <file|->                  Minimize a pasted stacktrace/CI-log/JSO
 sigmap ask "<query>" --squeeze           Auto-accept input minimization (no prompt; for scripts/CI)
 sigmap ask "<query>" --no-squeeze        Disable input minimization entirely
 sigmap ask "<query>" --squeeze-threshold N  Min reduction %% to prompt (default 30)
+sigmap evidence "<query>"                Build a deterministic Evidence Pack (JSON) → .context/evidence-pack.json
+sigmap evidence "<query>" --markdown     Emit the Markdown handoff rendering to stdout
+sigmap evidence "<query>" --top <n> --budget <n> --out <path>   Tune ranked files / token budget / write rendered output
 sigmap note "<text>"                     Append a note to the cross-session decision log
 sigmap note                              List recent notes (also: note --list <N>)
 sigmap status                            Show repo state — branch, dirty files, index freshness, notes

package/llms.txt CHANGED Viewed

@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
 grounded. Deterministic, offline, no embeddings or vector database. Works with
 Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
-# Version: 7.25.2 | Benchmark: sigmap-v7.25-main (2026-06-22)
+# Version: 7.26.0 | Benchmark: sigmap-v7.26-main (2026-06-22)
 # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
 # Regenerate: npm run generate:llms   |   Validate: npm run validate:llms
@@ -21,7 +21,7 @@ Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
 - No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
 - Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
-## Core metrics (benchmark: sigmap-v7.25-main, 2026-06-22)
+## Core metrics (benchmark: sigmap-v7.26-main, 2026-06-22)
 - hit@5 retrieval: 75.6% vs 13.6% random baseline (5.6× lift)
 - Token reduction: 97.0% average across benchmark repos

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap",
-  "version": "7.25.2",
+  "version": "7.26.0",
   "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
   "main": "packages/core/index.js",
   "exports": {

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-cli",
-  "version": "7.25.2",
+  "version": "7.26.0",
   "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
   "main": "index.js",
   "keywords": [

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-core",
-  "version": "7.25.2",
+  "version": "7.26.0",
   "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
   "main": "index.js",
   "keywords": [

package/src/evidence/pack.js ADDED Viewed

@@ -0,0 +1,267 @@
+'use strict';
+/**
+ * Evidence Pack v1 (v8.0 E1).
+ *
+ * A deterministic, machine-consumable signature-and-evidence map. Replaces the
+ * "paste this into your prompt" workflow with a byte-stable JSON artifact that
+ * an agent or CI can ingest directly — every entry anchored to a real file,
+ * symbol, and line range.
+ *
+ * Composed entirely from shipped zero-dep modules:
+ *   - retrieval/ranker        → ranked files, scores, signals
+ *   - extractors/line-anchor  → `:start-end` suffix convention (parsed here by parseAnchor; the module itself is not imported)
+ *   - security/scanner        → secret redaction of symbols
+ *   - crypto (node builtin)    → sha256 grounding hash
+ *
+ * Determinism: the pack carries NO wall-clock timestamp. Given an unchanged
+ * repository, `buildEvidencePack` returns a byte-identical object, and
+ * `grounding.contextHash` is stable. This is the point — the pack is auditable.
+ */
+const fs = require('fs');
+const path = require('path');
+const crypto = require('crypto');
+const { buildSigIndex, rank, detectIntent } = require('../retrieval/ranker');
+const { scan } = require('../security/scanner');
+// Emitted as `schemaVersion` in every pack.
+const SCHEMA_VERSION = '1.0';
+// Fallbacks used by buildEvidencePack when opts.budget / opts.top are not finite numbers.
+const DEFAULT_BUDGET = 6000;
+const DEFAULT_TOP = 12;
+// Path heuristics consumed by riskLabelFor (applied to '/'-normalized paths).
+// GENERATED_RE: dist/build/out/vendor/node_modules directories, *.generated.* / *.min.* /
+// *.bundle.* files, and protobuf-style suffixes (.pb., .pb.go, _pb2.py).
+const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|\.(pb|_pb)\.|\.pb\.go$|_pb2\.py$/;
+// TEST_RE: test/spec directories, *.test.<ext> / *.spec.<ext>, test_*.py, and *_test.(go|py|rb).
+const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
+// CONFIG_RE: common config extensions, *rc files, and *.config.<ext> (case-insensitive).
+const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
+// SECURITY_RE: a security-related keyword bounded by '/', '.', '_', '-' or the path ends (case-insensitive).
+const SECURITY_RE = /(^|\/|[._-])(auth|authn|authz|login|password|passwd|secret|credential|token|session|crypto|cipher|payment|billing|checkout|oauth|jwt|permission|acl|rbac)([._-]|\/|$)/i;
+/**
+ * Split a signature's trailing `:start-end` line anchor from its symbol text.
+ *
+ * The anchor must be the last thing in the string (surrounding whitespace is
+ * tolerated). When no anchor is present, the trimmed input is returned as the
+ * symbol with `start` and `end` set to null.
+ * @param {string} sig - one signature, optionally ending in `:<start>-<end>`
+ * @returns {{ symbol: string, start: number|null, end: number|null }}
+ */
+function parseAnchor(sig) {
+  const m = /\s*:(\d+)-(\d+)\s*$/.exec(sig);
+  if (!m) return { symbol: sig.trim(), start: null, end: null };
+  return {
+    // Everything before the anchor (and the whitespace leading into it) is the symbol.
+    symbol: sig.slice(0, m.index).trim(),
+    start: parseInt(m[1], 10),
+    end: parseInt(m[2], 10),
+  };
+}
+/**
+ * Classify a file into a coarse risk label. Path-based heuristic (v1) — the
+ * richer label set (C3) lands in v8.5.
+ *
+ * Backslashes are normalized to '/' before matching. The first matching rule
+ * wins, in the order generated → test → security → config; anything else is
+ * 'source'.
+ * @param {string} relPath
+ * @returns {'generated'|'test'|'config'|'security'|'source'}
+ */
+function riskLabelFor(relPath) {
+  const p = relPath.replace(/\\/g, '/');
+  if (GENERATED_RE.test(p)) return 'generated';
+  if (TEST_RE.test(p)) return 'test';
+  // Security is checked before config, so e.g. a path matching both is labelled 'security'.
+  if (SECURITY_RE.test(p)) return 'security';
+  if (CONFIG_RE.test(p)) return 'config';
+  return 'source';
+}
+/**
+ * Filename stem: the basename with its last extension removed, then a trailing
+ * `.test` / `.spec` segment removed (so `foo.test.js` and `foo.js` both yield `foo`).
+ * NOTE(review): `_test` suffixes and `test_` prefixes (e.g. `foo_test.go`,
+ * `test_foo.py`) are left in place, so those stems do not equal `foo`.
+ * @param {string} relPath
+ * @returns {string}
+ */
+function stemOf(relPath) {
+  const base = path.basename(relPath);
+  return base.replace(/\.[^.]+$/, '').replace(/\.(test|spec)$/i, '');
+}
+/**
+ * Best-effort impl→test discovery (v1). Matches test files whose stem equals
+ * the implementation file's stem, by common convention. Deterministic. The
+ * accuracy-measured discovery (C2) lands in v8.5.
+ * @param {string} relPath
+ * @param {string[]} allFiles  - universe of indexed files (relative paths)
+ * @returns {string[]}
+ */
+function findRelatedTests(relPath, allFiles) {
+  if (riskLabelFor(relPath) === 'test') return [];
+  const stem = stemOf(relPath).toLowerCase();
+  if (!stem) return [];
+  const out = [];
+  for (const f of allFiles) {
+    if (f === relPath) continue;
+    if (riskLabelFor(f) !== 'test') continue;
+    if (stemOf(f).toLowerCase() === stem) out.push(f);
+  }
+  return out.sort();
+}
+/**
+ * Map a ranker `signals` object into a short human-readable reason string.
+ *
+ * Each contributing signal adds one phrase, in a fixed order, joined with
+ * '; '. A missing `signals` object, or one with no contributing field, yields
+ * 'ranked match'.
+ * @param {object} [signals] - fields read: symbolMatch, exactToken, prefixMatch,
+ *   pathMatch, graphBoost, recencyBoost, learnedWeights
+ * @returns {string}
+ */
+function reasonFor(signals) {
+  if (!signals) return 'ranked match';
+  const parts = [];
+  if (signals.symbolMatch > 0) parts.push('symbol-name match');
+  if (signals.exactToken > 0) parts.push('exact token match');
+  if (signals.prefixMatch > 0) parts.push('prefix match');
+  if (signals.pathMatch > 0) parts.push('path match');
+  if (signals.graphBoost > 0) parts.push('dependency-graph neighbor');
+  // recencyBoost and learnedWeights are compared against 1 rather than 0.
+  if (signals.recencyBoost > 1) parts.push('recently changed');
+  if (signals.learnedWeights && signals.learnedWeights !== 1) parts.push('learned weight');
+  return parts.length ? parts.join('; ') : 'ranked match';
+}
+/**
+ * Token estimate for a signature block: the newline-joined length divided by
+ * four, rounded up. Intended to match the ranker's heuristic — TODO confirm
+ * against the ranker, which is not visible from this module.
+ * @param {string[]} sigs
+ * @returns {number}
+ */
+function sigTokens(sigs) {
+  return Math.ceil(sigs.join('\n').length / 4);
+}
+/**
+ * Stable stringify with recursively sorted object keys, for hashing.
+ * Output is compact (no indentation); array element order is preserved.
+ * @param {*} value
+ * @returns {string}
+ */
+function canonicalize(value) {
+  return JSON.stringify(sortKeys(value));
+}
+/**
+ * Recursively copy `value` so that every object's own enumerable keys are
+ * inserted in default-sort order. Arrays are mapped element by element (their
+ * order is kept); null and non-object values are returned unchanged.
+ * @param {*} value
+ * @returns {*}
+ */
+function sortKeys(value) {
+  if (Array.isArray(value)) return value.map(sortKeys);
+  if (value && typeof value === 'object') {
+    const out = {};
+    for (const k of Object.keys(value).sort()) out[k] = sortKeys(value[k]);
+    return out;
+  }
+  return value;
+}
+/**
+ * Build an Evidence Pack for a query.
+ *
+ * Ranks the signature index against `query`, admits ranked files greedily
+ * against the token budget, passes each admitted file's signatures through
+ * the security scanner, and returns the pack with a sha256
+ * `grounding.contextHash` computed over every other field.
+ *
+ * @param {string} query
+ * @param {string} cwd - passed to buildSigIndex (when no sigIndex is given) and to rank
+ * @param {object} [opts]
+ * @param {number} [opts.budget=6000]      - token budget for included files
+ * @param {number} [opts.top=12]           - max ranked files to consider
+ * @param {Map<string,string[]>} [opts.sigIndex] - pre-built index (else built from cwd)
+ * @returns {object} Evidence Pack v1
+ */
+function buildEvidencePack(query, cwd, opts = {}) {
+  // Non-finite values (undefined, NaN, ...) fall back to the defaults.
+  const budget = Number.isFinite(opts.budget) ? opts.budget : DEFAULT_BUDGET;
+  const top = Number.isFinite(opts.top) ? opts.top : DEFAULT_TOP;
+  const sigIndex = opts.sigIndex instanceof Map ? opts.sigIndex : buildSigIndex(cwd);
+  const intent = detectIntent(query);
+  // Every indexed path, used as the search universe for related tests.
+  const allFiles = Array.from(sigIndex.keys());
+  // Keep only positively scored results, unless the query is empty/whitespace.
+  const ranked = rank(query, sigIndex, { topK: top, cwd })
+    .filter((r) => r.score > 0 || ranked0Empty(query));
+  // Highest score among the kept results; the denominator for `confidence`.
+  const maxScore = ranked.reduce((m, r) => Math.max(m, r.score), 0);
+  // Greedy budget fill in rank order; the remainder is reported as dropped.
+  const files = [];
+  const droppedFiles = [];
+  let used = 0;
+  for (const r of ranked) {
+    // Cost is estimated from the ranker's signatures, before scanning.
+    const tokens = sigTokens(r.sigs);
+    // The first file is always admitted, even if it alone exceeds the budget.
+    // An over-budget file is skipped rather than ending the loop, so a later,
+    // smaller file can still be admitted.
+    if (files.length > 0 && used + tokens > budget) {
+      droppedFiles.push({ path: r.file, reason: `budget: would exceed ${budget}-token limit` });
+      continue;
+    }
+    used += tokens;
+    // Only the scanner's `safe` signatures are emitted.
+    const safe = scan(r.sigs, r.file).safe;
+    const symbols = [];
+    const sourceLines = [];
+    for (const sig of safe) {
+      const { symbol, start, end } = parseAnchor(sig);
+      symbols.push(symbol);
+      // Only anchored signatures contribute a sourceLines entry.
+      if (start !== null) sourceLines.push({ symbol, start, end });
+    }
+    files.push({
+      path: r.file,
+      symbols,
+      reason: reasonFor(r.signals),
+      // Score relative to the best kept result, rounded to two decimals.
+      confidence: maxScore > 0 ? Math.round((r.score / maxScore) * 100) / 100 : 0,
+      sourceLines,
+      relatedTests: findRelatedTests(r.file, allFiles),
+      riskLabel: riskLabelFor(r.file),
+    });
+  }
+  const symbolCount = files.reduce((n, f) => n + f.symbols.length, 0);
+  const anchoredSymbols = files.reduce((n, f) => n + f.sourceLines.length, 0);
+  const pack = {
+    schemaVersion: SCHEMA_VERSION,
+    query,
+    intent,
+    files,
+    tokenBudget: { limit: budget, used, remaining: Math.max(0, budget - used) },
+    droppedFiles,
+    grounding: {
+      symbolCount,
+      anchoredSymbols,
+      // anchoredSymbols / symbolCount, rounded to three decimals.
+      anchorCoverage: symbolCount > 0 ? Math.round((anchoredSymbols / symbolCount) * 1000) / 1000 : 0,
+      // Placeholder; filled in below once the rest of the pack is final.
+      contextHash: null,
+      deterministic: true,
+    },
+  };
+  // Hash everything except the hash field itself.
+  // contextHash is undefined in the copy, and JSON.stringify omits undefined-valued properties.
+  const forHash = Object.assign({}, pack, {
+    grounding: Object.assign({}, pack.grounding, { contextHash: undefined }),
+  });
+  pack.grounding.contextHash = 'sha256:' + crypto.createHash('sha256').update(canonicalize(forHash)).digest('hex');
+  return pack;
+}
+// rank() returns [] for an empty/whitespace query; keep the filter readable.
+/**
+ * True when `query` is falsy or contains only whitespace.
+ * @param {string} query
+ * @returns {boolean}
+ */
+function ranked0Empty(query) {
+  return !query || !query.trim();
+}
+/**
+ * Pretty-printed (2-space indent) JSON rendering of a pack. Keys appear in
+ * the pack's own insertion order; this does not go through canonicalize.
+ * @param {object} pack
+ * @returns {string}
+ */
+function formatJSON(pack) {
+  return JSON.stringify(pack, null, 2);
+}
+/**
+ * Markdown handoff rendering of a pack: a title with the query, a summary
+ * list (schema, intent, budget, grounding, hash), one section per file with
+ * its symbols in a fenced block, and a final list of dropped files if any.
+ * NOTE(review): the query, paths and symbols are interpolated verbatim; text
+ * containing backticks (or a symbol that is itself a ``` line) would break
+ * the inline-code spans or close the fence early.
+ * @param {object} pack - as returned by buildEvidencePack
+ * @returns {string} lines joined with '\n'
+ */
+function formatMarkdown(pack) {
+  const L = [];
+  L.push(`# Evidence Pack — \`${pack.query}\``);
+  L.push('');
+  L.push(`- **Schema:** v${pack.schemaVersion}`);
+  L.push(`- **Intent:** ${pack.intent}`);
+  L.push(`- **Budget:** ${pack.tokenBudget.used} / ${pack.tokenBudget.limit} tokens used (${pack.tokenBudget.remaining} remaining)`);
+  L.push(`- **Grounding:** ${pack.grounding.anchoredSymbols}/${pack.grounding.symbolCount} symbols anchored (${Math.round(pack.grounding.anchorCoverage * 100)}%)`);
+  L.push(`- **Hash:** \`${pack.grounding.contextHash}\``);
+  L.push('');
+  for (const f of pack.files) {
+    L.push(`## \`${f.path}\`  _(${f.riskLabel}, confidence ${f.confidence})_`);
+    L.push(`_${f.reason}_`);
+    // The related-tests line is omitted entirely when there are none.
+    if (f.relatedTests.length) L.push(`Related tests: ${f.relatedTests.map((t) => `\`${t}\``).join(', ')}`);
+    L.push('');
+    L.push('```');
+    for (const s of f.symbols) L.push(s);
+    L.push('```');
+    L.push('');
+  }
+  if (pack.droppedFiles.length) {
+    L.push('## Dropped (over budget)');
+    for (const d of pack.droppedFiles) L.push(`- \`${d.path}\` — ${d.reason}`);
+    L.push('');
+  }
+  return L.join('\n');
+}
+// Exported surface. stemOf, reasonFor, sigTokens, canonicalize, sortKeys and
+// ranked0Empty are not exported.
+module.exports = {
+  buildEvidencePack,
+  formatJSON,
+  formatMarkdown,
+  parseAnchor,
+  riskLabelFor,
+  findRelatedTests,
+  SCHEMA_VERSION,
+};

package/src/mcp/server.js CHANGED Viewed

@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
 const SERVER_INFO = {
   name: 'sigmap',
-  version: '7.25.2',
+  version: '7.26.0',
   description: 'SigMap MCP server — code signatures on demand',
 };