npm - sigmap - Versions diffs - 8.3.0 → 8.5.0 - Mend

sigmap 8.3.0 → 8.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/CHANGELOG.md +14 -0
package/README.md +1 -1
package/gen-context.js +232 -6
package/llms-full.txt +4 -3
package/llms.txt +2 -2
package/package.json +1 -1
package/packages/cli/package.json +1 -1
package/packages/core/package.json +1 -1
package/src/mcp/server.js +1 -1
package/src/retrieval/bm25.js +76 -3
package/src/review/pr-evidence.js +139 -0

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,20 @@ Format: [Semantic Versioning](https://semver.org/)
 ---
+## [8.5.0] — 2026-07-05
+Minor release — **deterministic query expansion (a vocabulary-mismatch recall aid).** The BM25 ranker now bridges common code-domain synonyms/abbreviations so a query for `authentication` can still surface a file whose signatures only say `auth`. Zero-dependency, deterministic. **Honest framing:** measured on the retrieval benchmark, this is **benchmark-neutral** (hit@5 unchanged within the harness's 86.7–87.8% noise band at the shipped weight) — not a hit@5 improvement. The benefit is for real users whose query vocabulary differs from the code, a case the curated benchmark doesn't exercise.
+### Added
+- **Query expansion (#421, PR #422):** `src/retrieval/bm25.js` gains a curated, high-precision synonym/abbreviation table (`auth`↔`authentication`/`login`, `db`↔`database`, `ctx`↔`context`, `config`↔`configuration`, `req`/`res`, `init`, `impl`, …). `expandQuery()` adds synonyms to the query tokens at a **discount weight (0.15)** so an exact-term match always outranks a synonym-only match; documents are unchanged. Wired through the ranker, so `sigmap ask`, `--query`, and MCP `query_context` all benefit. A weight sweep confirmed higher weights regress retrieval, so 0.15 (benchmark-neutral) is the shipped setting.
+## [8.4.0] — 2026-07-05
+Minor release — **PR Evidence Report (v9.0 G3): a branded, deterministic review artifact.** SigMap already had the pieces — `review-pr` findings and `get_diff_context` — but no single Markdown comment an agent or CI could post on a PR. This adds it: one report that answers *"what changed, what it touches, and what to test"*, with no LLM.
+### Added
+- **PR Evidence Report (#417, PR #418):** new `src/review/pr-evidence.js` — `buildPrEvidence(changedFiles, cwd)` folds together, per changed file, its extracted **signatures**, **blast radius** (direct/transitive importers, impacted tests + routes), cross-language **related tests**, a **risk label**, and the **`review-pr` findings** (scope drift, god-node edits, missing tests, security-sensitive files). `formatPrEvidenceMarkdown` renders the branded **"🔍 PR Evidence Report"** — with **no wall-clock timestamp**, so it's byte-stable given a fixed tree (diff-friendly as a comment). Exposed via `sigmap review-pr --markdown` (alias `--evidence`); honors `--staged`/`--base`; the exit code reflects the review pass/fail so CI can both post the comment and gate on it. Reuses shipped zero-dep modules only; git stays behind the shell-free `git()` util.
 ## [8.3.0] — 2026-07-05
 Minor release — **Python site-packages grounding: the moat now spans both major ecosystems.** v8.1/v8.2 built local-library grounding for JS/TS (`node_modules` `.d.ts`); this extends it to **Python**, so `verify-ai-output` and the `verify_suggestion` MCP tool ground AI-suggested Python code against the libraries actually installed in the project's venv — with pinned versions (D8). Zero-dependency, no Python runtime, deterministic.

package/README.md CHANGED Viewed

@@ -98,7 +98,7 @@ Ask → Rank → Context → Validate → Judge → Learn
 <!--SM:benchmarkBlock-->
 ```
-Benchmark : sigmap-v8.3-main (21 repositories, including R language)
+Benchmark : sigmap-v8.5-main (21 repositories, including R language)
 Date      : 2026-07-04
 Hit@5          : 86.7%   (baseline 13.6%  — 6.4× lift)

package/gen-context.js CHANGED Viewed

@@ -13108,7 +13108,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
   const SERVER_INFO = {
     name: 'sigmap',
-    version: '8.3.0',
+    version: '8.5.0',
     description: 'SigMap MCP server — code signatures on demand',
   };
@@ -13923,6 +13923,78 @@ __factories["./src/retrieval/bm25"] = function(module, exports) {
   // are counted PATH_BOOST times when building the document term-frequency map.
   const PATH_BOOST = 3;
+  // Curated, high-precision code-domain synonym / abbreviation expansions. A query
+  // for `authentication` should still surface a file whose signatures only say
+  // `auth`. Kept deliberately tight — over-broad synonyms hurt precision. Groups
+  // are expanded bidirectionally (every member maps to every other member of its group).
+  // Values are tokenized+stemmed when the EXPANSIONS lookup is built, so entries are written in natural form.
+  const EXPANSION_GROUPS = [ // one inner array per synonym group
+    ['auth', 'authenticate', 'authentication', 'login', 'signin', 'credential'],
+    ['authorize', 'authorization', 'permission', 'access'],
+    ['config', 'configuration', 'settings', 'options'],
+    ['db', 'database'],
+    ['ctx', 'context'],
+    ['req', 'request'],
+    ['res', 'response'],
+    ['err', 'error'],
+    ['msg', 'message'],
+    ['init', 'initialize', 'initialization', 'setup'],
+    ['async', 'asynchronous'],
+    ['sync', 'synchronize', 'synchronous'],
+    ['repo', 'repository'],
+    ['impl', 'implementation'],
+    ['util', 'utility', 'helper'],
+    ['param', 'parameter', 'argument'],
+    ['fn', 'func', 'function'],
+    ['btn', 'button'],
+    ['calc', 'calculate', 'calculation'],
+    ['gen', 'generate', 'generator'],
+    ['val', 'validate', 'validation'],
+    ['del', 'delete', 'remove'],
+    ['dir', 'directory', 'folder'],
+    ['env', 'environment'],
+    ['doc', 'document', 'documentation'],
+    ['id', 'identifier'],
+    ['num', 'number'],
+    ['str', 'string'],
+  ];
+  // The weight applied to an expanded (synonym) query term; original query tokens use weight 1. The discount is meant to keep
+  // an exact match on the literal query token ahead of a synonym-only match. NOTE(review): the weight only scales a term's BM25 contribution, so a rare (high-IDF) synonym could still outscore a very common exact term — confirm "always outranks" holds in practice.
+  const EXPANSION_WEIGHT = 0.15;
+  // Build a stemmed lookup: stem(member) → Set of the group's other stemmed members. A key that occurs in more than one group accumulates the members of all of them.
+  const EXPANSIONS = (() => { // evaluated once, when this module body runs
+    const map = new Map();
+    for (const group of EXPANSION_GROUPS) {
+      const stemmed = [...new Set(group.map((w) => tokenize(w).join('')).filter(Boolean))]; // tokens of one member are concatenated into a single key; empty keys dropped; duplicates collapsed. NOTE(review): a member that tokenizes into several tokens becomes one glued key — confirm that matches how query tokens are produced
+      for (const s of stemmed) {
+        if (!map.has(s)) map.set(s, new Set()); // reuse the entry when s was already registered by an earlier group
+        for (const other of stemmed) if (other !== s) map.get(s).add(other); // a key is never listed as its own synonym
+      }
+    }
+    return map;
+  })();
+  /**
+   * Expand stemmed query tokens with curated synonyms. Returns a Map of
+   * token → weight (1 for the original query tokens, EXPANSION_WEIGHT for
+   * synonyms). Original tokens always keep full weight even if also a synonym.
+   *
+   * Expansion is a single hop: only the EXPANSIONS entries of the tokens in
+   * `qToks` are added; the entries of the added synonyms are not followed.
+   * Lookup is by exact key, so a token without an entry is simply passed
+   * through. In the returned Map the original tokens come first (in `qToks`
+   * order), followed by the added synonyms in the order they were found.
+   *
+   * @param {string[]} qToks  stemmed, de-duplicated query tokens
+   * @returns {Map<string, number>}
+   */
+  function expandQuery(qToks) {
+    const weights = new Map();
+    for (const t of qToks) weights.set(t, 1); // seed every original token first so a synonym can never downgrade it
+    for (const t of qToks) {
+      const syns = EXPANSIONS.get(t);
+      if (!syns) continue; // no curated synonyms for this token
+      for (const s of syns) if (!weights.has(s)) weights.set(s, EXPANSION_WEIGHT); // add only tokens not already present
+    }
+    return weights;
+  }
   /**
    * BM25 re-rank of candidates against a query. Each candidate is
    * `{ file, sigs }`; the returned objects preserve all original candidate
@@ -13958,23 +14030,24 @@ __factories["./src/retrieval/bm25"] = function(module, exports) {
     }
     const qToks = [...new Set(tokenize(query))];
+    const qWeights = expandQuery(qToks); // token → weight (1 exact, <1 synonym)
     return docs
       .map((d) => {
         let score = 0;
-        for (const t of qToks) {
+        for (const [t, w] of qWeights) {
           const f = d.tf.get(t);
           if (!f) continue;
           const dfT = df.get(t);
           const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
-          score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
+          score += w * ((idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl)));
         }
         return Object.assign({}, d.cand, { score });
       })
       .sort((a, c) => c.score - a.score || String(a.file).localeCompare(String(c.file)));
   }
-  module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
+  module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP, expandQuery, EXPANSIONS, EXPANSION_WEIGHT };
 };
@@ -14610,6 +14683,149 @@ __factories["./src/retrieval/tokenizer"] = function(module, exports) {
 };
+// ── ./src/review/pr-evidence ──
+__factories["./src/review/pr-evidence"] = function(module, exports) {
+  /**
+   * PR Evidence Report (v9.0 G3).
+   *
+   * A single, branded, deterministic Markdown artifact for code review: for each
+   * changed file it folds together the signature context, blast radius (direct /
+   * transitive importers, impacted tests + routes), cross-language related tests,
+   * a risk label, and the `review-pr` findings (scope drift, god-node edits,
+   * missing tests, security-sensitive files). Posted as a PR comment, it answers
+   * "what changed, what it touches, and what to test" — without an LLM.
+   *
+   * Built entirely from shipped zero-dep modules (reviewPr, graph/impact,
+   * evidence/pack, extractors/dispatch). Carries NO wall-clock timestamp, so the
+   * report is byte-stable given a fixed tree — diff-friendly as a comment.
+   */
+  const fs = require('fs');
+  const path = require('path');
+  const { reviewPr } = __require('./src/review/review-pr');
+  /**
+   * Build the structured PR evidence for a changed-file list.
+   *
+   * Every entry is normalised to `{ path, status }` (a bare string, or a missing
+   * status, becomes status 'M'); a missing list is treated as empty. For each
+   * file the report carries the risk label, the extracted signatures, a
+   * blast-radius summary (null when the impact analysis has no entry for the
+   * path) and the related tests. Loading evidence/pack, building the signature
+   * index and running the impact analysis are best-effort: if they throw, the
+   * fallbacks are the label 'source', no related tests, an empty file list and
+   * no blast data. review-pr and extractors/dispatch are not guarded, so errors
+   * from them propagate to the caller.
+   *
+   * @param {Array<{path:string,status?:string}>|string[]} changedFiles
+   * @param {string} cwd
+   * @param {object} [opts]
+   * @param {number} [opts.depth=2]   blast-radius BFS depth
+   * @param {string} [opts.scope]     label for the diff scope (e.g. "vs main")
+   * @returns {{ scope:string, files:object[], review:object }}
+   */
+  function buildPrEvidence(changedFiles, cwd, opts = {}) {
+    const files = (changedFiles || []).map((f) =>
+      typeof f === 'string' ? { path: f, status: 'M' } : { path: f.path, status: f.status || 'M' });
+    const review = reviewPr(files, cwd, opts); // opts is passed through unchanged
+    let riskLabelFor = () => 'source'; // fallback used when evidence/pack cannot be loaded
+    let findRelatedTests = () => []; // fallback used when evidence/pack cannot be loaded
+    try { ({ riskLabelFor, findRelatedTests } = __require('./src/evidence/pack')); } catch (_) { /* defaults */ }
+    const { extractFile, langFor } = __require('./src/extractors/dispatch');
+    let allFiles = [];
+    try { const { buildSigIndex } = __require('./src/retrieval/ranker'); allFiles = [...buildSigIndex(cwd).keys()]; } catch (_) { /* no index */ }
+    const depth = Number.isFinite(opts.depth) ? opts.depth : 2; // anything that is not a finite number falls back to 2
+    const srcPaths = files.filter((f) => f.status !== 'D' && langFor(f.path)).map((f) => f.path); // non-deleted files for which langFor() returns a truthy value
+    let impactByFile = new Map();
+    try {
+      const { analyzeImpact } = __require('./src/graph/impact');
+      impactByFile = new Map(analyzeImpact(srcPaths, cwd, { depth }).map((r) => [r.file, r.impact])); // NOTE(review): the lookup below assumes r.file has the same form as the input path — confirm
+    } catch (_) { /* graph optional */ }
+    const fileReports = files.map((f) => {
+      const deleted = f.status === 'D';
+      let signatures = [];
+      if (!deleted && langFor(f.path)) {
+        try { signatures = extractFile(f.path, fs.readFileSync(path.resolve(cwd, f.path), 'utf8')); } catch (_) { /* unreadable */ }
+      }
+      const impact = impactByFile.get(f.path) || null;
+      return {
+        path: f.path,
+        status: f.status,
+        riskLabel: riskLabelFor(f.path),
+        signatures,
+        blast: impact ? {
+          total: impact.totalImpact,
+          direct: impact.direct || [],
+          transitive: (impact.transitive || []).length, // a count, unlike `direct`, which keeps the list
+          tests: impact.tests || [],
+          routes: impact.routes || [],
+        } : null,
+        relatedTests: deleted ? [] : findRelatedTests(f.path, allFiles),
+      };
+    });
+    return { scope: opts.scope || 'diff', files: fileReports, review }; // scope label defaults to 'diff'
+  }
+  const STATUS_LABEL = { M: 'modified', A: 'added', D: 'deleted', R: 'renamed', C: 'copied' }; // one-letter change status → word shown in the report; the formatter prints any other status as-is
+  /**
+   * Render the branded, deterministic "PR Evidence Report" Markdown.
+   *
+   * Layout: title, one summary line, an optional "Review findings" list (only
+   * when the review summary is not ok), then one "Changed files" entry per file
+   * with blast radius, tests and a fenced block of signatures, and a footer.
+   * Only the finding types missing-tests, security-file, god-node and
+   * scope-drift are rendered; a finding of any other type produces no line.
+   *
+   * @param {object} evidence  reads `review.summary`, `review.findings`, `scope` and `files`, as produced by buildPrEvidence
+   * @param {object} [opts]
+   * @param {number} [opts.maxSignatures=30]  maximum number of signatures listed per file
+   * @returns {string} the report lines joined with '\n' (no trailing newline)
+   */
+  function formatPrEvidenceMarkdown(evidence, opts = {}) {
+    const L = []; // output lines, joined at the end
+    const s = evidence.review.summary;
+    const maxSigs = Number.isFinite(opts.maxSignatures) ? opts.maxSignatures : 30;
+    L.push('## 🔍 PR Evidence Report');
+    L.push('');
+    L.push(
+      `**${s.filesChanged} file(s) changed** — ${s.sourceChanged} source, ${s.testsChanged} test · ` +
+      (s.ok ? '✅ no review findings' : `⚠️ ${s.findings} finding(s)`) +
+      ` · scope: ${evidence.scope}`
+    );
+    L.push('');
+    if (!s.ok) {
+      L.push('### Review findings');
+      for (const f of evidence.review.findings) {
+        if (f.type === 'missing-tests') L.push(`- ⚠️ **missing tests** — \`${f.file}\` changed with no matching test`);
+        else if (f.type === 'security-file') L.push(`- ⚠️ **security-sensitive file** — \`${f.file}\``);
+        else if (f.type === 'god-node') L.push(`- ⚠️ **god node** — \`${f.file}\` → ${f.count} dependents (high blast radius)`);
+        else if (f.type === 'scope-drift') L.push(`- ⚠️ **scope drift** — ${f.count} top-level dirs touched (${f.dirs.join(', ')})`);
+      }
+      L.push('');
+    }
+    L.push('### Changed files');
+    for (const f of evidence.files) {
+      const st = STATUS_LABEL[f.status] || f.status; // unknown status codes are shown verbatim
+      L.push(`#### \`${f.path}\`  _(${st} · risk: ${f.riskLabel})_`);
+      if (f.status === 'D') { L.push('_deleted_', ''); continue; } // deleted files get the heading only
+      if (f.blast) {
+        L.push(
+          `**Blast radius:** ${f.blast.total} file(s) impacted — ${f.blast.direct.length} direct, ${f.blast.transitive} transitive` +
+          (f.blast.tests.length ? `, ${f.blast.tests.length} test(s)` : '') +
+          (f.blast.routes.length ? `, ${f.blast.routes.length} route(s)` : '')
+        );
+        if (f.blast.tests.length) L.push(`Tests to run: ${f.blast.tests.slice(0, 8).map((t) => '`' + t + '`').join(', ')}`); // at most 8 listed
+      } else {
+        L.push('**Blast radius:** _(not in dependency graph — new or leaf file)_');
+      }
+      if (f.relatedTests.length) L.push(`Related tests: ${f.relatedTests.slice(0, 8).map((t) => '`' + t + '`').join(', ')}`); // at most 8 listed
+      if (f.signatures.length) {
+        L.push('```');
+        for (const sig of f.signatures.slice(0, maxSigs)) L.push(sig); // pushed verbatim; NOTE(review): a signature containing a ``` line would close the fence early — confirm extractors never emit one
+        if (f.signatures.length > maxSigs) L.push(`… +${f.signatures.length - maxSigs} more`);
+        L.push('```');
+      }
+      L.push('');
+    }
+    L.push('---');
+    L.push('_Deterministic PR Evidence Report — generated by [SigMap](https://sigmap.io). No LLM; byte-stable given a fixed tree._');
+    return L.join('\n');
+  }
+  module.exports = { buildPrEvidence, formatPrEvidenceMarkdown };
+};
 // ── ./src/review/review-pr ──
 __factories["./src/review/review-pr"] = function(module, exports) {
@@ -17431,7 +17647,7 @@ function __tryGit(args, opts = {}) {
   catch (_) { return ''; }
 }
-const VERSION = '8.3.0';
+const VERSION = '8.5.0';
 const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
 function requireSourceOrBundled(key) {
@@ -19232,7 +19448,8 @@ Usage:
   ${cmd} conventions                       Extract repo file-naming/export/test conventions (--conflicts, --inject, --report, --fix)
   ${cmd} scaffold "<name>"                 Propose a convention-matched file/dir scaffold (--ext, --threshold, --force, --json)
   ${cmd} verify-plan <plan.md|->           Check a plan vs the live index — files/symbols exist, blast radius, scope (--json)
-  ${cmd} review-pr                         Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --json)
+  ${cmd} review-pr                         Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --base, --json, --markdown)
+  ${cmd} review-pr --markdown              PR Evidence Report — branded Markdown (signatures + blast radius + tests) to post as a PR comment
   ${cmd} create "<task>"                   Grounded-creation pipeline: scaffold → verify-plan → verify-ai-output → review-pr (--staged)
   ${cmd} squeeze <file|->                  Minimize a pasted stacktrace/CI-log/JSON blob (--json for stats)
   ${cmd} ask "<query>" --squeeze           Auto-accept input minimization (no prompt; for scripts/CI)
@@ -21173,6 +21390,15 @@ function main() {
       return { path: file, status };
     });
+    // --markdown / --evidence: emit the branded, deterministic PR Evidence Report.
+    if (args.includes('--markdown') || args.includes('--evidence')) {
+      const { buildPrEvidence, formatPrEvidenceMarkdown } = requireSourceOrBundled('./src/review/pr-evidence');
+      const scope = staged ? 'staged' : (baseArg ? `vs ${baseArg}` : 'branch');
+      const ev = buildPrEvidence(changedFiles, cwd, { scope });
+      process.stdout.write(formatPrEvidenceMarkdown(ev) + '\n');
+      process.exit(ev.review.summary.ok ? 0 : 1);
+    }
     const { reviewPr } = requireSourceOrBundled('./src/review/review-pr');
     const result = reviewPr(changedFiles, cwd, {});

package/llms-full.txt CHANGED Viewed

@@ -11,13 +11,13 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
 effect), with no LLM calls, embeddings, or vector database. Works with Claude,
 Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
-# Version: 8.3.0 | Benchmark: sigmap-v8.3-main (2026-07-04)
+# Version: 8.5.0 | Benchmark: sigmap-v8.5-main (2026-07-04)
 # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
 # Regenerate: npm run generate:llms   |   Validate: npm run validate:llms
 ---
-## Core metrics (benchmark: sigmap-v8.3-main, 2026-07-04)
+## Core metrics (benchmark: sigmap-v8.5-main, 2026-07-04)
 | Metric | Without SigMap | With SigMap |
 |--------|----------------|-------------|
@@ -105,7 +105,8 @@ sigmap verify-ai-output <answer.md> --report  Write a standalone HTML report (re
 sigmap conventions                       Extract repo file-naming/export/test conventions (--conflicts, --inject, --report, --fix)
 sigmap scaffold "<name>"                 Propose a convention-matched file/dir scaffold (--ext, --threshold, --force, --json)
 sigmap verify-plan <plan.md|->           Check a plan vs the live index — files/symbols exist, blast radius, scope (--json)
-sigmap review-pr                         Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --json)
+sigmap review-pr                         Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --base, --json, --markdown)
+sigmap review-pr --markdown              PR Evidence Report — branded Markdown (signatures + blast radius + tests) to post as a PR comment
 sigmap create "<task>"                   Grounded-creation pipeline: scaffold → verify-plan → verify-ai-output → review-pr (--staged)
 sigmap squeeze <file|->                  Minimize a pasted stacktrace/CI-log/JSON blob (--json for stats)
 sigmap ask "<query>" --squeeze           Auto-accept input minimization (no prompt; for scripts/CI)

package/llms.txt CHANGED Viewed

@@ -11,7 +11,7 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
 effect), with no LLM calls, embeddings, or vector database. Works with Claude,
 Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
-# Version: 8.3.0 | Benchmark: sigmap-v8.3-main (2026-07-04)
+# Version: 8.5.0 | Benchmark: sigmap-v8.5-main (2026-07-04)
 # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
 # Regenerate: npm run generate:llms   |   Validate: npm run validate:llms
@@ -23,7 +23,7 @@ Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
 - No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
 - Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
-## Core metrics (benchmark: sigmap-v8.3-main, 2026-07-04)
+## Core metrics (benchmark: sigmap-v8.5-main, 2026-07-04)
 - hit@5 retrieval: 86.7% vs 13.6% random baseline (6.4× lift)
 - Token reduction: 97.0% average across benchmark repos

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap",
-  "version": "8.3.0",
+  "version": "8.5.0",
   "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
   "main": "packages/core/index.js",
   "exports": {

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-cli",
-  "version": "8.3.0",
+  "version": "8.5.0",
   "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
   "main": "index.js",
   "keywords": [

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-core",
-  "version": "8.3.0",
+  "version": "8.5.0",
   "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
   "main": "index.js",
   "keywords": [

package/src/mcp/server.js CHANGED Viewed

@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
 const SERVER_INFO = {
   name: 'sigmap',
-  version: '8.3.0',
+  version: '8.5.0',
   description: 'SigMap MCP server — code signatures on demand',
 };

package/src/retrieval/bm25.js CHANGED Viewed

@@ -68,6 +68,78 @@ function tokenize(text) {
 // are counted PATH_BOOST times when building the document term-frequency map.
 const PATH_BOOST = 3;
+// Curated, high-precision code-domain synonym / abbreviation expansions. A query
+// for `authentication` should still surface a file whose signatures only say
+// `auth`. Kept deliberately tight — over-broad synonyms hurt precision. Groups
+// are expanded bidirectionally (every member maps to every other member of its group).
+// Values are tokenized+stemmed when the EXPANSIONS lookup is built, so entries are written in natural form.
+const EXPANSION_GROUPS = [ // one inner array per synonym group
+  ['auth', 'authenticate', 'authentication', 'login', 'signin', 'credential'],
+  ['authorize', 'authorization', 'permission', 'access'],
+  ['config', 'configuration', 'settings', 'options'],
+  ['db', 'database'],
+  ['ctx', 'context'],
+  ['req', 'request'],
+  ['res', 'response'],
+  ['err', 'error'],
+  ['msg', 'message'],
+  ['init', 'initialize', 'initialization', 'setup'],
+  ['async', 'asynchronous'],
+  ['sync', 'synchronize', 'synchronous'],
+  ['repo', 'repository'],
+  ['impl', 'implementation'],
+  ['util', 'utility', 'helper'],
+  ['param', 'parameter', 'argument'],
+  ['fn', 'func', 'function'],
+  ['btn', 'button'],
+  ['calc', 'calculate', 'calculation'],
+  ['gen', 'generate', 'generator'],
+  ['val', 'validate', 'validation'],
+  ['del', 'delete', 'remove'],
+  ['dir', 'directory', 'folder'],
+  ['env', 'environment'],
+  ['doc', 'document', 'documentation'],
+  ['id', 'identifier'],
+  ['num', 'number'],
+  ['str', 'string'],
+];
+// The weight applied to an expanded (synonym) query term; original query tokens use weight 1. The discount is meant to keep
+// an exact match on the literal query token ahead of a synonym-only match. NOTE(review): the weight only scales a term's BM25 contribution, so a rare (high-IDF) synonym could still outscore a very common exact term — confirm "always outranks" holds in practice.
+const EXPANSION_WEIGHT = 0.15;
+// Build a stemmed lookup: stem(member) → Set of the group's other stemmed members. A key that occurs in more than one group accumulates the members of all of them.
+const EXPANSIONS = (() => { // evaluated once, when this module is loaded
+  const map = new Map();
+  for (const group of EXPANSION_GROUPS) {
+    const stemmed = [...new Set(group.map((w) => tokenize(w).join('')).filter(Boolean))]; // tokens of one member are concatenated into a single key; empty keys dropped; duplicates collapsed. NOTE(review): a member that tokenizes into several tokens becomes one glued key — confirm that matches how query tokens are produced
+    for (const s of stemmed) {
+      if (!map.has(s)) map.set(s, new Set()); // reuse the entry when s was already registered by an earlier group
+      for (const other of stemmed) if (other !== s) map.get(s).add(other); // a key is never listed as its own synonym
+    }
+  }
+  return map;
+})();
+/**
+ * Expand stemmed query tokens with curated synonyms. Returns a Map of
+ * token → weight (1 for the original query tokens, EXPANSION_WEIGHT for
+ * synonyms). Original tokens always keep full weight even if also a synonym.
+ *
+ * Expansion is a single hop: only the EXPANSIONS entries of the tokens in
+ * `qToks` are added; the entries of the added synonyms are not followed.
+ * Lookup is by exact key, so a token without an entry is simply passed
+ * through. In the returned Map the original tokens come first (in `qToks`
+ * order), followed by the added synonyms in the order they were found.
+ *
+ * @param {string[]} qToks  stemmed, de-duplicated query tokens
+ * @returns {Map<string, number>}
+ */
+function expandQuery(qToks) {
+  const weights = new Map();
+  for (const t of qToks) weights.set(t, 1); // seed every original token first so a synonym can never downgrade it
+  for (const t of qToks) {
+    const syns = EXPANSIONS.get(t);
+    if (!syns) continue; // no curated synonyms for this token
+    for (const s of syns) if (!weights.has(s)) weights.set(s, EXPANSION_WEIGHT); // add only tokens not already present
+  }
+  return weights;
+}
 /**
  * BM25 re-rank of candidates against a query. Each candidate is
  * `{ file, sigs }`; the returned objects preserve all original candidate
@@ -103,20 +175,21 @@ function bm25rank(query, candidates) {
   }
   const qToks = [...new Set(tokenize(query))];
+  const qWeights = expandQuery(qToks); // token → weight (1 exact, <1 synonym)
   return docs
     .map((d) => {
       let score = 0;
-      for (const t of qToks) {
+      for (const [t, w] of qWeights) {
         const f = d.tf.get(t);
         if (!f) continue;
         const dfT = df.get(t);
         const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
-        score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
+        score += w * ((idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl)));
       }
       return Object.assign({}, d.cand, { score });
     })
     .sort((a, c) => c.score - a.score || String(a.file).localeCompare(String(c.file)));
 }
-module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
+module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP, expandQuery, EXPANSIONS, EXPANSION_WEIGHT };

package/src/review/pr-evidence.js ADDED Viewed

@@ -0,0 +1,139 @@
+'use strict';
+/**
+ * PR Evidence Report (v9.0 G3).
+ *
+ * A single, branded, deterministic Markdown artifact for code review: for each
+ * changed file it folds together the signature context, blast radius (direct /
+ * transitive importers, impacted tests + routes), cross-language related tests,
+ * a risk label, and the `review-pr` findings (scope drift, god-node edits,
+ * missing tests, security-sensitive files). Posted as a PR comment, it answers
+ * "what changed, what it touches, and what to test" — without an LLM.
+ *
+ * Built entirely from shipped zero-dep modules (reviewPr, graph/impact,
+ * evidence/pack, extractors/dispatch). Carries NO wall-clock timestamp, so the
+ * report is byte-stable given a fixed tree — diff-friendly as a comment.
+ */
+const fs = require('fs');
+const path = require('path');
+const { reviewPr } = require('./review-pr');
+/**
+ * Build the structured PR evidence for a changed-file list.
+ *
+ * Every entry is normalised to `{ path, status }` (a bare string, or a missing
+ * status, becomes status 'M'); a missing list is treated as empty. For each
+ * file the report carries the risk label, the extracted signatures, a
+ * blast-radius summary (null when the impact analysis has no entry for the
+ * path) and the related tests. Loading evidence/pack, building the signature
+ * index and running the impact analysis are best-effort: if they throw, the
+ * fallbacks are the label 'source', no related tests, an empty file list and
+ * no blast data. review-pr and extractors/dispatch are not guarded, so errors
+ * from them propagate to the caller.
+ *
+ * @param {Array<{path:string,status?:string}>|string[]} changedFiles
+ * @param {string} cwd
+ * @param {object} [opts]
+ * @param {number} [opts.depth=2]   blast-radius BFS depth
+ * @param {string} [opts.scope]     label for the diff scope (e.g. "vs main")
+ * @returns {{ scope:string, files:object[], review:object }}
+ */
+function buildPrEvidence(changedFiles, cwd, opts = {}) {
+  const files = (changedFiles || []).map((f) =>
+    typeof f === 'string' ? { path: f, status: 'M' } : { path: f.path, status: f.status || 'M' });
+  const review = reviewPr(files, cwd, opts); // opts is passed through unchanged
+  let riskLabelFor = () => 'source'; // fallback used when evidence/pack cannot be loaded
+  let findRelatedTests = () => []; // fallback used when evidence/pack cannot be loaded
+  try { ({ riskLabelFor, findRelatedTests } = require('../evidence/pack')); } catch (_) { /* defaults */ }
+  const { extractFile, langFor } = require('../extractors/dispatch');
+  let allFiles = [];
+  try { const { buildSigIndex } = require('../retrieval/ranker'); allFiles = [...buildSigIndex(cwd).keys()]; } catch (_) { /* no index */ }
+  const depth = Number.isFinite(opts.depth) ? opts.depth : 2; // anything that is not a finite number falls back to 2
+  const srcPaths = files.filter((f) => f.status !== 'D' && langFor(f.path)).map((f) => f.path); // non-deleted files for which langFor() returns a truthy value
+  let impactByFile = new Map();
+  try {
+    const { analyzeImpact } = require('../graph/impact');
+    impactByFile = new Map(analyzeImpact(srcPaths, cwd, { depth }).map((r) => [r.file, r.impact])); // NOTE(review): the lookup below assumes r.file has the same form as the input path — confirm
+  } catch (_) { /* graph optional */ }
+  const fileReports = files.map((f) => {
+    const deleted = f.status === 'D';
+    let signatures = [];
+    if (!deleted && langFor(f.path)) {
+      try { signatures = extractFile(f.path, fs.readFileSync(path.resolve(cwd, f.path), 'utf8')); } catch (_) { /* unreadable */ }
+    }
+    const impact = impactByFile.get(f.path) || null;
+    return {
+      path: f.path,
+      status: f.status,
+      riskLabel: riskLabelFor(f.path),
+      signatures,
+      blast: impact ? {
+        total: impact.totalImpact,
+        direct: impact.direct || [],
+        transitive: (impact.transitive || []).length, // a count, unlike `direct`, which keeps the list
+        tests: impact.tests || [],
+        routes: impact.routes || [],
+      } : null,
+      relatedTests: deleted ? [] : findRelatedTests(f.path, allFiles),
+    };
+  });
+  return { scope: opts.scope || 'diff', files: fileReports, review }; // scope label defaults to 'diff'
+}
+const STATUS_LABEL = { M: 'modified', A: 'added', D: 'deleted', R: 'renamed', C: 'copied' }; // one-letter change status → word shown in the report; the formatter prints any other status as-is
+/**
+ * Render the branded, deterministic "PR Evidence Report" Markdown.
+ *
+ * Layout: title, one summary line, an optional "Review findings" list (only
+ * when the review summary is not ok), then one "Changed files" entry per file
+ * with blast radius, tests and a fenced block of signatures, and a footer.
+ * Only the finding types missing-tests, security-file, god-node and
+ * scope-drift are rendered; a finding of any other type produces no line.
+ *
+ * @param {object} evidence  reads `review.summary`, `review.findings`, `scope` and `files`, as produced by buildPrEvidence
+ * @param {object} [opts]
+ * @param {number} [opts.maxSignatures=30]  maximum number of signatures listed per file
+ * @returns {string} the report lines joined with '\n' (no trailing newline)
+ */
+function formatPrEvidenceMarkdown(evidence, opts = {}) {
+  const L = []; // output lines, joined at the end
+  const s = evidence.review.summary;
+  const maxSigs = Number.isFinite(opts.maxSignatures) ? opts.maxSignatures : 30;
+  L.push('## 🔍 PR Evidence Report');
+  L.push('');
+  L.push(
+    `**${s.filesChanged} file(s) changed** — ${s.sourceChanged} source, ${s.testsChanged} test · ` +
+    (s.ok ? '✅ no review findings' : `⚠️ ${s.findings} finding(s)`) +
+    ` · scope: ${evidence.scope}`
+  );
+  L.push('');
+  if (!s.ok) {
+    L.push('### Review findings');
+    for (const f of evidence.review.findings) {
+      if (f.type === 'missing-tests') L.push(`- ⚠️ **missing tests** — \`${f.file}\` changed with no matching test`);
+      else if (f.type === 'security-file') L.push(`- ⚠️ **security-sensitive file** — \`${f.file}\``);
+      else if (f.type === 'god-node') L.push(`- ⚠️ **god node** — \`${f.file}\` → ${f.count} dependents (high blast radius)`);
+      else if (f.type === 'scope-drift') L.push(`- ⚠️ **scope drift** — ${f.count} top-level dirs touched (${f.dirs.join(', ')})`);
+    }
+    L.push('');
+  }
+  L.push('### Changed files');
+  for (const f of evidence.files) {
+    const st = STATUS_LABEL[f.status] || f.status; // unknown status codes are shown verbatim
+    L.push(`#### \`${f.path}\`  _(${st} · risk: ${f.riskLabel})_`);
+    if (f.status === 'D') { L.push('_deleted_', ''); continue; } // deleted files get the heading only
+    if (f.blast) {
+      L.push(
+        `**Blast radius:** ${f.blast.total} file(s) impacted — ${f.blast.direct.length} direct, ${f.blast.transitive} transitive` +
+        (f.blast.tests.length ? `, ${f.blast.tests.length} test(s)` : '') +
+        (f.blast.routes.length ? `, ${f.blast.routes.length} route(s)` : '')
+      );
+      if (f.blast.tests.length) L.push(`Tests to run: ${f.blast.tests.slice(0, 8).map((t) => '`' + t + '`').join(', ')}`); // at most 8 listed
+    } else {
+      L.push('**Blast radius:** _(not in dependency graph — new or leaf file)_');
+    }
+    if (f.relatedTests.length) L.push(`Related tests: ${f.relatedTests.slice(0, 8).map((t) => '`' + t + '`').join(', ')}`); // at most 8 listed
+    if (f.signatures.length) {
+      L.push('```');
+      for (const sig of f.signatures.slice(0, maxSigs)) L.push(sig); // pushed verbatim; NOTE(review): a signature containing a ``` line would close the fence early — confirm extractors never emit one
+      if (f.signatures.length > maxSigs) L.push(`… +${f.signatures.length - maxSigs} more`);
+      L.push('```');
+    }
+    L.push('');
+  }
+  L.push('---');
+  L.push('_Deterministic PR Evidence Report — generated by [SigMap](https://sigmap.io). No LLM; byte-stable given a fixed tree._');
+  return L.join('\n');
+}
+module.exports = { buildPrEvidence, formatPrEvidenceMarkdown };