sigmap 8.3.0 → 8.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,20 @@ Format: [Semantic Versioning](https://semver.org/)
10
10
 
11
11
  ---
12
12
 
13
+ ## [8.5.0] — 2026-07-05
14
+
15
+ Minor release — **deterministic query expansion (a vocabulary-mismatch recall aid).** The BM25 ranker now bridges common code-domain synonyms/abbreviations so a query for `authentication` can still surface a file whose signatures only say `auth`. Zero-dependency, deterministic. **Honest framing:** measured on the retrieval benchmark, this is **benchmark-neutral** (hit@5 unchanged within the harness's 86.7–87.8% noise band at the shipped weight) — not a hit@5 improvement. The benefit is for real users whose query vocabulary differs from the code, a case the curated benchmark doesn't exercise.
16
+
17
+ ### Added
18
+ - **Query expansion (#421, PR #422):** `src/retrieval/bm25.js` gains a curated, high-precision synonym/abbreviation table (`auth`↔`authentication`/`login`, `db`↔`database`, `ctx`↔`context`, `config`↔`configuration`, `req`/`res`, `init`, `impl`, …). `expandQuery()` adds synonyms to the query tokens at a **discount weight (0.15)** so an exact-term match always outranks a synonym-only match; documents are unchanged. Wired through the ranker, so `sigmap ask`, `--query`, and MCP `query_context` all benefit. A weight sweep confirmed higher weights regress retrieval, so 0.15 (benchmark-neutral) is the shipped setting.
19
+
20
+ ## [8.4.0] — 2026-07-05
21
+
22
+ Minor release — **PR Evidence Report (v9.0 G3): a branded, deterministic review artifact.** SigMap already had the pieces — `review-pr` findings and `get_diff_context` — but no single Markdown comment an agent or CI could post on a PR. This adds it: one report that answers *"what changed, what it touches, and what to test"*, with no LLM.
23
+
24
+ ### Added
25
+ - **PR Evidence Report (#417, PR #418):** new `src/review/pr-evidence.js` — `buildPrEvidence(changedFiles, cwd)` folds together, per changed file, its extracted **signatures**, **blast radius** (direct/transitive importers, impacted tests + routes), cross-language **related tests**, a **risk label**, and the **`review-pr` findings** (scope drift, god-node edits, missing tests, security-sensitive files). `formatPrEvidenceMarkdown` renders the branded **"🔍 PR Evidence Report"** — with **no wall-clock timestamp**, so it's byte-stable given a fixed tree (diff-friendly as a comment). Exposed via `sigmap review-pr --markdown` (alias `--evidence`); honors `--staged`/`--base`; the exit code reflects the review pass/fail so CI can both post the comment and gate on it. Reuses shipped zero-dep modules only; git stays behind the shell-free `git()` util.
26
+
13
27
  ## [8.3.0] — 2026-07-05
14
28
 
15
29
  Minor release — **Python site-packages grounding: the moat now spans both major ecosystems.** v8.1/v8.2 built local-library grounding for JS/TS (`node_modules` `.d.ts`); this extends it to **Python**, so `verify-ai-output` and the `verify_suggestion` MCP tool ground AI-suggested Python code against the libraries actually installed in the project's venv — with pinned versions (D8). Zero-dependency, no Python runtime, deterministic.
package/README.md CHANGED
@@ -98,7 +98,7 @@ Ask → Rank → Context → Validate → Judge → Learn
98
98
 
99
99
  <!--SM:benchmarkBlock-->
100
100
  ```
101
- Benchmark : sigmap-v8.3-main (21 repositories, including R language)
101
+ Benchmark : sigmap-v8.5-main (21 repositories, including R language)
102
102
  Date : 2026-07-04
103
103
 
104
104
  Hit@5 : 86.7% (baseline 13.6% — 6.4× lift)
package/gen-context.js CHANGED
@@ -13108,7 +13108,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
13108
13108
 
13109
13109
  const SERVER_INFO = {
13110
13110
  name: 'sigmap',
13111
- version: '8.3.0',
13111
+ version: '8.5.0',
13112
13112
  description: 'SigMap MCP server — code signatures on demand',
13113
13113
  };
13114
13114
 
@@ -13923,6 +13923,78 @@ __factories["./src/retrieval/bm25"] = function(module, exports) {
13923
13923
  // are counted PATH_BOOST times when building the document term-frequency map.
13924
13924
  const PATH_BOOST = 3;
13925
13925
 
13926
// Hand-picked synonym / abbreviation groups for code-domain vocabulary, so that
// a query such as `authentication` can still reach a file whose signatures only
// mention `auth`. The list is intentionally short: loose synonyms cost
// precision. Membership is symmetric — every entry of a group expands to every
// other entry. Entries are written as plain words; they are normalised through
// `tokenize` when the lookup below is built.
const EXPANSION_GROUPS = [
  ['auth', 'authenticate', 'authentication', 'login', 'signin', 'credential'],
  ['authorize', 'authorization', 'permission', 'access'],
  ['config', 'configuration', 'settings', 'options'],
  ['db', 'database'],
  ['ctx', 'context'],
  ['req', 'request'],
  ['res', 'response'],
  ['err', 'error'],
  ['msg', 'message'],
  ['init', 'initialize', 'initialization', 'setup'],
  ['async', 'asynchronous'],
  ['sync', 'synchronize', 'synchronous'],
  ['repo', 'repository'],
  ['impl', 'implementation'],
  ['util', 'utility', 'helper'],
  ['param', 'parameter', 'argument'],
  ['fn', 'func', 'function'],
  ['btn', 'button'],
  ['calc', 'calculate', 'calculation'],
  ['gen', 'generate', 'generator'],
  ['val', 'validate', 'validation'],
  ['del', 'delete', 'remove'],
  ['dir', 'directory', 'folder'],
  ['env', 'environment'],
  ['doc', 'document', 'documentation'],
  ['id', 'identifier'],
  ['num', 'number'],
  ['str', 'string'],
];

// Score multiplier for a query term that was added by expansion rather than
// typed by the user. Literal query tokens keep weight 1, so a synonym hit
// contributes only a fraction of what the same hit on a literal token would.
const EXPANSION_WEIGHT = 0.15;

// Lookup built once at load: normalised member → Set of the other normalised
// members of every group that member belongs to.
const EXPANSIONS = (() => {
  const lookup = new Map();
  for (const members of EXPANSION_GROUPS) {
    // Run every entry through the same `tokenize` used for queries. Entries
    // that tokenize to nothing are dropped; duplicates collapse to one key.
    const keys = [];
    for (const word of members) {
      const key = tokenize(word).join('');
      if (key && !keys.includes(key)) keys.push(key);
    }
    keys.forEach((key) => {
      // A key seen in an earlier group keeps its Set and accumulates more members.
      const related = lookup.get(key) || new Set();
      lookup.set(key, related);
      keys.forEach((candidate) => {
        if (candidate !== key) related.add(candidate);
      });
    });
  }
  return lookup;
})();
13978
+
13979
/**
 * Expand normalised query tokens with curated synonyms.
 *
 * Every original token is recorded with weight 1. Each synonym listed for a
 * token in `EXPANSIONS` is then added with `synonymWeight`, unless that term is
 * already present — so an original token is never downgraded, and a synonym
 * reachable from several tokens is only added once.
 *
 * The input is snapshotted once, so a one-shot iterable works for both passes,
 * and a missing token list yields an empty Map instead of throwing.
 *
 * @param {Iterable<string>|null|undefined} qToks normalised, de-duplicated query tokens
 * @param {number} [synonymWeight=EXPANSION_WEIGHT] weight given to added synonyms
 * @returns {Map<string, number>} token → weight, original tokens first
 */
function expandQuery(qToks, synonymWeight = EXPANSION_WEIGHT) {
  const tokens = qToks ? [...qToks] : [];

  // Seed with the literal tokens first so no later synonym can overwrite them.
  const weights = new Map(tokens.map((tok) => [tok, 1]));

  for (const tok of tokens) {
    const synonyms = EXPANSIONS.get(tok);
    if (!synonyms) continue;
    for (const syn of synonyms) {
      if (!weights.has(syn)) weights.set(syn, synonymWeight);
    }
  }
  return weights;
}
13997
+
13926
13998
  /**
13927
13999
  * BM25 re-rank of candidates against a query. Each candidate is
13928
14000
  * `{ file, sigs }`; the returned objects preserve all original candidate
@@ -13958,23 +14030,24 @@ __factories["./src/retrieval/bm25"] = function(module, exports) {
13958
14030
  }
13959
14031
 
13960
14032
  const qToks = [...new Set(tokenize(query))];
14033
+ const qWeights = expandQuery(qToks); // token → weight (1 exact, <1 synonym)
13961
14034
 
13962
14035
  return docs
13963
14036
  .map((d) => {
13964
14037
  let score = 0;
13965
- for (const t of qToks) {
14038
+ for (const [t, w] of qWeights) {
13966
14039
  const f = d.tf.get(t);
13967
14040
  if (!f) continue;
13968
14041
  const dfT = df.get(t);
13969
14042
  const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
13970
- score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
14043
+ score += w * ((idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl)));
13971
14044
  }
13972
14045
  return Object.assign({}, d.cand, { score });
13973
14046
  })
13974
14047
  .sort((a, c) => c.score - a.score || String(a.file).localeCompare(String(c.file)));
13975
14048
  }
13976
14049
 
13977
- module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
14050
+ module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP, expandQuery, EXPANSIONS, EXPANSION_WEIGHT };
13978
14051
 
13979
14052
  };
13980
14053
 
@@ -14610,6 +14683,149 @@ __factories["./src/retrieval/tokenizer"] = function(module, exports) {
14610
14683
 
14611
14684
  };
14612
14685
 
14686
+ // ── ./src/review/pr-evidence ──
14687
+ __factories["./src/review/pr-evidence"] = function(module, exports) {
14688
+
14689
+ /**
14690
+ * PR Evidence Report (v9.0 G3).
14691
+ *
14692
+ * A single, branded, deterministic Markdown artifact for code review: for each
14693
+ * changed file it folds together the signature context, blast radius (direct /
14694
+ * transitive importers, impacted tests + routes), cross-language related tests,
14695
+ * a risk label, and the `review-pr` findings (scope drift, god-node edits,
14696
+ * missing tests, security-sensitive files). Posted as a PR comment, it answers
14697
+ * "what changed, what it touches, and what to test" — without an LLM.
14698
+ *
14699
+ * Built entirely from shipped zero-dep modules (reviewPr, graph/impact,
14700
+ * evidence/pack, extractors/dispatch). Carries NO wall-clock timestamp, so the
14701
+ * report is byte-stable given a fixed tree — diff-friendly as a comment.
14702
+ */
14703
+
14704
+ const fs = require('fs');
14705
+ const path = require('path');
14706
+ const { reviewPr } = __require('./src/review/review-pr');
14707
+
14708
/**
 * Build the structured PR evidence for a changed-file list.
 *
 * Runs `reviewPr` over the normalised file list, then produces one report per
 * file holding its risk label, extracted signatures, blast radius and related
 * tests. The risk/related-test helpers, the signature index and the impact
 * graph are optional: when one cannot be loaded the matching field falls back
 * to a neutral default instead of failing the whole report.
 *
 * @param {Array<{path:string,status?:string}>|string[]} changedFiles
 * @param {string} cwd
 * @param {object} [opts]
 * @param {number} [opts.depth=2] blast-radius BFS depth
 * @param {string} [opts.scope] label for the diff scope (e.g. "vs main")
 * @returns {{ scope:string, files:object[], review:object }}
 */
function buildPrEvidence(changedFiles, cwd, opts = {}) {
  // Normalise to { path, status }; bare strings and missing statuses count as modified.
  const files = (changedFiles || []).map((f) =>
    typeof f === 'string' ? { path: f, status: 'M' } : { path: f.path, status: f.status || 'M' });

  const review = reviewPr(files, cwd, opts);

  // Adopt each optional helper only when the module really exports a function.
  // A plain destructuring assignment would replace the fallback with
  // `undefined` if the module loads but lacks one of the two exports.
  let riskLabelFor = () => 'source';
  let findRelatedTests = () => [];
  try {
    const pack = __require('./src/evidence/pack');
    if (typeof pack.riskLabelFor === 'function') riskLabelFor = pack.riskLabelFor;
    if (typeof pack.findRelatedTests === 'function') findRelatedTests = pack.findRelatedTests;
  } catch (_) { /* module unavailable — keep the defaults */ }
  const { extractFile, langFor } = __require('./src/extractors/dispatch');

  // Known files from the signature index, handed to the related-test lookup.
  let allFiles = [];
  try {
    const { buildSigIndex } = __require('./src/retrieval/ranker');
    allFiles = [...buildSigIndex(cwd).keys()];
  } catch (_) { /* no index */ }

  // Only files that still exist and have a known language are extracted/analysed.
  const isAnalyzable = (f) => f.status !== 'D' && Boolean(langFor(f.path));

  const depth = Number.isFinite(opts.depth) ? opts.depth : 2;
  const srcPaths = files.filter(isAnalyzable).map((f) => f.path);
  let impactByFile = new Map();
  try {
    const { analyzeImpact } = __require('./src/graph/impact');
    impactByFile = new Map(analyzeImpact(srcPaths, cwd, { depth }).map((r) => [r.file, r.impact]));
  } catch (_) { /* graph optional */ }

  const fileReports = files.map((f) => {
    const deleted = f.status === 'D';
    let signatures = [];
    if (isAnalyzable(f)) {
      try {
        signatures = extractFile(f.path, fs.readFileSync(path.resolve(cwd, f.path), 'utf8'));
      } catch (_) { /* unreadable — report the file without signatures */ }
    }
    const impact = impactByFile.get(f.path) || null;
    return {
      path: f.path,
      status: f.status,
      riskLabel: riskLabelFor(f.path),
      signatures,
      // Transitive importers are reported as a count only; direct ones are kept as a list.
      blast: impact ? {
        total: impact.totalImpact,
        direct: impact.direct || [],
        transitive: (impact.transitive || []).length,
        tests: impact.tests || [],
        routes: impact.routes || [],
      } : null,
      relatedTests: deleted ? [] : findRelatedTests(f.path, allFiles),
    };
  });

  return { scope: opts.scope || 'diff', files: fileReports, review };
}
14764
+
14765
// Display labels for the single-letter change status codes.
const STATUS_LABEL = { M: 'modified', A: 'added', D: 'deleted', R: 'renamed', C: 'copied' };

/**
 * Resolve a change status to its display label. Besides the plain letters this
 * accepts git's scored rename/copy form (`R100`, `C75`). Anything unrecognised
 * is returned unchanged so it still shows up in the report.
 */
function statusLabelFor(status) {
  const code = String(status);
  const known = (key) => Object.prototype.hasOwnProperty.call(STATUS_LABEL, key);
  if (known(code)) return STATUS_LABEL[code];
  const scored = /^([RC])\d+$/.exec(code);
  return scored ? STATUS_LABEL[scored[1]] : status;
}

/**
 * Render one review finding as a Markdown bullet. Unknown finding types get a
 * generic bullet so the list always matches the count shown in the header.
 */
function findingBullet(f) {
  switch (f.type) {
    case 'missing-tests':
      return `- ⚠️ **missing tests** — \`${f.file}\` changed with no matching test`;
    case 'security-file':
      return `- ⚠️ **security-sensitive file** — \`${f.file}\``;
    case 'god-node':
      return `- ⚠️ **god node** — \`${f.file}\` → ${f.count} dependents (high blast radius)`;
    case 'scope-drift':
      return `- ⚠️ **scope drift** — ${f.count} top-level dirs touched (${(f.dirs || []).join(', ')})`;
    default:
      return `- ⚠️ **${f.type}**` + (f.file ? ` — \`${f.file}\`` : '');
  }
}

/**
 * Pick a code fence that none of the given lines can close: three backticks,
 * or one more than the longest backtick run found in the lines.
 */
function fenceFor(lines) {
  let longest = 0;
  for (const line of lines) {
    for (const run of String(line).match(/`+/g) || []) longest = Math.max(longest, run.length);
  }
  return '`'.repeat(Math.max(3, longest + 1));
}

/**
 * Render the branded, deterministic "PR Evidence Report" Markdown.
 *
 * Output depends only on `evidence` and `opts` — no clock, no environment.
 *
 * @param {{ scope:string, files:object[], review:{ summary:object, findings?:object[] } }} evidence
 *   structure produced by `buildPrEvidence`
 * @param {object} [opts]
 * @param {number} [opts.maxSignatures=30] signatures listed per file before
 *   truncating; negative values count as 0, fractions are rounded down
 * @returns {string} Markdown, lines joined with "\n" (no trailing newline)
 */
function formatPrEvidenceMarkdown(evidence, opts = {}) {
  const L = [];
  const s = evidence.review.summary;
  const maxSigs = Number.isFinite(opts.maxSignatures) ? Math.max(0, Math.floor(opts.maxSignatures)) : 30;
  const tick = (t) => `\`${t}\``;

  L.push('## 🔍 PR Evidence Report');
  L.push('');
  L.push(
    `**${s.filesChanged} file(s) changed** — ${s.sourceChanged} source, ${s.testsChanged} test · ` +
    (s.ok ? '✅ no review findings' : `⚠️ ${s.findings} finding(s)`) +
    ` · scope: ${evidence.scope}`
  );
  L.push('');

  if (!s.ok) {
    L.push('### Review findings');
    for (const f of evidence.review.findings || []) L.push(findingBullet(f));
    L.push('');
  }

  L.push('### Changed files');
  for (const f of evidence.files) {
    L.push(`#### \`${f.path}\` _(${statusLabelFor(f.status)} · risk: ${f.riskLabel})_`);
    // Deleted files have nothing to extract or analyse.
    if (f.status === 'D') { L.push('_deleted_', ''); continue; }

    if (f.blast) {
      L.push(
        `**Blast radius:** ${f.blast.total} file(s) impacted — ${f.blast.direct.length} direct, ${f.blast.transitive} transitive` +
        (f.blast.tests.length ? `, ${f.blast.tests.length} test(s)` : '') +
        (f.blast.routes.length ? `, ${f.blast.routes.length} route(s)` : '')
      );
      if (f.blast.tests.length) L.push(`Tests to run: ${f.blast.tests.slice(0, 8).map(tick).join(', ')}`);
    } else {
      L.push('**Blast radius:** _(not in dependency graph — new or leaf file)_');
    }
    if (f.relatedTests.length) L.push(`Related tests: ${f.relatedTests.slice(0, 8).map(tick).join(', ')}`);

    if (f.signatures.length) {
      const shown = f.signatures.slice(0, maxSigs);
      // The fence is widened when a signature itself contains a backtick run.
      const fence = fenceFor(shown);
      L.push(fence);
      for (const sig of shown) L.push(sig);
      if (f.signatures.length > shown.length) L.push(`… +${f.signatures.length - shown.length} more`);
      L.push(fence);
    }
    L.push('');
  }

  L.push('---');
  L.push('_Deterministic PR Evidence Report — generated by [SigMap](https://sigmap.io). No LLM; byte-stable given a fixed tree._');
  return L.join('\n');
}
14824
+
14825
+ module.exports = { buildPrEvidence, formatPrEvidenceMarkdown };
14826
+
14827
+ };
14828
+
14613
14829
  // ── ./src/review/review-pr ──
14614
14830
  __factories["./src/review/review-pr"] = function(module, exports) {
14615
14831
 
@@ -17431,7 +17647,7 @@ function __tryGit(args, opts = {}) {
17431
17647
  catch (_) { return ''; }
17432
17648
  }
17433
17649
 
17434
- const VERSION = '8.3.0';
17650
+ const VERSION = '8.5.0';
17435
17651
  const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
17436
17652
 
17437
17653
  function requireSourceOrBundled(key) {
@@ -19232,7 +19448,8 @@ Usage:
19232
19448
  ${cmd} conventions Extract repo file-naming/export/test conventions (--conflicts, --inject, --report, --fix)
19233
19449
  ${cmd} scaffold "<name>" Propose a convention-matched file/dir scaffold (--ext, --threshold, --force, --json)
19234
19450
  ${cmd} verify-plan <plan.md|-> Check a plan vs the live index — files/symbols exist, blast radius, scope (--json)
19235
- ${cmd} review-pr Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --json)
19451
+ ${cmd} review-pr Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --base, --json, --markdown)
19452
+ ${cmd} review-pr --markdown PR Evidence Report — branded Markdown (signatures + blast radius + tests) to post as a PR comment
19236
19453
  ${cmd} create "<task>" Grounded-creation pipeline: scaffold → verify-plan → verify-ai-output → review-pr (--staged)
19237
19454
  ${cmd} squeeze <file|-> Minimize a pasted stacktrace/CI-log/JSON blob (--json for stats)
19238
19455
  ${cmd} ask "<query>" --squeeze Auto-accept input minimization (no prompt; for scripts/CI)
@@ -21173,6 +21390,15 @@ function main() {
21173
21390
  return { path: file, status };
21174
21391
  });
21175
21392
 
21393
+ // --markdown / --evidence: emit the branded, deterministic PR Evidence Report.
21394
+ if (args.includes('--markdown') || args.includes('--evidence')) {
21395
+ const { buildPrEvidence, formatPrEvidenceMarkdown } = requireSourceOrBundled('./src/review/pr-evidence');
21396
+ const scope = staged ? 'staged' : (baseArg ? `vs ${baseArg}` : 'branch');
21397
+ const ev = buildPrEvidence(changedFiles, cwd, { scope });
21398
+ process.stdout.write(formatPrEvidenceMarkdown(ev) + '\n');
21399
+ process.exit(ev.review.summary.ok ? 0 : 1);
21400
+ }
21401
+
21176
21402
  const { reviewPr } = requireSourceOrBundled('./src/review/review-pr');
21177
21403
  const result = reviewPr(changedFiles, cwd, {});
21178
21404
 
package/llms-full.txt CHANGED
@@ -11,13 +11,13 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
11
11
  effect), with no LLM calls, embeddings, or vector database. Works with Claude,
12
12
  Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
13
13
 
14
- # Version: 8.3.0 | Benchmark: sigmap-v8.3-main (2026-07-04)
14
+ # Version: 8.5.0 | Benchmark: sigmap-v8.5-main (2026-07-04)
15
15
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
16
16
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
17
17
 
18
18
  ---
19
19
 
20
- ## Core metrics (benchmark: sigmap-v8.3-main, 2026-07-04)
20
+ ## Core metrics (benchmark: sigmap-v8.5-main, 2026-07-04)
21
21
 
22
22
  | Metric | Without SigMap | With SigMap |
23
23
  |--------|----------------|-------------|
@@ -105,7 +105,8 @@ sigmap verify-ai-output <answer.md> --report Write a standalone HTML report (re
105
105
  sigmap conventions Extract repo file-naming/export/test conventions (--conflicts, --inject, --report, --fix)
106
106
  sigmap scaffold "<name>" Propose a convention-matched file/dir scaffold (--ext, --threshold, --force, --json)
107
107
  sigmap verify-plan <plan.md|-> Check a plan vs the live index — files/symbols exist, blast radius, scope (--json)
108
- sigmap review-pr Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --json)
108
+ sigmap review-pr Audit a diff — scope drift, god-node edits, missing tests, security files (--staged, --base, --json, --markdown)
109
+ sigmap review-pr --markdown PR Evidence Report — branded Markdown (signatures + blast radius + tests) to post as a PR comment
109
110
  sigmap create "<task>" Grounded-creation pipeline: scaffold → verify-plan → verify-ai-output → review-pr (--staged)
110
111
  sigmap squeeze <file|-> Minimize a pasted stacktrace/CI-log/JSON blob (--json for stats)
111
112
  sigmap ask "<query>" --squeeze Auto-accept input minimization (no prompt; for scripts/CI)
package/llms.txt CHANGED
@@ -11,7 +11,7 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
11
11
  effect), with no LLM calls, embeddings, or vector database. Works with Claude,
12
12
  Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
13
13
 
14
- # Version: 8.3.0 | Benchmark: sigmap-v8.3-main (2026-07-04)
14
+ # Version: 8.5.0 | Benchmark: sigmap-v8.5-main (2026-07-04)
15
15
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
16
16
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
17
17
 
@@ -23,7 +23,7 @@ Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
23
23
  - No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
24
24
  - Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
25
25
 
26
- ## Core metrics (benchmark: sigmap-v8.3-main, 2026-07-04)
26
+ ## Core metrics (benchmark: sigmap-v8.5-main, 2026-07-04)
27
27
 
28
28
  - hit@5 retrieval: 86.7% vs 13.6% random baseline (6.4× lift)
29
29
  - Token reduction: 97.0% average across benchmark repos
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "8.3.0",
3
+ "version": "8.5.0",
4
4
  "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
5
5
  "main": "packages/core/index.js",
6
6
  "exports": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-cli",
3
- "version": "8.3.0",
3
+ "version": "8.5.0",
4
4
  "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-core",
3
- "version": "8.3.0",
3
+ "version": "8.5.0",
4
4
  "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
5
5
  "main": "index.js",
6
6
  "keywords": [
package/src/mcp/server.js CHANGED
@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
18
18
 
19
19
  const SERVER_INFO = {
20
20
  name: 'sigmap',
21
- version: '8.3.0',
21
+ version: '8.5.0',
22
22
  description: 'SigMap MCP server — code signatures on demand',
23
23
  };
24
24
 
@@ -68,6 +68,78 @@ function tokenize(text) {
68
68
  // are counted PATH_BOOST times when building the document term-frequency map.
69
69
  const PATH_BOOST = 3;
70
70
 
71
// Hand-picked synonym / abbreviation groups for code-domain vocabulary, so that
// a query such as `authentication` can still reach a file whose signatures only
// mention `auth`. The list is intentionally short: loose synonyms cost
// precision. Membership is symmetric — every entry of a group expands to every
// other entry. Entries are written as plain words; they are normalised through
// `tokenize` when the lookup below is built.
const EXPANSION_GROUPS = [
  ['auth', 'authenticate', 'authentication', 'login', 'signin', 'credential'],
  ['authorize', 'authorization', 'permission', 'access'],
  ['config', 'configuration', 'settings', 'options'],
  ['db', 'database'],
  ['ctx', 'context'],
  ['req', 'request'],
  ['res', 'response'],
  ['err', 'error'],
  ['msg', 'message'],
  ['init', 'initialize', 'initialization', 'setup'],
  ['async', 'asynchronous'],
  ['sync', 'synchronize', 'synchronous'],
  ['repo', 'repository'],
  ['impl', 'implementation'],
  ['util', 'utility', 'helper'],
  ['param', 'parameter', 'argument'],
  ['fn', 'func', 'function'],
  ['btn', 'button'],
  ['calc', 'calculate', 'calculation'],
  ['gen', 'generate', 'generator'],
  ['val', 'validate', 'validation'],
  ['del', 'delete', 'remove'],
  ['dir', 'directory', 'folder'],
  ['env', 'environment'],
  ['doc', 'document', 'documentation'],
  ['id', 'identifier'],
  ['num', 'number'],
  ['str', 'string'],
];

// Score multiplier for a query term that was added by expansion rather than
// typed by the user. Literal query tokens keep weight 1, so a synonym hit
// contributes only a fraction of what the same hit on a literal token would.
const EXPANSION_WEIGHT = 0.15;

// Lookup built once at load: normalised member → Set of the other normalised
// members of every group that member belongs to.
const EXPANSIONS = (() => {
  const lookup = new Map();
  for (const members of EXPANSION_GROUPS) {
    // Run every entry through the same `tokenize` used for queries. Entries
    // that tokenize to nothing are dropped; duplicates collapse to one key.
    const keys = [];
    for (const word of members) {
      const key = tokenize(word).join('');
      if (key && !keys.includes(key)) keys.push(key);
    }
    keys.forEach((key) => {
      // A key seen in an earlier group keeps its Set and accumulates more members.
      const related = lookup.get(key) || new Set();
      lookup.set(key, related);
      keys.forEach((candidate) => {
        if (candidate !== key) related.add(candidate);
      });
    });
  }
  return lookup;
})();
123
+
124
/**
 * Expand normalised query tokens with curated synonyms.
 *
 * Every original token is recorded with weight 1. Each synonym listed for a
 * token in `EXPANSIONS` is then added with `synonymWeight`, unless that term is
 * already present — so an original token is never downgraded, and a synonym
 * reachable from several tokens is only added once.
 *
 * The input is snapshotted once, so a one-shot iterable works for both passes,
 * and a missing token list yields an empty Map instead of throwing.
 *
 * @param {Iterable<string>|null|undefined} qToks normalised, de-duplicated query tokens
 * @param {number} [synonymWeight=EXPANSION_WEIGHT] weight given to added synonyms
 * @returns {Map<string, number>} token → weight, original tokens first
 */
function expandQuery(qToks, synonymWeight = EXPANSION_WEIGHT) {
  const tokens = qToks ? [...qToks] : [];

  // Seed with the literal tokens first so no later synonym can overwrite them.
  const weights = new Map(tokens.map((tok) => [tok, 1]));

  for (const tok of tokens) {
    const synonyms = EXPANSIONS.get(tok);
    if (!synonyms) continue;
    for (const syn of synonyms) {
      if (!weights.has(syn)) weights.set(syn, synonymWeight);
    }
  }
  return weights;
}
142
+
71
143
  /**
72
144
  * BM25 re-rank of candidates against a query. Each candidate is
73
145
  * `{ file, sigs }`; the returned objects preserve all original candidate
@@ -103,20 +175,21 @@ function bm25rank(query, candidates) {
103
175
  }
104
176
 
105
177
  const qToks = [...new Set(tokenize(query))];
178
+ const qWeights = expandQuery(qToks); // token → weight (1 exact, <1 synonym)
106
179
 
107
180
  return docs
108
181
  .map((d) => {
109
182
  let score = 0;
110
- for (const t of qToks) {
183
+ for (const [t, w] of qWeights) {
111
184
  const f = d.tf.get(t);
112
185
  if (!f) continue;
113
186
  const dfT = df.get(t);
114
187
  const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
115
- score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
188
+ score += w * ((idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl)));
116
189
  }
117
190
  return Object.assign({}, d.cand, { score });
118
191
  })
119
192
  .sort((a, c) => c.score - a.score || String(a.file).localeCompare(String(c.file)));
120
193
  }
121
194
 
122
- module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
195
+ module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP, expandQuery, EXPANSIONS, EXPANSION_WEIGHT };
@@ -0,0 +1,139 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * PR Evidence Report (v9.0 G3).
5
+ *
6
+ * A single, branded, deterministic Markdown artifact for code review: for each
7
+ * changed file it folds together the signature context, blast radius (direct /
8
+ * transitive importers, impacted tests + routes), cross-language related tests,
9
+ * a risk label, and the `review-pr` findings (scope drift, god-node edits,
10
+ * missing tests, security-sensitive files). Posted as a PR comment, it answers
11
+ * "what changed, what it touches, and what to test" — without an LLM.
12
+ *
13
+ * Built entirely from shipped zero-dep modules (reviewPr, graph/impact,
14
+ * evidence/pack, extractors/dispatch). Carries NO wall-clock timestamp, so the
15
+ * report is byte-stable given a fixed tree — diff-friendly as a comment.
16
+ */
17
+
18
+ const fs = require('fs');
19
+ const path = require('path');
20
+ const { reviewPr } = require('./review-pr');
21
+
22
+ /**
23
+ * Build the structured PR evidence for a changed-file list.
24
+ * @param {Array<{path:string,status?:string}>|string[]} changedFiles
25
+ * @param {string} cwd
26
+ * @param {object} [opts]
27
+ * @param {number} [opts.depth=2] blast-radius BFS depth
28
+ * @param {string} [opts.scope] label for the diff scope (e.g. "vs main")
29
+ * @returns {{ scope:string, files:object[], review:object }}
30
+ */
31
+ function buildPrEvidence(changedFiles, cwd, opts = {}) {
32
+ const files = (changedFiles || []).map((f) =>
33
+ typeof f === 'string' ? { path: f, status: 'M' } : { path: f.path, status: f.status || 'M' });
34
+
35
+ const review = reviewPr(files, cwd, opts);
36
+
37
+ let riskLabelFor = () => 'source';
38
+ let findRelatedTests = () => [];
39
+ try { ({ riskLabelFor, findRelatedTests } = require('../evidence/pack')); } catch (_) { /* defaults */ }
40
+ const { extractFile, langFor } = require('../extractors/dispatch');
41
+
42
+ let allFiles = [];
43
+ try { const { buildSigIndex } = require('../retrieval/ranker'); allFiles = [...buildSigIndex(cwd).keys()]; } catch (_) { /* no index */ }
44
+
45
+ const depth = Number.isFinite(opts.depth) ? opts.depth : 2;
46
+ const srcPaths = files.filter((f) => f.status !== 'D' && langFor(f.path)).map((f) => f.path);
47
+ let impactByFile = new Map();
48
+ try {
49
+ const { analyzeImpact } = require('../graph/impact');
50
+ impactByFile = new Map(analyzeImpact(srcPaths, cwd, { depth }).map((r) => [r.file, r.impact]));
51
+ } catch (_) { /* graph optional */ }
52
+
53
+ const fileReports = files.map((f) => {
54
+ const deleted = f.status === 'D';
55
+ let signatures = [];
56
+ if (!deleted && langFor(f.path)) {
57
+ try { signatures = extractFile(f.path, fs.readFileSync(path.resolve(cwd, f.path), 'utf8')); } catch (_) { /* unreadable */ }
58
+ }
59
+ const impact = impactByFile.get(f.path) || null;
60
+ return {
61
+ path: f.path,
62
+ status: f.status,
63
+ riskLabel: riskLabelFor(f.path),
64
+ signatures,
65
+ blast: impact ? {
66
+ total: impact.totalImpact,
67
+ direct: impact.direct || [],
68
+ transitive: (impact.transitive || []).length,
69
+ tests: impact.tests || [],
70
+ routes: impact.routes || [],
71
+ } : null,
72
+ relatedTests: deleted ? [] : findRelatedTests(f.path, allFiles),
73
+ };
74
+ });
75
+
76
+ return { scope: opts.scope || 'diff', files: fileReports, review };
77
+ }
78
+
79
// Display labels for the single-letter change status codes.
const STATUS_LABEL = { M: 'modified', A: 'added', D: 'deleted', R: 'renamed', C: 'copied' };

/**
 * Resolve a change status to its display label. Besides the plain letters this
 * accepts git's scored rename/copy form (`R100`, `C75`). Anything unrecognised
 * is returned unchanged so it still shows up in the report.
 */
function statusLabelFor(status) {
  const code = String(status);
  const known = (key) => Object.prototype.hasOwnProperty.call(STATUS_LABEL, key);
  if (known(code)) return STATUS_LABEL[code];
  const scored = /^([RC])\d+$/.exec(code);
  return scored ? STATUS_LABEL[scored[1]] : status;
}

/**
 * Render one review finding as a Markdown bullet. Unknown finding types get a
 * generic bullet so the list always matches the count shown in the header.
 */
function findingBullet(f) {
  switch (f.type) {
    case 'missing-tests':
      return `- ⚠️ **missing tests** — \`${f.file}\` changed with no matching test`;
    case 'security-file':
      return `- ⚠️ **security-sensitive file** — \`${f.file}\``;
    case 'god-node':
      return `- ⚠️ **god node** — \`${f.file}\` → ${f.count} dependents (high blast radius)`;
    case 'scope-drift':
      return `- ⚠️ **scope drift** — ${f.count} top-level dirs touched (${(f.dirs || []).join(', ')})`;
    default:
      return `- ⚠️ **${f.type}**` + (f.file ? ` — \`${f.file}\`` : '');
  }
}

/**
 * Pick a code fence that none of the given lines can close: three backticks,
 * or one more than the longest backtick run found in the lines.
 */
function fenceFor(lines) {
  let longest = 0;
  for (const line of lines) {
    for (const run of String(line).match(/`+/g) || []) longest = Math.max(longest, run.length);
  }
  return '`'.repeat(Math.max(3, longest + 1));
}

/**
 * Render the branded, deterministic "PR Evidence Report" Markdown.
 *
 * Output depends only on `evidence` and `opts` — no clock, no environment.
 *
 * @param {{ scope:string, files:object[], review:{ summary:object, findings?:object[] } }} evidence
 *   structure produced by `buildPrEvidence`
 * @param {object} [opts]
 * @param {number} [opts.maxSignatures=30] signatures listed per file before
 *   truncating; negative values count as 0, fractions are rounded down
 * @returns {string} Markdown, lines joined with "\n" (no trailing newline)
 */
function formatPrEvidenceMarkdown(evidence, opts = {}) {
  const L = [];
  const s = evidence.review.summary;
  const maxSigs = Number.isFinite(opts.maxSignatures) ? Math.max(0, Math.floor(opts.maxSignatures)) : 30;
  const tick = (t) => `\`${t}\``;

  L.push('## 🔍 PR Evidence Report');
  L.push('');
  L.push(
    `**${s.filesChanged} file(s) changed** — ${s.sourceChanged} source, ${s.testsChanged} test · ` +
    (s.ok ? '✅ no review findings' : `⚠️ ${s.findings} finding(s)`) +
    ` · scope: ${evidence.scope}`
  );
  L.push('');

  if (!s.ok) {
    L.push('### Review findings');
    for (const f of evidence.review.findings || []) L.push(findingBullet(f));
    L.push('');
  }

  L.push('### Changed files');
  for (const f of evidence.files) {
    L.push(`#### \`${f.path}\` _(${statusLabelFor(f.status)} · risk: ${f.riskLabel})_`);
    // Deleted files have nothing to extract or analyse.
    if (f.status === 'D') { L.push('_deleted_', ''); continue; }

    if (f.blast) {
      L.push(
        `**Blast radius:** ${f.blast.total} file(s) impacted — ${f.blast.direct.length} direct, ${f.blast.transitive} transitive` +
        (f.blast.tests.length ? `, ${f.blast.tests.length} test(s)` : '') +
        (f.blast.routes.length ? `, ${f.blast.routes.length} route(s)` : '')
      );
      if (f.blast.tests.length) L.push(`Tests to run: ${f.blast.tests.slice(0, 8).map(tick).join(', ')}`);
    } else {
      L.push('**Blast radius:** _(not in dependency graph — new or leaf file)_');
    }
    if (f.relatedTests.length) L.push(`Related tests: ${f.relatedTests.slice(0, 8).map(tick).join(', ')}`);

    if (f.signatures.length) {
      const shown = f.signatures.slice(0, maxSigs);
      // The fence is widened when a signature itself contains a backtick run.
      const fence = fenceFor(shown);
      L.push(fence);
      for (const sig of shown) L.push(sig);
      if (f.signatures.length > shown.length) L.push(`… +${f.signatures.length - shown.length} more`);
      L.push(fence);
    }
    L.push('');
  }

  L.push('---');
  L.push('_Deterministic PR Evidence Report — generated by [SigMap](https://sigmap.io). No LLM; byte-stable given a fixed tree._');
  return L.join('\n');
}
138
+
139
+ module.exports = { buildPrEvidence, formatPrEvidenceMarkdown };