npm - sigmap - Versions diffs - 5.2.0 → 5.3.0 - Mend

sigmap 5.2.0 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/AGENTS.md +68 -65
package/CHANGELOG.md +14 -0
package/README.md +36 -19
package/gen-context.js +32 -6
package/package.json +1 -1
package/packages/cli/package.json +1 -1
package/packages/core/package.json +1 -1
package/src/format/benchmark-report.js +443 -0
package/src/mcp/server.js +1 -1

package/AGENTS.md CHANGED Viewed

@@ -12,7 +12,7 @@ Use this marker block for all appendable context files:
 ## Auto-generated signatures
 <!-- Updated by gen-context.js -->
 You are a coding assistant with full knowledge of this codebase.
-Below are the code signatures extracted by SigMap v5.2.0 on 2026-04-16T22:22:03.099Z.
+Below are the code signatures extracted by SigMap v5.2.0 on 2026-04-16T23:13:56.540Z.
 Use these signatures to answer questions about the code accurately.
@@ -23,16 +23,51 @@ Use these signatures to answer questions about the code accurately.
 # Code signatures
-## changes (last 5 commits — 41 minutes ago)
+## changes (last 5 commits — 46 minutes ago)
 ```
 src/config/loader.js                          +loadBaseConfig  ~loadConfig  ~deepClone
 src/format/dashboard.js                       ~computeExtractorCoverage  ~readBenchmarkTrend
-src/judge/judge-engine.js                     +tokenize  +groundedness  +judge
-src/retrieval/ranker.js                       +detectIntent  ~formatRankJSON
+src/judge/judge-engine.js                     +tokenize  +groundedness  +extractContextFiles  +judge
+src/learning/weights.js                       +weightsPath  +clampMultiplier  +normalizeFile  +sanitizeWeights
+src/mcp/handlers.js                           ~queryContext  ~getImpact
+src/retrieval/ranker.js                       ~scoreFile  ~rank
+packages/core/index.js                        ~extract
 ```
 ## packages
+### packages/core/README.md
+```
+h1 sigmap-core
+h2 Installation
+h2 Quick start
+h2 API reference
+h3 `extract(src, language)` → `string[]`
+h3 `rank(query, sigIndex, opts?)` → `Result[]`
+h3 `buildSigIndex(cwd)` → `Map<string, string[]>`
+h3 `scan(sigs, filePath)` → `{ safe: string[], redacted: boolean }`
+h3 `score(cwd)` → `HealthResult`
+h2 Migration from v2.3 and earlier
+h2 v3.0 — Multi-Adapter Architecture (released)
+h2 Zero dependencies
+code-fence bash
+code-fence plain
+code-fence js
+code-fence ---
+```
+### packages/core/index.js
+```
+module.exports = { extract, rank, buildSigIndex, scan, score, adapt }
+function _resolveExtractor(language)
+function extract(src, language) → string[]
+function rank(query, sigIndex, opts) → { file: string, score: nu
+function buildSigIndex(cwd) → Map<string, string[]>
+function scan(sigs, filePath) → { safe: string[], redacte
+function score(cwd) → { * score: number, * grad
+function adapt(context, adapterName, opts = {}) → string
+```
 ### packages/adapters/claude.js
 ```
 module.exports = { name, format, outputPath, write }
@@ -115,38 +150,6 @@ module.exports = { CLI_ENTRY, run }
 function run(argv, cwd) → void
 ```
-### packages/core/README.md
-```
-h1 sigmap-core
-h2 Installation
-h2 Quick start
-h2 API reference
-h3 `extract(src, language)` → `string[]`
-h3 `rank(query, sigIndex, opts?)` → `Result[]`
-h3 `buildSigIndex(cwd)` → `Map<string, string[]>`
-h3 `scan(sigs, filePath)` → `{ safe: string[], redacted: boolean }`
-h3 `score(cwd)` → `HealthResult`
-h2 Migration from v2.3 and earlier
-h2 v3.0 — Multi-Adapter Architecture (released)
-h2 Zero dependencies
-code-fence bash
-code-fence plain
-code-fence js
-code-fence ---
-```
-### packages/core/index.js
-```
-module.exports = { extract, rank, buildSigIndex, scan, score, adapt }
-function _resolveExtractor(language)
-function extract(src, language) → string[]
-function rank(query, sigIndex, opts) → { file: string, score: nu
-function buildSigIndex(cwd) → Map<string, string[]>
-function scan(sigs, filePath) → { safe: string[], redacte
-function score(cwd) → { * score: number, * grad
-function adapt(context, adapterName, opts = {}) → string
-```
 ## src
 ### src/config/loader.js
@@ -187,6 +190,35 @@ function extractContextFiles(context, cwd)
 function judge(response, context, opts = {})
 ```
+### src/learning/weights.js
+```
+module.exports = { BASELINE, DECAY, MAX_MULT, MIN_MULT, weightsPath, clampMultiplier, normalizeFile, loadWeights, saveWeights, updateWeights, boostFiles, penalizeFiles, resetWeights }
+function weightsPath(cwd)
+function clampMultiplier(value)
+function normalizeFile(cwd, filePath)
+function sanitizeWeights(cwd, weights)
+function loadWeights(cwd)
+function saveWeights(cwd, weights)
+function updateWeights(cwd, opts = {})
+function boostFiles(cwd, files, amount = 0.15)
+function penalizeFiles(cwd, files, amount = 0.10)
+function resetWeights(cwd)
+```
+### src/mcp/handlers.js
+```
+module.exports = { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules, queryContext, getImpact }
+function readContext(args, cwd)
+function searchSignatures(args, cwd)
+function getMap(args, cwd)
+function createCheckpoint(args, cwd)
+function getRouting(args, cwd)
+function explainFile(args, cwd)
+function listModules(args, cwd)
+function queryContext(args, cwd)
+function getImpact(args, cwd)
+```
 ### src/mcp/server.js
 ```
 module.exports = { start }
@@ -604,21 +636,6 @@ module.exports = { score }
 function score(cwd) → { * score: number, * grad
 ```
-### src/learning/weights.js
-```
-module.exports = { BASELINE, DECAY, MAX_MULT, MIN_MULT, weightsPath, clampMultiplier, normalizeFile, loadWeights, saveWeights, updateWeights, boostFiles, penalizeFiles, resetWeights }
-function weightsPath(cwd)
-function clampMultiplier(value)
-function normalizeFile(cwd, filePath)
-function sanitizeWeights(cwd, weights)
-function loadWeights(cwd)
-function saveWeights(cwd, weights)
-function updateWeights(cwd, opts = {})
-function boostFiles(cwd, files, amount = 0.15)
-function penalizeFiles(cwd, files, amount = 0.10)
-function resetWeights(cwd)
-```
 ### src/map/class-hierarchy.js
 ```
 module.exports = { analyze }
@@ -641,20 +658,6 @@ function shouldSkipFile(rel)
 function analyze(files, cwd)
 ```
-### src/mcp/handlers.js
-```
-module.exports = { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules, queryContext, getImpact }
-function readContext(args, cwd)
-function searchSignatures(args, cwd)
-function getMap(args, cwd)
-function createCheckpoint(args, cwd)
-function getRouting(args, cwd)
-function explainFile(args, cwd)
-function listModules(args, cwd)
-function queryContext(args, cwd)
-function getImpact(args, cwd)
-```
 ### src/mcp/tools.js
 ```
 module.exports = { TOOLS }

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,20 @@ Format: [Semantic Versioning](https://semver.org/)
 ---
+## [5.3.0] — 2026-04-17
+### Added
+- **MCP auto-wire: Windsurf** — `sigmap --setup` now registers the MCP server in `.windsurf/mcp.json` (project-level) and `~/.codeium/windsurf/mcp_config.json` (global) using the standard `mcpServers` shape.
+- **MCP auto-wire: Zed** — `sigmap --setup` now registers a context server in `~/.config/zed/settings.json` using Zed's `context_servers` shape (`command.path` / `command.args`).
+- **Updated `--setup` snippet** — help output now prints manual config snippets for all four tools: Claude, Cursor, Windsurf, and Zed.
+### Changed
+- `registerMcp()` skips each target when the file does not exist and never overwrites an already-registered `sigmap` entry (idempotent).
+---
 ## [5.2.0] — 2026-04-17
 ### Added

package/README.md CHANGED Viewed

@@ -12,7 +12,7 @@
 </div>
 <div align="center">
-<img src="docs/impact-banner.svg" alt="SigMap — 6× better answers, 97% fewer tokens, 2× fewer prompts" width="760" />
+<img src="docs/impact-banner.svg" alt="SigMap — grounded AI coding context with fewer prompts and smaller context windows" width="760" />
 </div>
 ```sh
@@ -21,11 +21,28 @@ npx sigmap   # 10 seconds. zero config. your AI never reads the wrong file again
 **What you get in ~10 seconds**
 - A compact signature map of your codebase
-- The right file in context far more often (84.4% hit@5 vs 13.6% random)
-- Fewer retries (1.59 vs 2.84 prompts per task)
+- The right file in context far more often (78.9% hit@5 vs 13.6% random)
+- Fewer retries (1.69 vs 2.84 prompts per task)
 - Far smaller context (~2K–4K tokens instead of ~80K)
-> Latest: **v4.1.0** — Smart Budget. Token budget now auto-scales to your repo size, targeting 80% source-file coverage by default. No config change needed — it just works.
+> Latest: **v5.3.0** — Learning engine + workflow-first release. Use `ask`, `validate`, `judge`, `learn`, `weights`, `compare`, and `share` on top of the core signature pipeline.
+**What is new in v5.2**
+- `sigmap ask` creates task-focused context in one step
+- `sigmap validate` checks config health and query coverage
+- `sigmap judge` scores groundedness against the supplied context
+- `sigmap learn` and `sigmap weights` add safe local-only ranking feedback
+- `node scripts/run-benchmark-matrix.mjs --save --skip-clone` now writes an HTML benchmark dashboard
+**Daily workflow**
+```bash
+npx sigmap
+sigmap ask "explain the auth flow"
+sigmap validate --query "auth login token"
+sigmap judge --response response.txt --context .context/query-context.md
+sigmap weights
+```
 <div align="center">
 <img src="demo.gif" alt="SigMap demo — reducing 80K tokens to 4K in under 10 seconds" width="760" />
@@ -61,11 +78,11 @@ npx sigmap   # 10 seconds. zero config. your AI never reads the wrong file again
 | | Without SigMap | With SigMap |
 |---|:---:|:---:|
-| Task success | 10% | **59%** |
-| Prompts per task | 2.84 | **1.59** |
+| Task success | 10% | **52.2%** |
+| Prompts per task | 2.84 | **1.69** |
 | Tokens per session | ~80,000 | **~2,000** |
-| Right file found | 13.6% | **84.4%** |
-| Hallucination risk | 92% | **0%** |
+| Right file found | 13.6% | **78.9%** |
+| Hidden-symbol risk | 74.7% | **context surfaced locally** |
 Measured on 90 coding tasks across 18 real public repos. Full methodology and raw benchmark pages are linked below.
@@ -82,7 +99,7 @@ Measured on 90 coding tasks across 18 real public repos. Full methodology and ra
 | [Standalone binaries](docs/readmes/binaries.md) | macOS, Linux, Windows — no Node required |
 | [VS Code extension](#-vs-code-extension) | Status bar, stale alerts, commands |
 | [JetBrains plugin](#-jetbrains-plugin) | IntelliJ IDEA, WebStorm, PyCharm support |
-| [Languages supported](#-languages-supported) | 25 languages |
+| [Languages supported](#-languages-supported) | 29 languages |
 | [Context strategies](#-context-strategies) | full / per-module / hot-cold |
 | [MCP server](#-mcp-server) | 8 on-demand tools |
 | [CLI reference](#-cli-reference) | All flags |
@@ -105,7 +122,7 @@ SigMap scans your source files and extracts only the **function and class signat
 Your codebase
     │
     ▼
-sigmap ─────────► extracts signatures from 25 languages
+sigmap ─────────► extracts signatures from 29 languages
     │
     ▼
 .github/copilot-instructions.md   ◄── auto-read by Copilot / Claude / Cursor
@@ -126,7 +143,7 @@ AI agent session starts with full context
 | **SigMap signatures** | **~4,000** | **95%** |
 | SigMap + MCP (`hot-cold`) | ~200 | **99.75%** |
-> **97% fewer tokens. The same codebase understanding.**
+> **98.1% fewer tokens in the latest saved benchmark snapshot.**
 ### Benchmark: real-world repos
@@ -153,7 +170,7 @@ Reproduced with `node scripts/run-benchmark.mjs` on public repos:
 | fastify | JavaScript | 54.4K | 2.6K | **95.3%** |
 | fastapi | Python | 178.4K | 5.2K | **97.1%** |
-**Average: 97.6% reduction across 18 repos (16 languages).** See [`benchmarks/reports/token-reduction.md`](benchmarks/reports/token-reduction.md) or reproduce with `node scripts/run-benchmark.mjs`.
+**Average: 97.6% reduction across 18 repos (16 languages).** See [`benchmarks/reports/token-reduction.md`](benchmarks/reports/token-reduction.md), open `benchmarks/reports/benchmark-report.html` after a matrix run, or reproduce with `node scripts/run-benchmark.mjs`.
 ---
@@ -503,12 +520,12 @@ Compatible with **IntelliJ IDEA 2024.1+** (Community & Ultimate), **WebStorm**,
 ## 🌐 Languages supported
-> 25 languages. All implemented with zero external dependencies — pure regex + Node built-ins.
+> 29 languages and formats. All implemented with zero external dependencies — pure regex + Node built-ins.
 >
 > Also includes lightweight config/doc extraction for `.toml`, `.properties`, `.xml`, and `.md` to improve real-repo coverage beyond source-code files.
 <details>
-<summary><strong>Show all 25 languages</strong></summary>
+<summary><strong>Show all 29 languages</strong></summary>
 | Language | Extensions | Extracts |
 |---|---|---|
@@ -737,7 +754,7 @@ Copy `gen-context.config.json.example` to `gen-context.config.json`:
 - **`secretScan`** — redact secrets (AWS keys, tokens, etc.) from output
 - **`strategy`** — output mode: `full` (default) | `per-module` | `hot-cold`
-**Token budget (v4.1.0 — auto-scaling):**
+**Token budget (auto-scaling):**
 | Key | Default | Description |
 |---|---|---|
@@ -788,13 +805,13 @@ If `output` is omitted, the default `.github/copilot-instructions.md` is used.
 ## 📊 Observability
-### Coverage score (v4.0)
+### Coverage score
 Every run now prints a coverage line alongside token reduction:
 ```
 ───────────────────────────────────────────
- SigMap v4.1.0
+ SigMap v5.3.0
  Files scanned  : 76
  Symbols found  : 332
  Token reduction: 94%  (65,227 → 4,103)
@@ -813,7 +830,7 @@ sigmap --report
 ```
 [sigmap] report:
-  version         : 4.1.0
+  version         : 5.3.0
   files processed : 76
   reduction       : 93.7%
   coverage        : A (97%)  — 76 of 78 source files included
@@ -857,7 +874,7 @@ sigmap --health --json
 Every output file now carries a metadata line so you can inspect freshness at a glance:
 ```
-<!-- sigmap: version=4.0.0 confidence=HIGH coverage=97% dropped=2 commit=8540612 -->
+<!-- sigmap: version=5.3.0 confidence=HIGH coverage=97% dropped=2 commit=8540612 -->
 ```
 ### Diff risk score

package/gen-context.js CHANGED Viewed

@@ -4853,7 +4853,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
   const SERVER_INFO = {
     name: 'sigmap',
-  version: '5.2.0',
+  version: '5.3.0',
     description: 'SigMap MCP server — code signatures on demand',
   };
@@ -6571,7 +6571,7 @@ const path = require('path');
 const os = require('os');
 const { execSync } = require('child_process');
-const VERSION = '5.2.0';
+const VERSION = '5.3.0';
 const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
 function requireSourceOrBundled(key) {
@@ -8241,9 +8241,13 @@ function registerMcp(cwd, scriptPath) {
     args: [path.resolve(scriptPath), '--mcp'],
   };
+  // mcpServers shape: Claude (.claude/settings.json), Cursor (.cursor/mcp.json),
+  // Windsurf project (.windsurf/mcp.json) and global (~/.codeium/windsurf/mcp_config.json)
   const targets = [
     path.join(cwd, '.claude', 'settings.json'),
     path.join(cwd, '.cursor', 'mcp.json'),
+    path.join(cwd, '.windsurf', 'mcp.json'),
+    path.join(os.homedir(), '.codeium', 'windsurf', 'mcp_config.json'),
   ];
   for (const settingsPath of targets) {
@@ -8255,15 +8259,37 @@ function registerMcp(cwd, scriptPath) {
       if (settings.mcpServers['sigmap']) continue; // already registered
       settings.mcpServers['sigmap'] = serverEntry;
       fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n');
-      console.warn(`[sigmap] registered MCP server in ${path.relative(cwd, settingsPath)}`);
+      console.warn(`[sigmap] registered MCP server in ${settingsPath.startsWith(os.homedir()) ? '~' + settingsPath.slice(os.homedir().length) : path.relative(cwd, settingsPath)}`);
     } catch (err) {
       console.warn(`[sigmap] could not update ${path.relative(cwd, settingsPath)}: ${err.message}`);
     }
   }
-  // Always print the manual snippet so users can configure other tools
-  console.warn('[sigmap] MCP server config snippet:');
-  console.warn(JSON.stringify({ mcpServers: { 'sigmap': serverEntry } }, null, 2));
+  // Zed uses context_servers (different shape from mcpServers)
+  const zedSettingsPath = path.join(os.homedir(), '.config', 'zed', 'settings.json');
+  if (fs.existsSync(zedSettingsPath)) {
+    try {
+      const raw      = fs.readFileSync(zedSettingsPath, 'utf8');
+      const settings = JSON.parse(raw);
+      if (!settings.context_servers) settings.context_servers = {};
+      if (!settings.context_servers['sigmap']) {
+        settings.context_servers['sigmap'] = {
+          command: { path: 'node', args: [path.resolve(scriptPath), '--mcp'] },
+        };
+        fs.writeFileSync(zedSettingsPath, JSON.stringify(settings, null, 2) + '\n');
+        console.warn('[sigmap] registered context server in ~/.config/zed/settings.json');
+      }
+    } catch (err) {
+      console.warn(`[sigmap] could not update ~/.config/zed/settings.json: ${err.message}`);
+    }
+  }
+  // Print manual snippets for all 4 tools
+  console.warn('[sigmap] MCP / context server config snippets:');
+  console.warn('  Claude / Cursor / Windsurf (.claude/settings.json | .cursor/mcp.json | .windsurf/mcp.json):');
+  console.warn(JSON.stringify({ mcpServers: { sigmap: serverEntry } }, null, 2));
+  console.warn('  Zed (~/.config/zed/settings.json):');
+  console.warn(JSON.stringify({ context_servers: { sigmap: { command: { path: 'node', args: [path.resolve(scriptPath), '--mcp'] } } } }, null, 2));
 }
 // ---------------------------------------------------------------------------

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap",
-  "version": "5.2.0",
+  "version": "5.3.0",
   "description": "Zero-dependency AI context engine — 97% token reduction. No npm install. Runs on Node 18+.",
   "main": "gen-context.js",
   "exports": {

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-cli",
-  "version": "5.2.0",
+  "version": "5.3.0",
   "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
   "main": "index.js",
   "keywords": [

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-core",
-  "version": "5.2.0",
+  "version": "5.3.0",
   "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
   "main": "index.js",
   "keywords": [

package/src/format/benchmark-report.js ADDED Viewed

@@ -0,0 +1,443 @@
+'use strict';
+const fs = require('fs');
+const path = require('path');
+function escapeHtml(value) {
+  return String(value == null ? '' : value)
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;');
+}
+function formatInt(value) {
+  const n = Number(value);
+  if (!Number.isFinite(n)) return 'n/a';
+  return Math.round(n).toLocaleString('en-US');
+}
+function formatCompact(value) {
+  const n = Number(value);
+  if (!Number.isFinite(n)) return 'n/a';
+  if (Math.abs(n) >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
+  if (Math.abs(n) >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
+  return String(Math.round(n));
+}
+function formatPct(value, digits = 1) {
+  const n = Number(value);
+  if (!Number.isFinite(n)) return 'n/a';
+  return `${n.toFixed(digits)}%`;
+}
+function formatMaybePct(value, digits = 1) {
+  const n = Number(value);
+  if (!Number.isFinite(n)) return 'n/a';
+  return `${n.toFixed(digits)}%`;
+}
+function formatRatio(value, digits = 1) {
+  const n = Number(value);
+  if (!Number.isFinite(n)) return 'n/a';
+  return `${n.toFixed(digits)}x`;
+}
+function formatMoney(value) {
+  const n = Number(value);
+  if (!Number.isFinite(n)) return 'n/a';
+  return `$${n.toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
+}
+function durationLabel(ms) {
+  const n = Number(ms);
+  if (!Number.isFinite(n)) return 'n/a';
+  const sec = n / 1000;
+  if (sec < 60) return `${sec.toFixed(1)}s`;
+  const min = Math.floor(sec / 60);
+  const rem = sec - (min * 60);
+  return `${min}m ${rem.toFixed(1)}s`;
+}
+function maxOrZero(values) {
+  if (!Array.isArray(values) || values.length === 0) return 0;
+  return Math.max(...values.map((v) => (Number.isFinite(v) ? v : 0)));
+}
+function readJson(filePath) {
+  try {
+    if (!fs.existsSync(filePath)) return null;
+    return JSON.parse(fs.readFileSync(filePath, 'utf8'));
+  } catch (_) {
+    return null;
+  }
+}
+function loadBenchmarkReports(cwd) {
+  const reportsDir = path.join(cwd, 'benchmarks', 'reports');
+  return {
+    reportsDir,
+    token: readJson(path.join(reportsDir, 'token-reduction.json')),
+    retrieval: readJson(path.join(reportsDir, 'retrieval.json')),
+    quality: readJson(path.join(reportsDir, 'quality.json')),
+    task: readJson(path.join(reportsDir, 'task-benchmark.json')),
+    matrix: readJson(path.join(reportsDir, 'benchmark-matrix.json')),
+  };
+}
+function buildRetrievalSummary(retrieval) {
+  if (!retrieval || !Array.isArray(retrieval.repos) || retrieval.repos.length === 0) return null;
+  let totalTasks = 0;
+  let weightedHit = 0;
+  let weightedRand = 0;
+  let correct = 0;
+  let partial = 0;
+  let wrong = 0;
+  let repoCount = 0;
+  for (const repo of retrieval.repos) {
+    const tasks = Number(repo.tasks) || 0;
+    repoCount++;
+    totalTasks += tasks;
+    weightedHit += (Number(repo.hitAt5) || 0) * tasks;
+    weightedRand += (Number(repo.randomBaseline) || 0) * tasks;
+    correct += Number(repo.tiers && repo.tiers.correct) || 0;
+    partial += Number(repo.tiers && repo.tiers.partial) || 0;
+    wrong += Number(repo.tiers && repo.tiers.wrong) || 0;
+  }
+  const hitAt5 = totalTasks > 0 ? (weightedHit / totalTasks) * 100 : null;
+  const randomBaseline = totalTasks > 0 ? (weightedRand / totalTasks) * 100 : null;
+  const lift = hitAt5 && randomBaseline ? hitAt5 / randomBaseline : null;
+  return {
+    repoCount,
+    totalTasks,
+    hitAt5,
+    randomBaseline,
+    lift,
+    correct,
+    partial,
+    wrong,
+  };
+}
+function buildBenchmarkSummary(reports, matrixSummary) {
+  const missing = [];
+  if (!reports.token) missing.push('token-reduction.json');
+  if (!reports.retrieval) missing.push('retrieval.json');
+  if (!reports.quality) missing.push('quality.json');
+  if (!reports.task) missing.push('task-benchmark.json');
+  const retrievalSummary = buildRetrievalSummary(reports.retrieval);
+  const qualitySummary = reports.quality && reports.quality.summary ? reports.quality.summary : null;
+  const tokenSummary = reports.token && reports.token.summary ? reports.token.summary : null;
+  const taskSummary = reports.task && reports.task.summary ? reports.task.summary : null;
+  const matrix = matrixSummary || reports.matrix || null;
+  const generatedCandidates = [
+    matrix && matrix.generated,
+    reports.task && reports.task.generated,
+    reports.retrieval && reports.retrieval.generated,
+    reports.quality && reports.quality.timestamp,
+    reports.token && reports.token.timestamp,
+  ].filter(Boolean);
+  const generatedAt = generatedCandidates
+    .map((value) => ({ value, time: Date.parse(value) }))
+    .filter((item) => Number.isFinite(item.time))
+    .sort((a, b) => b.time - a.time)[0];
+  return {
+    generatedAt: (generatedAt && generatedAt.value) || generatedCandidates[0] || new Date().toISOString(),
+    missing,
+    tokenSummary,
+    retrievalSummary,
+    qualitySummary,
+    taskSummary,
+    matrix,
+  };
+}
+function renderCard(label, value, hint, tone) {
+  const toneClass = tone ? ` ${tone}` : '';
+  return [
+    `<article class="card${toneClass}">`,
+    `<div class="label">${escapeHtml(label)}</div>`,
+    `<div class="value">${escapeHtml(value)}</div>`,
+    `<div class="hint">${escapeHtml(hint || '')}</div>`,
+    '</article>',
+  ].join('');
+}
+function renderProgress(label, value, max, suffix) {
+  const safeValue = Number.isFinite(value) ? value : 0;
+  const safeMax = Math.max(1, Number.isFinite(max) ? max : 1);
+  const width = Math.max(2, Math.min(100, (safeValue / safeMax) * 100));
+  return [
+    '<div class="progress-row">',
+    `<div class="progress-label">${escapeHtml(label)}</div>`,
+    '<div class="progress-bar"><span style="width:',
+    String(width.toFixed(1)),
+    '%"></span></div>',
+    `<div class="progress-value">${escapeHtml(`${safeValue}${suffix || ''}`)}</div>`,
+    '</div>',
+  ].join('');
+}
+function renderMatrixSection(matrix) {
+  if (!matrix || !Array.isArray(matrix.steps) || matrix.steps.length === 0) return '';
+  const rows = matrix.steps.map((step) => {
+    const status = step.ok ? 'ok' : 'fail';
+    return [
+      '<tr>',
+      `<td>${escapeHtml(step.name)}</td>`,
+      `<td><span class="badge ${status}">${escapeHtml(step.ok ? 'ok' : `exit ${step.status}`)}</span></td>`,
+      `<td>${escapeHtml(durationLabel(step.durationMs))}</td>`,
+      `<td><code>${escapeHtml(['node', step.script].concat(step.args || []).join(' '))}</code></td>`,
+      '</tr>',
+    ].join('');
+  }).join('');
+  return [
+    '<section>',
+    '<h2>Run matrix</h2>',
+    '<p class="section-copy">This shows which benchmark jobs ran, whether they succeeded, and how long each step took.</p>',
+    '<table>',
+    '<thead><tr><th>Step</th><th>Status</th><th>Duration</th><th>Command</th></tr></thead>',
+    `<tbody>${rows}</tbody>`,
+    '</table>',
+    '</section>',
+  ].join('');
+}
+function renderTokenSection(token) {
+  if (!token || !Array.isArray(token.repos) || token.repos.length === 0) return '';
+  const rows = token.repos
+    .slice()
+    .sort((a, b) => (b.reductionPct || 0) - (a.reductionPct || 0))
+    .map((repo) => [
+      '<tr>',
+      `<td>${escapeHtml(repo.repo)}</td>`,
+      `<td>${escapeHtml(repo.language || 'n/a')}</td>`,
+      `<td>${escapeHtml(formatCompact(repo.rawTokens))}</td>`,
+      `<td>${escapeHtml(formatCompact(repo.finalTokens))}</td>`,
+      `<td>${escapeHtml(formatMaybePct(repo.reductionPct, 1))}</td>`,
+      '</tr>',
+    ].join(''))
+    .join('');
+  return [
+    '<section>',
+    '<h2>Token reduction</h2>',
+    '<p class="section-copy">Raw repository tokens versus SigMap output size across the benchmark repos.</p>',
+    '<table>',
+    '<thead><tr><th>Repo</th><th>Language</th><th>Raw tokens</th><th>Final tokens</th><th>Reduction</th></tr></thead>',
+    `<tbody>${rows}</tbody>`,
+    '</table>',
+    '</section>',
+  ].join('');
+}
+function renderRetrievalSection(retrieval) {
+  if (!retrieval || !Array.isArray(retrieval.repos) || retrieval.repos.length === 0) return '';
+  const rows = retrieval.repos.map((repo) => {
+    const lift = repo.randomBaseline > 0 ? (repo.hitAt5 / repo.randomBaseline) : null;
+    return [
+      '<tr>',
+      `<td>${escapeHtml(repo.repo)}</td>`,
+      `<td>${escapeHtml(formatMaybePct((repo.randomBaseline || 0) * 100, 1))}</td>`,
+      `<td>${escapeHtml(formatMaybePct((repo.hitAt5 || 0) * 100, 1))}</td>`,
+      `<td>${escapeHtml(formatRatio(lift, 1))}</td>`,
+      `<td>${escapeHtml(String((repo.tiers && repo.tiers.correct) || 0))}</td>`,
+      `<td>${escapeHtml(String((repo.tiers && repo.tiers.partial) || 0))}</td>`,
+      `<td>${escapeHtml(String((repo.tiers && repo.tiers.wrong) || 0))}</td>`,
+      '</tr>',
+    ].join('');
+  }).join('');
+  return [
+    '<section>',
+    '<h2>Retrieval quality</h2>',
+    '<p class="section-copy">Hit@5 performance against the random baseline, plus the quality-tier mix that drives the task benchmark.</p>',
+    '<table>',
+    '<thead><tr><th>Repo</th><th>Random hit@5</th><th>SigMap hit@5</th><th>Lift</th><th>Correct</th><th>Partial</th><th>Wrong</th></tr></thead>',
+    `<tbody>${rows}</tbody>`,
+    '</table>',
+    '</section>',
+  ].join('');
+}
+function renderQualitySection(quality) {
+  if (!quality || !Array.isArray(quality.repos) || quality.repos.length === 0) return '';
+  const rows = quality.repos.map((repo) => {
+    const overflow = (repo.rawTokens || 0) > 128000 ? 'overflow' : 'fits';
+    return [
+      '<tr>',
+      `<td>${escapeHtml(repo.repo)}</td>`,
+      `<td>${escapeHtml(formatInt(repo.groundedSymbols))}</td>`,
+      `<td>${escapeHtml(formatInt(repo.darkSymbols))}</td>`,
+      `<td>${escapeHtml(formatMaybePct(repo.groundingPct, 0))}</td>`,
+      `<td>${escapeHtml(String(repo.filesHiddenRaw || 0))}</td>`,
+      `<td><span class="badge ${overflow === 'overflow' ? 'warn' : 'ok'}">${escapeHtml(overflow)}</span></td>`,
+      '</tr>',
+    ].join('');
+  }).join('');
+  return [
+    '<section>',
+    '<h2>Quality and hallucination surface</h2>',
+    '<p class="section-copy">How much code stays visible to the model, plus the overflow and dark-symbol risk by repo.</p>',
+    '<table>',
+    '<thead><tr><th>Repo</th><th>Grounded symbols</th><th>Dark symbols</th><th>Grounding</th><th>Hidden files (raw)</th><th>GPT-4o 128K</th></tr></thead>',
+    `<tbody>${rows}</tbody>`,
+    '</table>',
+    '</section>',
+  ].join('');
+}
+function renderTaskSection(task) {
+  if (!task || !Array.isArray(task.repos) || task.repos.length === 0 || !task.summary) return '';
+  const summary = task.summary;
+  const maxReduction = maxOrZero(task.repos.map((repo) => Number(repo.reductionPct) || 0));
+  const repoBars = task.repos
+    .slice()
+    .sort((a, b) => (b.reductionPct || 0) - (a.reductionPct || 0))
+    .slice(0, 10)
+    .map((repo) => renderProgress(repo.repo, Number(repo.reductionPct) || 0, maxReduction, '%'))
+    .join('');
+  return [
+    '<section>',
+    '<h2>Task benchmark</h2>',
+    '<p class="section-copy">A prompt-reduction proxy derived from retrieval quality tiers. Lower prompts means the right file surfaces sooner.</p>',
+    '<div class="split">',
+    '<div class="panel">',
+    '<h3>Answer quality tiers</h3>',
+    renderProgress('Correct', Number(summary.correctPct) || 0, 100, '%'),
+    renderProgress('Partial', Number(summary.partialPct) || 0, 100, '%'),
+    renderProgress('Wrong', Number(summary.wrongPct) || 0, 100, '%'),
+    '</div>',
+    '<div class="panel">',
+    '<h3>Best prompt reduction by repo</h3>',
+    repoBars,
+    '</div>',
+    '</div>',
+    '</section>',
+  ].join('');
+}
+function generateBenchmarkReportHtml(reports, opts = {}) {
+  const summary = buildBenchmarkSummary(reports, opts.matrixSummary);
+  const cards = [];
+  cards.push(renderCard(
+    'Token reduction',
+    summary.tokenSummary ? formatPct(summary.tokenSummary.overallReductionPct, 1) : 'n/a',
+    summary.tokenSummary ? `${formatInt(summary.tokenSummary.repoCount)} repos • ${formatCompact(summary.tokenSummary.totalRawTokens)} raw -> ${formatCompact(summary.tokenSummary.totalFinalTokens)} final` : 'token-reduction.json missing',
+    'cool'
+  ));
+  cards.push(renderCard(
+    'Retrieval hit@5',
+    summary.retrievalSummary ? formatPct(summary.retrievalSummary.hitAt5, 1) : 'n/a',
+    summary.retrievalSummary ? `${formatPct(summary.retrievalSummary.randomBaseline, 1)} random baseline • ${formatRatio(summary.retrievalSummary.lift, 1)} lift` : 'retrieval.json missing',
+    'warm'
+  ));
+  cards.push(renderCard(
+    'Prompt reduction',
+    summary.taskSummary ? formatPct(summary.taskSummary.avgReductionPct, 0) : 'n/a',
+    summary.taskSummary ? `${summary.taskSummary.avgPromptsWithout} -> ${summary.taskSummary.avgPromptsWith} prompts • ${formatInt(summary.taskSummary.totalTasks)} tasks` : 'task-benchmark.json missing',
+    'neutral'
+  ));
+  cards.push(renderCard(
+    'Overflow risk',
+    summary.qualitySummary ? `${formatInt(summary.qualitySummary.overflowGPT4oCount)} repos` : 'n/a',
+    summary.qualitySummary ? `${formatInt(summary.qualitySummary.totalHiddenFiles)} hidden raw files • ${formatMoney(summary.qualitySummary.gpt4oSavedPerMonth)}/month saved` : 'quality.json missing',
+    summary.qualitySummary && summary.qualitySummary.overflowGPT4oCount > 0 ? 'warn' : 'ok'
+  ));
+  const missingHtml = summary.missing.length > 0
+    ? `<div class="notice">Missing source reports: ${escapeHtml(summary.missing.join(', '))}. The page still renders whatever data is available.</div>`
+    : '';
+  return [
+    '<!doctype html>',
+    '<html lang="en">',
+    '<head>',
+    '<meta charset="utf-8" />',
+    '<meta name="viewport" content="width=device-width, initial-scale=1" />',
+    '<title>SigMap Benchmark Report</title>',
+    '<style>',
+    ':root { color-scheme: light; --bg:#f5f1e8; --panel:#fffaf2; --ink:#1f1b16; --muted:#6a6258; --line:#dccfbf; --gold:#c87f2a; --green:#2f6f52; --blue:#2f5f8f; --red:#9f4f43; --shadow:0 18px 40px rgba(54,38,14,.10);} ',
+    '*{box-sizing:border-box} body{margin:0;font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;background:linear-gradient(180deg,#f3ecdf 0%,#f7f3ed 100%);color:var(--ink)}',
+    '.page{max-width:1240px;margin:0 auto;padding:28px 20px 56px}',
+    'header{display:flex;justify-content:space-between;gap:24px;align-items:flex-end;margin-bottom:24px}',
+    'h1{margin:0;font-size:clamp(2rem,4vw,3.6rem);line-height:1.02;letter-spacing:-.04em}',
+    '.lede{max-width:760px;color:var(--muted);font-size:1rem;line-height:1.6;margin-top:10px}',
+    '.stamp{font-size:.92rem;color:var(--muted);text-align:right}',
+    '.grid{display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:14px;margin:20px 0 24px}',
+    '.card,.panel,.notice,section{background:var(--panel);border:1px solid var(--line);box-shadow:var(--shadow);border-radius:18px}',
+    '.card{padding:18px 18px 16px}.card.cool{background:#f7f5ff}.card.warm{background:#fff4eb}.card.warn{background:#fff1eb}.card.ok{background:#eff8f1}',
+    '.label{font-size:.84rem;text-transform:uppercase;letter-spacing:.08em;color:var(--muted)}',
+    '.value{font-size:2rem;font-weight:700;letter-spacing:-.04em;margin-top:8px}',
+    '.hint{font-size:.95rem;color:var(--muted);margin-top:8px;line-height:1.5}',
+    '.notice{padding:14px 16px;margin-bottom:20px;color:var(--muted)}',
+    'section{padding:20px;margin-top:18px}',
+    'h2{margin:0 0 6px;font-size:1.4rem;letter-spacing:-.03em}',
+    'h3{margin:0 0 14px;font-size:1rem}',
+    '.section-copy{margin:0 0 16px;color:var(--muted);line-height:1.6}',
+    'table{width:100%;border-collapse:collapse;font-size:.95rem}',
+    'th,td{padding:10px 12px;border-bottom:1px solid var(--line);text-align:left;vertical-align:top}',
+    'th{font-size:.82rem;text-transform:uppercase;letter-spacing:.06em;color:var(--muted)}',
+    'tbody tr:hover{background:rgba(200,127,42,.06)}',
+    '.badge{display:inline-flex;align-items:center;padding:4px 8px;border-radius:999px;font-size:.78rem;font-weight:600;text-transform:uppercase;letter-spacing:.04em}',
+    '.badge.ok{background:#e6f4ea;color:#21573f}.badge.warn{background:#fff0de;color:#8a4a17}.badge.fail{background:#fde8e5;color:#8a2e23}',
+    '.split{display:grid;grid-template-columns:1fr 1fr;gap:16px}',
+    '.panel{padding:16px}',
+    '.progress-row{display:grid;grid-template-columns:140px 1fr 60px;gap:12px;align-items:center;margin:10px 0}',
+    '.progress-label,.progress-value{font-size:.92rem}',
+    '.progress-bar{height:10px;border-radius:999px;background:#efe4d5;overflow:hidden}',
+    '.progress-bar span{display:block;height:100%;border-radius:999px;background:linear-gradient(90deg,var(--gold),#ebbb61)}',
+    'code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:.85rem}',
+    '@media (max-width: 1020px){.grid{grid-template-columns:repeat(2,minmax(0,1fr))}.split{grid-template-columns:1fr}header{flex-direction:column;align-items:flex-start}.stamp{text-align:left}}',
+    '@media (max-width: 640px){.grid{grid-template-columns:1fr}.progress-row{grid-template-columns:110px 1fr 52px}th:nth-child(n+5),td:nth-child(n+5){display:none}}',
+    '</style>',
+    '</head>',
+    '<body>',
+    '<div class="page">',
+    '<header>',
+    '<div>',
+    '<h1>SigMap Benchmark Report</h1>',
+    '<p class="lede">A self-contained view of token reduction, retrieval quality, hallucination surface, and task-level prompt reduction. This page reads the saved JSON benchmark artifacts so it stays easy to regenerate locally.</p>',
+    '</div>',
+    `<div class="stamp">Generated: ${escapeHtml(summary.generatedAt)}<br />Source directory: <code>benchmarks/reports</code></div>`,
+    '</header>',
+    missingHtml,
+    `<div class="grid">${cards.join('')}</div>`,
+    renderMatrixSection(summary.matrix),
+    renderTokenSection(reports.token),
+    renderRetrievalSection(reports.retrieval),
+    renderQualitySection(reports.quality),
+    renderTaskSection(reports.task),
+    '</div>',
+    '</body>',
+    '</html>',
+  ].join('');
+}
+function writeBenchmarkReport(cwd, opts = {}) {
+  const reports = loadBenchmarkReports(cwd);
+  const html = generateBenchmarkReportHtml(reports, opts);
+  const filePath = path.join(reports.reportsDir, opts.fileName || 'benchmark-report.html');
+  fs.mkdirSync(path.dirname(filePath), { recursive: true });
+  fs.writeFileSync(filePath, html, 'utf8');
+  return {
+    file: filePath,
+    summary: buildBenchmarkSummary(reports, opts.matrixSummary),
+  };
+}
+// Public API of this module: the report loader, the summary builder, the
+// HTML generator, and the convenience writer that combines all three.
+module.exports = {
+  loadBenchmarkReports,
+  buildBenchmarkSummary,
+  generateBenchmarkReportHtml,
+  writeBenchmarkReport,
+};

package/src/mcp/server.js CHANGED Viewed

@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
 const SERVER_INFO = {
   name: 'sigmap',
-  version: '5.2.0',
+  version: '5.3.0',
   description: 'SigMap MCP server — code signatures on demand',
 };