npm - sigmap - Versions diffs - 4.3.0 → 5.1.0 - Mend

sigmap 4.3.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/AGENTS.md +45 -32
package/CHANGELOG.md +20 -0
package/gen-context.js +254 -8
package/package.json +1 -1
package/packages/cli/package.json +1 -1
package/packages/core/package.json +1 -1
package/src/config/loader.js +77 -5
package/src/format/dashboard.js +20 -0
package/src/judge/judge-engine.js +55 -0
package/src/mcp/server.js +1 -1

package/AGENTS.md CHANGED Viewed

@@ -12,20 +12,23 @@ Use this marker block for all appendable context files:
 ## Auto-generated signatures
 <!-- Updated by gen-context.js -->
 You are a coding assistant with full knowledge of this codebase.
-Below are the code signatures extracted by SigMap v4.1.2 on 2026-04-16T17:45:07.132Z.
+Below are the code signatures extracted by SigMap v5.1.0 on 2026-04-16T21:33:38.411Z.
 Use these signatures to answer questions about the code accurately.
 ## Code Signatures
-<!-- Generated by SigMap gen-context.js v4.1.2 -->
+<!-- Generated by SigMap gen-context.js v5.1.0 -->
 <!-- DO NOT EDIT below the marker line — run gen-context.js to regenerate -->
 # Code signatures
-## changes (last 5 commits — 53 minutes ago)
+## changes (last 5 commits — 16 minutes ago)
 ```
-src/retrieval/ranker.js                       +_parseContextFile  +buildSigIndex  ~buildSigIndex  ~rank
+src/config/loader.js                          +loadBaseConfig  ~loadConfig  ~deepClone
+src/format/dashboard.js                       ~computeExtractorCoverage  ~readBenchmarkTrend
+src/judge/judge-engine.js                     +tokenize  +groundedness  +judge
+src/retrieval/ranker.js                       +detectIntent  ~formatRankJSON
 ```
 ## packages
@@ -146,9 +149,41 @@ function adapt(context, adapterName, opts = {}) → string
 ## src
-### src/config/defaults.js
+### src/config/loader.js
 ```
-module.exports = { DEFAULTS }
+module.exports = { loadConfig, loadBaseConfig }
+function loadBaseConfig(extendsVal, cwd)
+function detectAutoSrcDirs(cwd, excludeList) → string[]
+function loadConfig(cwd) → object
+function deepClone(obj)
+```
+### src/format/dashboard.js
+```
+module.exports = { generateDashboardHtml, renderHistoryCharts, computeExtractorCoverage, percentile, overBudgetStreak }
+function toNumber(v)
+function percentile(values, p)
+function overBudgetStreak(entries)
+function loadConfig(cwd)
+function shouldExclude(rel, excludeSet)
+function detectLanguage(filePath)
+function walkFiles(dir, maxDepth, depth, out, excludeSet)
+function computeExtractorCoverage(cwd)
+function readBenchmarkTrend(cwd)
+function lineChartSvg(values, title, ySuffix)
+function barChartSvg(perLanguage)
+function sparkline(values)
+function buildDashboardData(cwd, health)
+function generateDashboardHtml(cwd, health)
+function renderHistoryCharts(cwd, health)
+```
+### src/judge/judge-engine.js
+```
+module.exports = { groundedness, judge }
+function tokenize(text)
+function groundedness(response, context)
+function judge(response, context, opts = {})
 ```
 ### src/mcp/server.js
@@ -162,13 +197,14 @@ function start(cwd)
 ### src/retrieval/ranker.js
 ```
-module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS }
+module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS, detectIntent }
 function scoreFile(filePath, sigs, queryTokens, weights) → number
 function rank(query, sigIndex, opts) → { file: string, score: nu
 function _parseContextFile(contextPath) → Map<string, string[]>
 function buildSigIndex(cwd, opts) → Map<string, string[]>
 function formatRankTable(results, query) → string
 function formatRankJSON(results, query) → object
+function detectIntent(query)
 ```
 ### src/analysis/coverage-score.js
@@ -178,12 +214,9 @@ function coverageScore(cwd, fileEntries, config) → { * score: number, * grad
 function _walk(dir, excludeSet, out)
 ```
-### src/config/loader.js
+### src/config/defaults.js
 ```
-module.exports = { loadConfig }
-function detectAutoSrcDirs(cwd, excludeList) → string[]
-function loadConfig(cwd) → object
-function deepClone(obj)
+module.exports = { DEFAULTS }
 ```
 ### src/eval/analyzer.js
@@ -527,26 +560,6 @@ function formatCache(content) → string
 function formatCachePayload(content, model) → string
 ```
-### src/format/dashboard.js
-```
-module.exports = { generateDashboardHtml, renderHistoryCharts, computeExtractorCoverage, percentile, overBudgetStreak }
-function toNumber(v)
-function percentile(values, p)
-function overBudgetStreak(entries)
-function loadConfig(cwd)
-function shouldExclude(rel, excludeSet)
-function detectLanguage(filePath)
-function walkFiles(dir, maxDepth, depth, out, excludeSet)
-function computeExtractorCoverage(cwd)
-function readBenchmarkTrend(cwd)
-function lineChartSvg(values, title, ySuffix)
-function barChartSvg(perLanguage)
-function sparkline(values)
-function buildDashboardData(cwd, health)
-function generateDashboardHtml(cwd, health)
-function renderHistoryCharts(cwd, health)
-```
 ### src/format/llm-txt.js
 ```
 module.exports = { format, outputPath }

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,26 @@ Format: [Semantic Versioning](https://semver.org/)
 ---
+## [5.1.0] — 2026-04-16
+### Added
+- **Benchmark history tracking** — all three benchmark scripts (`run-retrieval-benchmark.mjs`, `run-benchmark.mjs`, `run-task-benchmark.mjs`) now append a structured NDJSON entry to `.context/benchmark-history.ndjson` after each run (`type: "retrieval" | "token-reduction" | "task"`).
+- **`sigmap history` benchmark trend rows** — when `.context/benchmark-history.ndjson` exists, `sigmap history` prints a retrieval `hit@5` sparkline row and a token-reduction sparkline row below the usage table. The command no longer exits early when the usage log is empty.
+- **Dashboard `readBenchmarkTrend` uses local history** — `src/format/dashboard.js` now prefers `.context/benchmark-history.ndjson` over the CI-only `benchmarks/results/` directory, so the dashboard hit@5 trend chart populates for all users after running any benchmark locally.
+---
+## [5.0.0] — 2026-04-16
+### Added
+- **`sigmap judge --response <file> --context <file>`** — rule-based groundedness scoring engine (`src/judge/judge-engine.js`). Computes a 0–1 score from token overlap between an LLM response and its source context. Exits 0 when verdict is `pass`, exits 1 on `fail`. Supports `--json` (emits `{ score, verdict, reasons }`) and `--threshold` override.
+- **Config `extends`** — `gen-context.config.json` now accepts an `"extends"` key pointing to a local JSON file path or HTTPS URL. The base config is deep-merged (DEFAULTS → base → local), with HTTPS responses cached for 1 hour in `.context/config-cache/`.
+- **`sigmap history [--last N] [--json]`** — displays last N usage log entries as a table with a Unicode sparkline (▁▂▃▄▅▆▇█) for the token trend. Reads from `.context/usage.ndjson` (requires `tracking: true` in config).
+---
 ## [4.3.0] — 2026-04-16
 ### Added

package/gen-context.js CHANGED Viewed

@@ -221,6 +221,47 @@ __factories["./src/config/loader"] = function(module, exports) {
     });
   }
+  const BASE_CONFIG_TTL_MS = 60 * 60 * 1000;
+  function loadBaseConfig(extendsVal, cwd) {
+    if (!extendsVal || typeof extendsVal !== 'string') return {};
+    if (extendsVal.startsWith('https://') || extendsVal.startsWith('http://')) {
+      const cacheDir  = path.join(cwd, '.context', 'config-cache');
+      const cacheKey  = Buffer.from(extendsVal).toString('base64').replace(/[^a-zA-Z0-9_-]/g, '_');
+      const cachePath = path.join(cacheDir, `${cacheKey}.json`);
+      if (fs.existsSync(cachePath)) {
+        const age = Date.now() - fs.statSync(cachePath).mtimeMs;
+        if (age < BASE_CONFIG_TTL_MS) {
+          try { return JSON.parse(fs.readFileSync(cachePath, 'utf8')); } catch (_) {}
+        }
+      }
+      try {
+        const { execSync } = require('child_process');
+        const proto = extendsVal.startsWith('https') ? 'https' : 'http';
+        const out = execSync(
+          `node -e "const h=require('${proto}');let d='';h.get(${JSON.stringify(extendsVal)},r=>{r.on('data',c=>d+=c);r.on('end',()=>process.stdout.write(d))}).on('error',()=>process.exit(1))"`,
+          { timeout: 10000, encoding: 'utf8' }
+        );
+        const parsed = JSON.parse(out);
+        if (!fs.existsSync(cacheDir)) fs.mkdirSync(cacheDir, { recursive: true });
+        fs.writeFileSync(cachePath, JSON.stringify(parsed), 'utf8');
+        return parsed;
+      } catch (err) {
+        process.stderr.write(`[sigmap] config extends: could not fetch ${extendsVal}: ${err.message}\n`);
+        if (fs.existsSync(cachePath)) {
+          try { return JSON.parse(fs.readFileSync(cachePath, 'utf8')); } catch (_) {}
+        }
+        return {};
+      }
+    }
+    const absPath = path.resolve(cwd, extendsVal);
+    try { return JSON.parse(fs.readFileSync(absPath, 'utf8')); }
+    catch (err) {
+      process.stderr.write(`[sigmap] config extends: could not load ${absPath}: ${err.message}\n`);
+      return {};
+    }
+  }
   /**
    * Load and merge configuration for a given working directory.
    *
@@ -250,18 +291,31 @@ __factories["./src/config/loader"] = function(module, exports) {
     // Warn on unknown keys (helps catch typos)
     for (const key of Object.keys(userConfig)) {
-      if (key.startsWith('_')) continue; // allow _comment etc.
+      if (key.startsWith('_') || key === 'extends') continue;
       if (!KNOWN_KEYS.has(key)) {
         console.warn(`[sigmap] unknown config key: "${key}" (ignored)`);
       }
     }
-    // Deep merge: top-level known keys from user override defaults
-    // For object values (e.g. mcp), merge one level deep
+    // Deep merge: DEFAULTS → base (extends) → user config
+    const baseConfig = loadBaseConfig(userConfig.extends, cwd);
     const merged = deepClone(DEFAULTS);
+    for (const key of Object.keys(baseConfig)) {
+      if (key.startsWith('_') || key === 'extends') continue;
+      if (!KNOWN_KEYS.has(key)) continue;
+      const val = baseConfig[key];
+      if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
+          typeof merged[key] === 'object' && !Array.isArray(merged[key])) {
+        merged[key] = Object.assign({}, merged[key], val);
+      } else {
+        merged[key] = val;
+      }
+    }
     for (const key of Object.keys(userConfig)) {
-      if (key.startsWith('_')) continue;
-      if (!KNOWN_KEYS.has(key)) continue; // skip unknown keys
+      if (key.startsWith('_') || key === 'extends') continue;
+      if (!KNOWN_KEYS.has(key)) continue;
       const val = userConfig[key];
       if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
           typeof merged[key] === 'object' && !Array.isArray(merged[key])) {
@@ -292,7 +346,7 @@ __factories["./src/config/loader"] = function(module, exports) {
     return JSON.parse(JSON.stringify(obj));
   }
-  module.exports = { loadConfig, detectAutoSrcDirs };
+  module.exports = { loadConfig, loadBaseConfig, detectAutoSrcDirs };
 };
@@ -3098,6 +3152,25 @@ __factories["./src/format/cache"] = function(module, exports) {
     }
     function readBenchmarkTrend(cwd) {
+      // Prefer per-user history file written by benchmark scripts
+      const histPath = path.join(cwd, '.context', 'benchmark-history.ndjson');
+      if (fs.existsSync(histPath)) {
+        const histValues = [];
+        try {
+          for (const line of fs.readFileSync(histPath, 'utf8').trim().split('\n').filter(Boolean)) {
+            try {
+              const obj = JSON.parse(line);
+              if (obj.type === 'retrieval') {
+                const v = toNumber(obj.hitAt5Pct);
+                if (v !== null) histValues.push(v);
+              }
+            } catch (_) {}
+          }
+        } catch (_) {}
+        if (histValues.length > 0) return histValues.slice(-30);
+      }
+      // Fallback: legacy benchmarks/results directory (CI artifacts)
       const resultDir = path.join(cwd, 'benchmarks', 'results');
       if (!fs.existsSync(resultDir)) return [];
       const files = [];
@@ -4654,7 +4727,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
   const SERVER_INFO = {
     name: 'sigmap',
-    version: '4.3.0',
+    version: '5.1.0',
     description: 'SigMap MCP server — code signatures on demand',
   };
@@ -5252,6 +5325,61 @@ __factories["./src/security/scanner"] = function(module, exports) {
 };
+// ── ./src/judge/judge-engine ──
+__factories["./src/judge/judge-engine"] = function(module, exports) {
+  'use strict';
+  const STOP = new Set([
+    'the','a','an','in','on','at','to','of','for','and','or','but',
+    'is','are','was','were','be','been','being','have','has','had',
+    'do','does','did','will','would','could','should','may','might',
+    'shall','can','not','with','from','by','as','this','that','it',
+  ]);
+  function tokenize(text) {
+    return (text || '').toLowerCase().match(/\b[a-z][a-z0-9_]{2,}\b/g) || [];
+  }
+  function groundedness(response, context) {
+    if (!response || !context) return 0;
+    const ctxTokens = new Set(tokenize(context).filter((t) => !STOP.has(t)));
+    if (ctxTokens.size === 0) return 0;
+    const respTokens = tokenize(response).filter((t) => !STOP.has(t));
+    if (respTokens.length === 0) return 0;
+    const matched = respTokens.filter((t) => ctxTokens.has(t));
+    return parseFloat((matched.length / respTokens.length).toFixed(3));
+  }
+  const GENERIC_MARKERS = [
+    'however, based on my knowledge',
+    'generally speaking',
+    'in general',
+    'typically,',
+    'usually,',
+    'as a general rule',
+  ];
+  function judge(response, context, opts) {
+    opts = opts || {};
+    const score = groundedness(response, context);
+    const threshold = opts.threshold !== undefined ? opts.threshold : 0.25;
+    const reasons = [];
+    if (score < threshold) {
+      reasons.push(`score ${score} is below threshold ${threshold} — response may not be grounded in context`);
+    }
+    if (response) {
+      const lower = response.toLowerCase();
+      for (const m of GENERIC_MARKERS) {
+        if (lower.includes(m)) reasons.push(`response contains generic phrase: "${m}"`);
+      }
+    }
+    const verdict = score >= threshold && reasons.length === 0 ? 'pass' : 'fail';
+    return { score, verdict, reasons };
+  }
+  module.exports = { groundedness, judge };
+};
 // ── ./src/tracking/logger ──
 __factories["./src/tracking/logger"] = function(module, exports) {
@@ -6262,7 +6390,7 @@ const path = require('path');
 const os = require('os');
 const { execSync } = require('child_process');
-const VERSION = '4.3.0';
+const VERSION = '5.1.0';
 const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
 function requireSourceOrBundled(key) {
@@ -8313,6 +8441,124 @@ function main() {
     process.exit(0);
   }
+  // v5.0: `sigmap judge --response <file> --context <file>` — groundedness scoring
+  if (args[0] === 'judge') {
+    const respIdx = args.indexOf('--response');
+    const ctxIdx  = args.indexOf('--context');
+    if (respIdx < 0 || ctxIdx < 0) {
+      console.error('[sigmap] Usage: sigmap judge --response <file> --context <file> [--json] [--threshold 0.25]');
+      process.exit(1);
+    }
+    const respFile = (args[respIdx + 1] || '').trim();
+    const ctxFile  = (args[ctxIdx + 1]  || '').trim();
+    if (!respFile || respFile.startsWith('--') || !ctxFile || ctxFile.startsWith('--')) {
+      console.error('[sigmap] --response and --context require file paths');
+      process.exit(1);
+    }
+    let responseText = '', contextText = '';
+    try { responseText = fs.readFileSync(path.resolve(cwd, respFile), 'utf8'); }
+    catch (e) { console.error(`[sigmap] cannot read --response file: ${e.message}`); process.exit(1); }
+    try { contextText = fs.readFileSync(path.resolve(cwd, ctxFile), 'utf8'); }
+    catch (e) { console.error(`[sigmap] cannot read --context file: ${e.message}`); process.exit(1); }
+    const thrIdx = args.indexOf('--threshold');
+    const judgeOpts = thrIdx >= 0 ? { threshold: parseFloat(args[thrIdx + 1]) || 0.25 } : {};
+    const { judge: runJudge } = requireSourceOrBundled('./src/judge/judge-engine');
+    const result = runJudge(responseText, contextText, judgeOpts);
+    if (args.includes('--json')) {
+      process.stdout.write(JSON.stringify(result) + '\n');
+    } else {
+      const bar = '─'.repeat(44);
+      console.log([
+        bar,
+        ` sigmap judge`,
+        ` Score     : ${result.score}`,
+        ` Verdict   : ${result.verdict}`,
+        result.reasons.length ? ` Reasons   :\n   ${result.reasons.join('\n   ')}` : ` Reasons   : none`,
+        bar,
+      ].join('\n'));
+    }
+    process.exit(result.verdict === 'pass' ? 0 : 1);
+  }
+  // v5.0: `sigmap history` — show last N usage log entries with sparkline
+  if (args[0] === 'history') {
+    const { readLog } = requireSourceOrBundled('./src/tracking/logger');
+    const entries = readLog(cwd);
+    const nIdx = args.indexOf('--last');
+    const n    = nIdx >= 0 ? (parseInt(args[nIdx + 1], 10) || 10) : 10;
+    const last = entries.slice(-n);
+    if (args.includes('--json')) {
+      process.stdout.write(JSON.stringify(last) + '\n');
+      process.exit(0);
+    }
+    const SPARK_CHARS = '▁▂▃▄▅▆▇█';
+    function sparkline(values) {
+      if (values.length === 0) return '';
+      const min = Math.min(...values);
+      const max = Math.max(...values);
+      const range = max - min || 1;
+      return values.map((v) => {
+        const idx = Math.round(((v - min) / range) * (SPARK_CHARS.length - 1));
+        return SPARK_CHARS[idx];
+      }).join('');
+    }
+    const bar = '─'.repeat(62);
+    console.log(bar);
+    console.log(` sigmap history  (last ${Math.max(last.length, 1)} runs)`);
+    console.log(bar);
+    if (last.length === 0) {
+      console.log(' No usage log entries. Enable tracking: true in config to start recording runs.');
+    } else {
+      console.log(` ${'Date'.padEnd(24)} ${'Files'.padStart(5)} ${'Tokens'.padStart(7)} ${'Reduction'.padStart(9)} ${'Budget?'.padStart(7)}`);
+      console.log(` ${'─'.repeat(24)} ${'─'.repeat(5)} ${'─'.repeat(7)} ${'─'.repeat(9)} ${'─'.repeat(7)}`);
+      for (const e of last) {
+        const date = (e.ts || '').slice(0, 19).replace('T', ' ');
+        const files = String(e.fileCount || 0).padStart(5);
+        const tok   = String(e.finalTokens || 0).padStart(7);
+        const red   = `${e.reductionPct || 0}%`.padStart(9);
+        const over  = (e.overBudget ? '  ⚠ yes' : '     no').padStart(7);
+        console.log(` ${date.padEnd(24)} ${files} ${tok} ${red} ${over}`);
+      }
+      console.log(bar);
+      const tokens = last.map((e) => e.finalTokens || 0);
+      console.log(` Token trend: ${sparkline(tokens)}`);
+    }
+    // Show benchmark trend row if .context/benchmark-history.ndjson exists
+    const benchHistPath = path.join(cwd, '.context', 'benchmark-history.ndjson');
+    if (fs.existsSync(benchHistPath)) {
+      try {
+        const benchEntries = fs.readFileSync(benchHistPath, 'utf8').trim().split('\n')
+          .map((l) => { try { return JSON.parse(l); } catch (_) { return null; } }).filter(Boolean);
+        const retrieval = benchEntries.filter((e) => e.type === 'retrieval').slice(-n);
+        if (retrieval.length > 0) {
+          const hits = retrieval.map((e) => e.hitAt5Pct || 0);
+          console.log(` hit@5 trend: ${sparkline(hits)}  ${hits.at(-1)}% (latest)`);
+        }
+        const tokenBench = benchEntries.filter((e) => e.type === 'token-reduction').slice(-n);
+        if (tokenBench.length > 0) {
+          const reds = tokenBench.map((e) => e.reduction || e.avgReductionPct || 0);
+          console.log(` tok reduce : ${sparkline(reds)}  ${reds.at(-1)}% (latest)`);
+        }
+      } catch (_) {}
+    }
+    console.log(bar);
+    process.exit(0);
+  }
   // Feature 6: `sigmap sync` — write all outputs + llms.txt + print compact diff
   if (args[0] === 'sync') {
     try {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap",
-  "version": "4.3.0",
+  "version": "5.1.0",
   "description": "Zero-dependency AI context engine — 97% token reduction. No npm install. Runs on Node 18+.",
   "main": "gen-context.js",
   "exports": {

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-cli",
-  "version": "4.3.0",
+  "version": "5.1.0",
   "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
   "main": "index.js",
   "keywords": [

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-core",
-  "version": "4.3.0",
+  "version": "5.1.0",
   "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
   "main": "index.js",
   "keywords": [

package/src/config/loader.js CHANGED Viewed

@@ -4,6 +4,65 @@ const fs = require('fs');
 const path = require('path');
 const { DEFAULTS } = require('./defaults');
+const BASE_CONFIG_TTL_MS = 60 * 60 * 1000; // 1 hour
+function loadBaseConfig(extendsVal, cwd) {
+  if (!extendsVal || typeof extendsVal !== 'string') return {};
+  if (extendsVal.startsWith('https://') || extendsVal.startsWith('http://')) {
+    const cacheDir  = path.join(cwd, '.context', 'config-cache');
+    const cacheKey  = Buffer.from(extendsVal).toString('base64url').replace(/[^a-zA-Z0-9_-]/g, '_');
+    const cachePath = path.join(cacheDir, `${cacheKey}.json`);
+    if (fs.existsSync(cachePath)) {
+      const age = Date.now() - fs.statSync(cachePath).mtimeMs;
+      if (age < BASE_CONFIG_TTL_MS) {
+        try { return JSON.parse(fs.readFileSync(cachePath, 'utf8')); } catch (_) {}
+      }
+    }
+    try {
+      const https = require('https');
+      const http  = require('http');
+      const mod   = extendsVal.startsWith('https://') ? https : http;
+      const raw   = (() => {
+        let data = '';
+        return new Promise((resolve, reject) => {
+          mod.get(extendsVal, (res) => {
+            res.on('data', (c) => { data += c; });
+            res.on('end', () => resolve(data));
+          }).on('error', reject);
+        });
+      })();
+      // sync fallback: use execSync with node -e
+      const { execSync } = require('child_process');
+      const out = execSync(
+        `node -e "const h=require('${extendsVal.startsWith('https') ? 'https' : 'http'}');let d='';h.get(${JSON.stringify(extendsVal)},r=>{r.on('data',c=>d+=c);r.on('end',()=>process.stdout.write(d))}).on('error',()=>process.exit(1))"`,
+        { timeout: 10000, encoding: 'utf8' }
+      );
+      const parsed = JSON.parse(out);
+      if (!fs.existsSync(cacheDir)) fs.mkdirSync(cacheDir, { recursive: true });
+      fs.writeFileSync(cachePath, JSON.stringify(parsed), 'utf8');
+      return parsed;
+    } catch (err) {
+      process.stderr.write(`[sigmap] config extends: could not fetch ${extendsVal}: ${err.message}\n`);
+      if (fs.existsSync(cachePath)) {
+        try { return JSON.parse(fs.readFileSync(cachePath, 'utf8')); } catch (_) {}
+      }
+      return {};
+    }
+  }
+  // Local file path
+  const absPath = path.resolve(cwd, extendsVal);
+  try {
+    return JSON.parse(fs.readFileSync(absPath, 'utf8'));
+  } catch (err) {
+    process.stderr.write(`[sigmap] config extends: could not load ${absPath}: ${err.message}\n`);
+    return {};
+  }
+}
 // Keys that are valid in gen-context.config.json
 const KNOWN_KEYS = new Set(Object.keys(DEFAULTS));
@@ -173,17 +232,30 @@ function loadConfig(cwd) {
   // Warn on unknown keys (helps catch typos)
   for (const key of Object.keys(userConfig)) {
-    if (key.startsWith('_')) continue; // allow _comment etc.
+    if (key.startsWith('_') || key === 'extends') continue;
     if (!KNOWN_KEYS.has(key)) {
       console.warn(`[sigmap] unknown config key: "${key}" (ignored)`);
     }
   }
-  // Deep merge: top-level known keys from user override defaults
-  // For object values (e.g. mcp), merge one level deep
+  // Deep merge: DEFAULTS → base (extends) → user config
+  const baseConfig = loadBaseConfig(userConfig.extends, cwd);
   const merged = deepClone(DEFAULTS);
+  for (const key of Object.keys(baseConfig)) {
+    if (key.startsWith('_') || key === 'extends') continue;
+    if (!KNOWN_KEYS.has(key)) continue;
+    const val = baseConfig[key];
+    if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
+        typeof merged[key] === 'object' && !Array.isArray(merged[key])) {
+      merged[key] = Object.assign({}, merged[key], val);
+    } else {
+      merged[key] = val;
+    }
+  }
   for (const key of Object.keys(userConfig)) {
-    if (key.startsWith('_')) continue;
+    if (key.startsWith('_') || key === 'extends') continue;
     if (!KNOWN_KEYS.has(key)) continue; // skip unknown keys
     const val = userConfig[key];
     if (val !== null && typeof val === 'object' && !Array.isArray(val) &&
@@ -214,4 +286,4 @@ function deepClone(obj) {
   return JSON.parse(JSON.stringify(obj));
 }
-module.exports = { loadConfig };
+module.exports = { loadConfig, loadBaseConfig };

package/src/format/dashboard.js CHANGED Viewed

@@ -140,6 +140,26 @@ function computeExtractorCoverage(cwd) {
 }
 function readBenchmarkTrend(cwd) {
+  // Prefer per-user history file written by benchmark scripts
+  const histPath = path.join(cwd, '.context', 'benchmark-history.ndjson');
+  if (fs.existsSync(histPath)) {
+    const values = [];
+    try {
+      const lines = fs.readFileSync(histPath, 'utf8').trim().split('\n').filter(Boolean);
+      for (const line of lines) {
+        try {
+          const obj = JSON.parse(line);
+          if (obj.type === 'retrieval') {
+            const v = toNumber(obj.hitAt5Pct);
+            if (v !== null) values.push(v);
+          }
+        } catch (_) {}
+      }
+    } catch (_) {}
+    if (values.length > 0) return values.slice(-30);
+  }
+  // Fallback: legacy benchmarks/results directory (CI artifacts)
   const resultDir = path.join(cwd, 'benchmarks', 'results');
   if (!fs.existsSync(resultDir)) return [];

package/src/judge/judge-engine.js ADDED Viewed

@@ -0,0 +1,55 @@
+'use strict';
+const STOP = new Set([
+  'the','a','an','in','on','at','to','of','for','and','or','but',
+  'is','are','was','were','be','been','being','have','has','had',
+  'do','does','did','will','would','could','should','may','might',
+  'shall','can','not','with','from','by','as','this','that','it',
+]);
+function tokenize(text) {
+  return (text || '').toLowerCase().match(/\b[a-z][a-z0-9_]{2,}\b/g) || [];
+}
+function groundedness(response, context) {
+  if (!response || !context) return 0;
+  const ctxTokens = new Set(tokenize(context).filter((t) => !STOP.has(t)));
+  if (ctxTokens.size === 0) return 0;
+  const respTokens = tokenize(response).filter((t) => !STOP.has(t));
+  if (respTokens.length === 0) return 0;
+  const matched = respTokens.filter((t) => ctxTokens.has(t));
+  return parseFloat((matched.length / respTokens.length).toFixed(3));
+}
+const GENERIC_MARKERS = [
+  'however, based on my knowledge',
+  'generally speaking',
+  'in general',
+  'typically,',
+  'usually,',
+  'as a general rule',
+];
+function judge(response, context, opts = {}) {
+  const score = groundedness(response, context);
+  const threshold = opts.threshold !== undefined ? opts.threshold : 0.25;
+  const reasons = [];
+  if (score < threshold) {
+    reasons.push(`score ${score} is below threshold ${threshold} — response may not be grounded in context`);
+  }
+  if (response) {
+    const lower = response.toLowerCase();
+    for (const m of GENERIC_MARKERS) {
+      if (lower.includes(m)) {
+        reasons.push(`response contains generic phrase: "${m}"`);
+      }
+    }
+  }
+  const verdict = score >= threshold && reasons.length === 0 ? 'pass' : 'fail';
+  return { score, verdict, reasons };
+}
+module.exports = { groundedness, judge };

package/src/mcp/server.js CHANGED Viewed

@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
 const SERVER_INFO = {
   name: 'sigmap',
-  version: '4.3.0',
+  version: '5.1.0',
   description: 'SigMap MCP server — code signatures on demand',
 };