npm - sigmap - Versions diffs - 7.14.0 → 7.16.0 - Mend

sigmap 7.14.0 → 7.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md +18 -0
package/gen-context.js +199 -2
package/llms-full.txt +1 -1
package/llms.txt +1 -1
package/package.json +3 -2
package/packages/cli/package.json +1 -1
package/packages/core/package.json +1 -1
package/src/conventions/ci.js +48 -0
package/src/eval/llm-ablation.js +113 -0
package/src/mcp/server.js +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,24 @@ Format: [Semantic Versioning](https://semver.org/)
 ---
+## [7.16.0] — 2026-06-18
+Minor release — LLM A/B hallucination ablation harness (grounded codegen, IMPL §9).
+### Added
+- **LLM A/B hallucination ablation harness (#325):** the honest measurement behind the grounded-codegen plan (IMPL §9). Runs a model twice per task — (A) no SigMap context, (B) with SigMap grounding — pipes both outputs through the hallucination guard, and reports the measured delta in flagged codebase-fact errors. New zero-dependency, bundle-safe `src/eval/llm-ablation.js` (`buildGrounding`, `scoreAnswer`, `runAblation`) keeps the model call **injected**, so the harness is fully offline-testable; the live runner `scripts/run-llm-ablation.mjs` wires Anthropic via `ANTHROPIC_API_KEY` and prints the A/B table + delta (`npm run benchmark:llm-ablation`), degrading to a graceful skip (exit 0) when no key is set. The network fetch is confined to `scripts/`, never the published library surface. Starter corpus in `benchmarks/llm-ablation-tasks.json`. This turns §9 from an offline coverage proxy into a ready-to-run real A/B — the moment a key is present, it produces the measured hallucination delta.
+---
+## [7.15.0] — 2026-06-18
+Minor release — `sigmap conventions --ci` (grounded codegen, Layer 3 polish).
+### Added
+- **`sigmap conventions --ci` — gate CI on convention consistency (#322):** completes the consistency-tracking story started by `--report` (v7.14.0). A CI gate that fails when a repo's overall convention consistency falls below a threshold (`--min`, default 0.70), and — with `--no-regress` — also fails when the score dropped vs the last recorded snapshot (best-effort). New zero-dependency, bundle-safe `src/conventions/ci.js` (`ciGate`) reuses `overallScore`; the command is read-only (reads the last `.context/conventions-history.ndjson` snapshot for `--no-regress`, never appends) and exits non-zero on failure, so it drops straight into CI. `--json` for machine output. The remaining `conventions` flags (`--fix`, `--update`) and the §9 LLM A/B benchmark are follow-ups.
+---
 ## [7.14.0] — 2026-06-17
 Minor release — `sigmap conventions --report` (grounded codegen, Layer 3 polish).

package/gen-context.js CHANGED Viewed

@@ -30,6 +30,175 @@ function __require(key) {
 // ── ./src/review/review-pr ──
 // ── ./src/create/orchestrate ──
 // ── ./src/conventions/report ──
+// ── ./src/conventions/ci ──
+// ── ./src/eval/llm-ablation ──
+__factories["./src/eval/llm-ablation"] = function(module, exports) {
+  /**
+   * LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
+   *
+   * Runs a model twice per task — (A) no SigMap context, (B) with SigMap
+   * grounding — pipes both outputs through the hallucination guard, and reports
+   * the measured delta in flagged codebase-fact errors. The model call is
+   * INJECTED (`complete(prompt) → text`), so the harness itself is pure and
+   * offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
+   * Zero-dependency, bundle-safe (no network here).
+   */
+  const { verify } = __require('./src/verify/hallucination-guard');
+  /**
+   * Assemble the grounding text that arm B prepends to a task prompt.
+   *
+   * Two independent, best-effort sections are collected and joined with a
+   * blank line: the rendered conventions block, and a "Known symbols" list
+   * capped at `opts.maxSymbols` names. A failure while building either
+   * section is swallowed and that section is simply left out.
+   * @param {string} cwd
+   * @param {object} [opts]
+   * @param {number} [opts.maxSymbols=80]
+   * @returns {string}
+   */
+  function buildGrounding(cwd, opts = {}) {
+    const symbolLimit = opts.maxSymbols == null ? 80 : opts.maxSymbols;
+    const sections = [];
+    // Section 1: conventions rendered from the indexed file list.
+    try {
+      const { extractConventions } = __require('./src/conventions/extract');
+      const { renderConventionsBlock } = __require('./src/conventions/inject');
+      const { loadConfig } = __require('./src/config/loader');
+      let indexedFiles = [];
+      try {
+        // The config value itself is not used; the call is kept so that a
+        // config failure still leaves the file list empty.
+        loadConfig(cwd);
+        const { buildSigIndex } = __require('./src/retrieval/ranker');
+        indexedFiles = [...buildSigIndex(cwd).keys()];
+      } catch (_) {}
+      const conventions = extractConventions(cwd, indexedFiles);
+      sections.push(renderConventionsBlock(conventions));
+    } catch (_) {}
+    // Section 2: the first `symbolLimit` known symbol names.
+    try {
+      const { buildSymbolSet } = __require('./src/verify/hallucination-guard');
+      const { set } = buildSymbolSet(cwd);
+      const symbolNames = [...set].slice(0, symbolLimit);
+      if (symbolNames.length) {
+        sections.push(`## Known symbols (reference these exactly)\n${symbolNames.join(', ')}`);
+      }
+    } catch (_) {}
+    return sections.join('\n\n');
+  }
+  /**
+   * Return how many items the hallucination guard flags in an answer
+   * (the §9 metric). Any error raised while verifying yields 0.
+   * @param {string} answerText
+   * @param {string} cwd
+   * @returns {number}
+   */
+  function scoreAnswer(answerText, cwd) {
+    let flagged = 0;
+    try {
+      const text = String(answerText || '');
+      const report = verify(text, cwd);
+      flagged = report.summary.total || 0;
+    } catch (_) {
+      flagged = 0;
+    }
+    return flagged;
+  }
+  /**
+   * Run the A/B ablation over a task corpus.
+   *
+   * For every task the injected `complete` is called twice — first with the
+   * bare prompt (arm A), then with the grounding prepended (arm B) — and both
+   * outputs are scored with `scoreAnswer`. Per-task counts and aggregate
+   * totals are returned.
+   * @param {Array<{id:string, prompt:string}>} tasks
+   * @param {string} cwd
+   * @param {(prompt:string, meta:object)=>string} complete injected model call
+   * @param {object} [opts]
+   * @param {string} [opts.grounding] precomputed grounding (else built from cwd)
+   * @returns {{ tasks: object[], aggregate: object }}
+   */
+  function runAblation(tasks, cwd, complete, opts = {}) {
+    const grounding = opts.grounding == null ? buildGrounding(cwd) : opts.grounding;
+    const perTask = [];
+    let withoutTotal = 0;
+    let withTotal = 0;
+    for (const task of tasks || []) {
+      const plainPrompt = task.prompt || '';
+      // An empty grounding string leaves arm B's prompt identical to arm A's.
+      let groundedPrompt = plainPrompt;
+      if (grounding) {
+        groundedPrompt = `${grounding}\n\n---\n\n${plainPrompt}`;
+      }
+      const answerA = String(complete(plainPrompt, { id: task.id, grounded: false }) || '');
+      const answerB = String(complete(groundedPrompt, { id: task.id, grounded: true }) || '');
+      const aFlagged = scoreAnswer(answerA, cwd);
+      const bFlagged = scoreAnswer(answerB, cwd);
+      withoutTotal += aFlagged;
+      withTotal += bFlagged;
+      perTask.push({ id: task.id, aFlagged, bFlagged });
+    }
+    const n = perTask.length;
+    // Flagged count per 100 tasks; 0 when no task was run.
+    const per100 = (total) => {
+      if (n > 0) return (total / n) * 100;
+      return 0;
+    };
+    const aggregate = {
+      n,
+      withoutFlagged: withoutTotal,
+      withFlagged: withTotal,
+      delta: withoutTotal - withTotal,
+      withoutPer100: per100(withoutTotal),
+      withPer100: per100(withTotal),
+    };
+    return { tasks: perTask, aggregate };
+  }
+  module.exports = { buildGrounding, scoreAnswer, runAblation };
+};
+__factories["./src/conventions/ci"] = function(module, exports) {
+  /**
+   * Convention CI gate (IMPL.md §4 — `conventions --ci`).
+   *
+   * Fails CI when a repo's overall convention consistency is below a threshold,
+   * and optionally when it regresses vs the last recorded run. Builds on the
+   * `--report` score. Pure, zero-dependency, bundle-safe.
+   */
+  const { overallScore } = __require('./src/conventions/report');
+  const DEFAULT_MIN = 0.7;
+  const EPS = 1e-9;
+  /**
+   * Evaluate the consistency gate.
+   *
+   * Fails when the overall score is below `min`. When `opts.noRegress` is set
+   * and `prior.score` is a number, it also fails when the score dropped by
+   * more than EPS relative to that prior score.
+   * @param {object} result an `extractConventions` result
+   * @param {object} [opts]
+   * @param {number} [opts.min=0.7] minimum overall consistency (0–1); a value
+   *   that does not convert to a finite number falls back to DEFAULT_MIN
+   * @param {boolean} [opts.noRegress=false] also fail if the score dropped vs prior
+   * @param {object|null} [prior] the previous snapshot (from `report.snapshot`)
+   * @returns {{ score:number, min:number, ok:boolean, regressed:boolean, reasons:string[] }}
+   */
+  function ciGate(result, opts = {}, prior = null) {
+    const options = opts || {};
+    // A NaN threshold (e.g. an unparsable `--min` value run through parseFloat)
+    // makes `score < min` false for every score, so the gate would always pass.
+    // Fall back to the default instead of silently disabling the check.
+    const requestedMin = options.min != null ? Number(options.min) : DEFAULT_MIN;
+    const min = Number.isFinite(requestedMin) ? requestedMin : DEFAULT_MIN;
+    const score = overallScore(result);
+    const pct = (value) => `${(value * 100).toFixed(0)}%`;
+    const reasons = [];
+    let ok = true;
+    if (score < min) {
+      ok = false;
+      reasons.push(`consistency ${pct(score)} below min ${pct(min)}`);
+    }
+    let regressed = false;
+    if (options.noRegress && prior && typeof prior.score === 'number') {
+      // EPS is a small tolerance: a difference within it is not a regression.
+      if (score < prior.score - EPS) {
+        regressed = true;
+        ok = false;
+        reasons.push(`consistency dropped ${pct(prior.score)} → ${pct(score)}`);
+      }
+    }
+    return { score, min, ok, regressed, reasons };
+  }
+  module.exports = { ciGate, DEFAULT_MIN };
+};
 __factories["./src/conventions/report"] = function(module, exports) {
   /**
@@ -7538,7 +7707,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
   const SERVER_INFO = {
     name: 'sigmap',
-    version: '7.14.0',
+    version: '7.16.0',
     description: 'SigMap MCP server — code signatures on demand',
   };
@@ -13216,7 +13385,7 @@ function __tryGit(args, opts = {}) {
   catch (_) { return ''; }
 }
-const VERSION = '7.14.0';
+const VERSION = '7.16.0';
 const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
 function requireSourceOrBundled(key) {
@@ -16411,6 +16580,34 @@ function main() {
       process.exit(0);
     }
+    // `--ci`: gate — fail when overall consistency is below a threshold (or regresses).
+    if (args.includes('--ci')) {
+      const { ciGate } = requireSourceOrBundled('./src/conventions/ci');
+      const minIdx = args.indexOf('--min');
+      const min = minIdx !== -1 && args[minIdx + 1] ? parseFloat(args[minIdx + 1]) : undefined;
+      const noRegress = args.includes('--no-regress');
+      let prior = null;
+      if (noRegress) {
+        try {
+          const lines = fs.readFileSync(path.join(cwd, '.context', 'conventions-history.ndjson'), 'utf8').split('\n').filter(Boolean);
+          if (lines.length) prior = JSON.parse(lines[lines.length - 1]);
+        } catch (_) {}
+      }
+      const gate = ciGate(result, { min, noRegress }, prior);
+      if (jsonOut) {
+        process.stdout.write(JSON.stringify(gate) + '\n');
+        process.exit(gate.ok ? 0 : 1);
+      }
+      const pctC = (n) => `${(n * 100).toFixed(0)}%`;
+      if (gate.ok) {
+        console.log(`[sigmap] conventions --ci  ✓ PASS — consistency ${pctC(gate.score)} (min ${pctC(gate.min)})`);
+        process.exit(0);
+      }
+      console.log(`[sigmap] conventions --ci  ✗ FAIL — consistency ${pctC(gate.score)} (min ${pctC(gate.min)})`);
+      for (const r of gate.reasons) console.log(`  • ${r}`);
+      process.exit(1);
+    }
     // `--report`: consistency audit + score + trend vs the last run.
     if (args.includes('--report')) {
       const { scoreReport, snapshot } = requireSourceOrBundled('./src/conventions/report');

package/llms-full.txt CHANGED Viewed

@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
 grounded. Deterministic, offline, no embeddings or vector database. Works with
 Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
-# Version: 7.14.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
+# Version: 7.16.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
 # Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
 # Regenerate: npm run generate:llms   |   Validate: npm run validate:llms

package/llms.txt CHANGED Viewed

@@ -9,7 +9,7 @@ the files relevant to the task — cutting tokens ~97% while keeping answers
 grounded. Deterministic, offline, no embeddings or vector database. Works with
 Claude, Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
-# Version: 7.14.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
+# Version: 7.16.0 | Benchmark: sigmap-v7.0-main (2026-06-14)
 # Source: auto-generated from package.json, version.json, src/mcp/tools.js, src/config/defaults.js
 # Regenerate: npm run generate:llms   |   Validate: npm run validate:llms

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap",
-  "version": "7.14.0",
+  "version": "7.16.0",
   "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
   "main": "packages/core/index.js",
   "exports": {
@@ -39,7 +39,8 @@
     "generate:llms": "node scripts/generate-llms.mjs",
     "validate:llms": "node scripts/validate-llms.mjs",
     "prepublishOnly": "node scripts/check-bundle.mjs && node scripts/check-version-meta.mjs && node scripts/generate-llms.mjs",
-    "benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs"
+    "benchmark:grounding": "node scripts/run-hallucination-benchmark.mjs",
+    "benchmark:llm-ablation": "node scripts/run-llm-ablation.mjs"
   },
   "files": [
     "gen-context.js",

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-cli",
-  "version": "7.14.0",
+  "version": "7.16.0",
   "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
   "main": "index.js",
   "keywords": [

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-core",
-  "version": "7.14.0",
+  "version": "7.16.0",
   "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
   "main": "index.js",
   "keywords": [

package/src/conventions/ci.js ADDED Viewed

@@ -0,0 +1,48 @@
+'use strict';
+/**
+ * Convention CI gate (IMPL.md §4 — `conventions --ci`).
+ *
+ * Fails CI when a repo's overall convention consistency is below a threshold,
+ * and optionally when it regresses vs the last recorded run. Builds on the
+ * `--report` score. Pure, zero-dependency, bundle-safe.
+ */
+const { overallScore } = require('./report');
+const DEFAULT_MIN = 0.7;
+const EPS = 1e-9;
+/**
+ * Evaluate the consistency gate.
+ *
+ * Fails when the overall score is below `min`. When `opts.noRegress` is set
+ * and `prior.score` is a number, it also fails when the score dropped by
+ * more than EPS relative to that prior score.
+ * @param {object} result an `extractConventions` result
+ * @param {object} [opts]
+ * @param {number} [opts.min=0.7] minimum overall consistency (0–1); a value
+ *   that does not convert to a finite number falls back to DEFAULT_MIN
+ * @param {boolean} [opts.noRegress=false] also fail if the score dropped vs prior
+ * @param {object|null} [prior] the previous snapshot (from `report.snapshot`)
+ * @returns {{ score:number, min:number, ok:boolean, regressed:boolean, reasons:string[] }}
+ */
+function ciGate(result, opts = {}, prior = null) {
+  const options = opts || {};
+  // A NaN threshold (e.g. an unparsable `--min` value run through parseFloat)
+  // makes `score < min` false for every score, so the gate would always pass.
+  // Fall back to the default instead of silently disabling the check.
+  const requestedMin = options.min != null ? Number(options.min) : DEFAULT_MIN;
+  const min = Number.isFinite(requestedMin) ? requestedMin : DEFAULT_MIN;
+  const score = overallScore(result);
+  const pct = (value) => `${(value * 100).toFixed(0)}%`;
+  const reasons = [];
+  let ok = true;
+  if (score < min) {
+    ok = false;
+    reasons.push(`consistency ${pct(score)} below min ${pct(min)}`);
+  }
+  let regressed = false;
+  if (options.noRegress && prior && typeof prior.score === 'number') {
+    // EPS is a small tolerance: a difference within it is not a regression.
+    if (score < prior.score - EPS) {
+      regressed = true;
+      ok = false;
+      reasons.push(`consistency dropped ${pct(prior.score)} → ${pct(score)}`);
+    }
+  }
+  return { score, min, ok, regressed, reasons };
+}
+module.exports = { ciGate, DEFAULT_MIN };

package/src/eval/llm-ablation.js ADDED Viewed

@@ -0,0 +1,113 @@
+'use strict';
+/**
+ * LLM A/B hallucination ablation (IMPL.md §9) — the honest measurement.
+ *
+ * Runs a model twice per task — (A) no SigMap context, (B) with SigMap
+ * grounding — pipes both outputs through the hallucination guard, and reports
+ * the measured delta in flagged codebase-fact errors. The model call is
+ * INJECTED (`complete(prompt) → text`), so the harness itself is pure and
+ * offline-testable; the live model adapter lives in `scripts/run-llm-ablation.mjs`.
+ * Zero-dependency, bundle-safe (no network here).
+ */
+const { verify } = require('../verify/hallucination-guard');
+/**
+ * Assemble the grounding text that arm B prepends to a task prompt.
+ *
+ * Two independent, best-effort sections are collected and joined with a
+ * blank line: the rendered conventions block, and a "Known symbols" list
+ * capped at `opts.maxSymbols` names. A failure while building either
+ * section is swallowed and that section is simply left out.
+ * @param {string} cwd
+ * @param {object} [opts]
+ * @param {number} [opts.maxSymbols=80]
+ * @returns {string}
+ */
+function buildGrounding(cwd, opts = {}) {
+  const symbolLimit = opts.maxSymbols == null ? 80 : opts.maxSymbols;
+  const sections = [];
+  // Section 1: conventions rendered from the indexed file list.
+  try {
+    const { extractConventions } = require('../conventions/extract');
+    const { renderConventionsBlock } = require('../conventions/inject');
+    const { loadConfig } = require('../config/loader');
+    let indexedFiles = [];
+    try {
+      // The config value itself is not used; the call is kept so that a
+      // config failure still leaves the file list empty.
+      loadConfig(cwd);
+      const { buildSigIndex } = require('../retrieval/ranker');
+      indexedFiles = [...buildSigIndex(cwd).keys()];
+    } catch (_) {}
+    const conventions = extractConventions(cwd, indexedFiles);
+    sections.push(renderConventionsBlock(conventions));
+  } catch (_) {}
+  // Section 2: the first `symbolLimit` known symbol names.
+  try {
+    const { buildSymbolSet } = require('../verify/hallucination-guard');
+    const { set } = buildSymbolSet(cwd);
+    const symbolNames = [...set].slice(0, symbolLimit);
+    if (symbolNames.length) {
+      sections.push(`## Known symbols (reference these exactly)\n${symbolNames.join(', ')}`);
+    }
+  } catch (_) {}
+  return sections.join('\n\n');
+}
+/**
+ * Return how many items the hallucination guard flags in an answer
+ * (the §9 metric). Any error raised while verifying yields 0.
+ * @param {string} answerText
+ * @param {string} cwd
+ * @returns {number}
+ */
+function scoreAnswer(answerText, cwd) {
+  let flagged = 0;
+  try {
+    const text = String(answerText || '');
+    const report = verify(text, cwd);
+    flagged = report.summary.total || 0;
+  } catch (_) {
+    flagged = 0;
+  }
+  return flagged;
+}
+/**
+ * Run the A/B ablation over a task corpus.
+ *
+ * For every task the injected `complete` is called twice — first with the
+ * bare prompt (arm A), then with the grounding prepended (arm B) — and both
+ * outputs are scored with `scoreAnswer`. Per-task counts and aggregate
+ * totals are returned.
+ * @param {Array<{id:string, prompt:string}>} tasks
+ * @param {string} cwd
+ * @param {(prompt:string, meta:object)=>string} complete injected model call
+ * @param {object} [opts]
+ * @param {string} [opts.grounding] precomputed grounding (else built from cwd)
+ * @returns {{ tasks: object[], aggregate: object }}
+ */
+function runAblation(tasks, cwd, complete, opts = {}) {
+  const grounding = opts.grounding == null ? buildGrounding(cwd) : opts.grounding;
+  const perTask = [];
+  let withoutTotal = 0;
+  let withTotal = 0;
+  for (const task of tasks || []) {
+    const plainPrompt = task.prompt || '';
+    // An empty grounding string leaves arm B's prompt identical to arm A's.
+    let groundedPrompt = plainPrompt;
+    if (grounding) {
+      groundedPrompt = `${grounding}\n\n---\n\n${plainPrompt}`;
+    }
+    const answerA = String(complete(plainPrompt, { id: task.id, grounded: false }) || '');
+    const answerB = String(complete(groundedPrompt, { id: task.id, grounded: true }) || '');
+    const aFlagged = scoreAnswer(answerA, cwd);
+    const bFlagged = scoreAnswer(answerB, cwd);
+    withoutTotal += aFlagged;
+    withTotal += bFlagged;
+    perTask.push({ id: task.id, aFlagged, bFlagged });
+  }
+  const n = perTask.length;
+  // Flagged count per 100 tasks; 0 when no task was run.
+  const per100 = (total) => {
+    if (n > 0) return (total / n) * 100;
+    return 0;
+  };
+  const aggregate = {
+    n,
+    withoutFlagged: withoutTotal,
+    withFlagged: withTotal,
+    delta: withoutTotal - withTotal,
+    withoutPer100: per100(withoutTotal),
+    withPer100: per100(withTotal),
+  };
+  return { tasks: perTask, aggregate };
+}
+module.exports = { buildGrounding, scoreAnswer, runAblation };

package/src/mcp/server.js CHANGED Viewed

@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
 const SERVER_INFO = {
   name: 'sigmap',
-  version: '7.14.0',
+  version: '7.16.0',
   description: 'SigMap MCP server — code signatures on demand',
 };