npm - sigmap - Versions diffs - 4.0.2 → 4.1.1 - Mend

sigmap 4.0.2 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/AGENTS.md +2 -14
package/CHANGELOG.md +88 -0
package/README.md +26 -8
package/gen-context.config.json.example +15 -0
package/gen-context.js +161 -25
package/package.json +2 -2
package/packages/cli/package.json +1 -1
package/packages/core/package.json +1 -1
package/src/config/defaults.js +22 -1
package/src/mcp/server.js +1 -1
package/src/retrieval/ranker.js +65 -10

package/AGENTS.md CHANGED Viewed

@@ -12,29 +12,17 @@ Use this marker block for all appendable context files:
 ## Auto-generated signatures
 <!-- Updated by gen-context.js -->
 You are a coding assistant with full knowledge of this codebase.
-Below are the code signatures extracted by SigMap v4.0.2 on 2026-04-15T07:28:24.633Z.
+Below are the code signatures extracted by SigMap v4.1.0 on 2026-04-15T08:05:43.080Z.
 Use these signatures to answer questions about the code accurately.
 ## Code Signatures
-<!-- Generated by SigMap gen-context.js v4.0.2 -->
+<!-- Generated by SigMap gen-context.js v4.1.0 -->
 <!-- DO NOT EDIT below the marker line — run gen-context.js to regenerate -->
 # Code signatures
-## changes (last 5 commits — 77 minutes ago)
-```
-src/analysis/coverage-score.js                +coverageScore  +_walk
-src/eval/analyzer.js                          ~analyzeFiles
-packages/adapters/claude.js                   +_confidenceMeta  ~format
-packages/adapters/copilot.js                  +_confidenceMeta  ~format
-packages/adapters/cursor.js                   +_confidenceMeta  ~format
-packages/adapters/gemini.js                   +_confidenceMeta  ~format  ~write
-packages/adapters/openai.js                   +_confidenceMeta  ~format  ~outputPath
-packages/adapters/windsurf.js                 +_confidenceMeta  ~format
-```
 ## packages
 ### packages/adapters/claude.js

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,94 @@ Format: [Semantic Versioning](https://semver.org/)
 ---
+## [4.1.1] — 2026-04-16 — Fix: --query works with any adapter output
+### Fixed
+- **`--query` fails after `--adapter` generation** (`[sigmap] no context file found`):
+  `buildSigIndex` hardcoded `.github/copilot-instructions.md` as the only
+  context file path, so `--query` always failed when any adapter other than
+  `copilot` wrote to a different location (`CLAUDE.md`, `AGENTS.md`,
+  `.cursorrules`, `.windsurfrules`, etc.).
+  `buildSigIndex` now probes all nine known adapter output paths in priority
+  order and returns the first non-empty index:
+  ```
+  copilot → claude → codex → cursor → windsurf → openai → gemini → llm-full → llm
+  ```
+  Human-written preamble before the `## Auto-generated signatures` marker
+  (e.g. custom content in `CLAUDE.md`) is skipped so those `###` sections
+  don't pollute the signature index.
+- **`--adapter <name> --query "..."` combination ignored the adapter flag**:
+  The `--query` handler now detects a co-present `--adapter` flag, resolves
+  that adapter's output path, and reads from it directly — so both forms work:
+  ```bash
+  # generate with claude adapter, then query without re-specifying adapter
+  node gen-context.js --adapter claude
+  node gen-context.js --query "add a new extractor"
+  # or pin explicitly in one command
+  node gen-context.js --adapter claude --query "add a new extractor"
+  ```
+- **`--analyze --json` output truncated at ~8 KB on macOS**:
+  Calling `process.exit(0)` immediately after `process.stdout.write(largeJson)`
+  truncated output because the underlying pipe write is asynchronous even
+  when `write()` returns `true`. Fixed by using the write callback so the
+  process exits only after the OS has accepted all bytes.
+### Tests
+- Added `test/integration/query-adapter.test.js` (17 tests) covering every
+  adapter output path (unit + CLI), probe order, marker-skipping, explicit
+  `opts.contextPath` override, and empty-project fallback.
+---
+## [4.1.0] — 2026-04-15 — Smart Budget: auto-scaling token budget
+### Added
+- **Auto-scaling token budget** (`autoMaxTokens: true`, default on):
+  Replaces the old fixed 6 000-token default with a formula that sizes the budget to your repo:
+  ```
+  effective = clamp(ceil(totalSigTokens × coverageTarget), 4000, floor(modelContextLimit × maxTokensHeadroom))
+  ```
+  - `coverageTarget` (default `0.80`) — target fraction of source files to include
+  - `modelContextLimit` (default `128000`) — model context window size; hard cap = `limit × headroom`
+  - `maxTokensHeadroom` (default `0.20`) — fraction of the model window reserved for SigMap output (default hard cap: **25 600 tokens**)
+  - Minimum floor: **4 000 tokens** (prevents tiny repos from being under-budgeted)
+  - When the hard cap prevents hitting the coverage target by more than 10 percentage points, SigMap warns and suggests `strategy: "per-module"`
+- **Four new config keys** (all optional, documented in `gen-context.config.json.example`):
+  | Key | Default | Description |
+  |---|---|---|
+  | `autoMaxTokens` | `true` | Enable auto-scaling |
+  | `coverageTarget` | `0.80` | Target fraction of source files |
+  | `modelContextLimit` | `128000` | Model context window (tokens) |
+  | `maxTokensHeadroom` | `0.20` | Fraction of context for SigMap |
+- **Post-run summary annotation**: coverage line now shows `[budget: N auto-scaled]` when the formula overrode the configured `maxTokens`.
+- **Per-module strategy budget fix**: each module now gets its own full effective budget instead of a proportional slice, which was the limiting factor that made `per-module` less useful than advertised.
+- **Tracking log fields**: `autoBudget: true/false` and `budgetLimit: N` added to `.context/usage.ndjson` entries.
+- **12 new integration tests** (`test/integration/auto-budget.test.js`): cover MIN floor, proportional scaling, hard cap, disabled auto-scaling, custom `coverageTarget`/`modelContextLimit`/`maxTokensHeadroom`, warning emission, and empty-project edge case.
+### Changed
+- `autoMaxTokens: false` + explicit `maxTokens` preserves the old fixed-budget behaviour exactly — fully backwards compatible.
+- `printReport` now labels the budget `(auto-scaled)` vs `(fixed)` in the report line.
+### Benchmarks (v4.1.0)
+- Token reduction: **97.6% average** across 18 repos ✅
+- Retrieval hit@5: **84.4%** ✅
+- With auto-scaling enabled, all 18 benchmark repos now stay within a sensible budget that targets ≥ 80% file coverage rather than the old 6 K ceiling.
+---
 ## [4.0.2] — 2026-04-15 — Bundle factory fix (re-release of 4.0.1)
 ### Fixed

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@
 <h1>⚡ SigMap</h1>
 <p><strong>WITHOUT SIGMAP, YOUR AI IS GUESSING.</strong><br>
-<strong>It sees 8% of your codebase and invents the rest.</strong></p>
+<strong>Without structured context, AI often reads the wrong file and fills the gaps with guesses.</strong></p>
 <p><sub>Run one command. Force every answer to come from real code.</sub></p>
@@ -19,7 +19,13 @@
 npx sigmap   # 10 seconds. zero config. your AI never reads the wrong file again.
 ```
-> Latest: **v4.0.0** — Intelligence Layer. Coverage score, confidence indicators in every output file, `--report` module heatmap, `--diff` risk scoring, and extractor quality-based drop order.
+**What you get in ~10 seconds**
+- A compact signature map of your codebase
+- The right file in context far more often (84.4% hit@5 vs 13.6% random)
+- Fewer retries (1.59 vs 2.84 prompts per task)
+- Far smaller context (~2K–4K tokens instead of ~80K)
+> Latest: **v4.1.0** — Smart Budget. Token budget now auto-scales to your repo size, targeting 80% source-file coverage by default. No config change needed — it just works.
 <div align="center">
 <img src="demo.gif" alt="SigMap demo — reducing 80K tokens to 4K in under 10 seconds" width="760" />
@@ -56,11 +62,13 @@ npx sigmap   # 10 seconds. zero config. your AI never reads the wrong file again
 | | Without SigMap | With SigMap |
 |---|:---:|:---:|
 | Task success | 10% | **59%** |
-| Prompts per task | 2.84 | **1.78** |
+| Prompts per task | 2.84 | **1.59** |
 | Tokens per session | ~80,000 | **~2,000** |
 | Right file found | 13.6% | **84.4%** |
 | Hallucination risk | 92% | **0%** |
+Measured on 90 coding tasks across 18 real public repos. Full methodology and raw benchmark pages are linked below.
 </details>
 ---
@@ -689,7 +697,6 @@ Copy `gen-context.config.json.example` to `gen-context.config.json`:
 {
   "output": ".github/copilot-instructions.md",
   "srcDirs": ["src", "app", "lib"],
-  "maxTokens": 6000,
   "outputs": ["copilot"],
   "secretScan": true,
   "strategy": "full",
@@ -703,10 +710,21 @@ Copy `gen-context.config.json.example` to `gen-context.config.json`:
 - **`output`** — custom path for the primary markdown output file (used by `copilot` adapter). Default: `.github/copilot-instructions.md`
 - **`outputs`** — which adapters to write to: `copilot` | `claude` | `cursor` | `windsurf`
 - **`srcDirs`** — directories to scan (relative to project root)
-- **`maxTokens`** — max tokens in final output before budget enforcement
 - **`secretScan`** — redact secrets (AWS keys, tokens, etc.) from output
 - **`strategy`** — output mode: `full` (default) | `per-module` | `hot-cold`
+**Token budget (v4.1.0 — auto-scaling):**
+| Key | Default | Description |
+|---|---|---|
+| `autoMaxTokens` | `true` | Auto-scale budget to repo size. Set `false` to pin a fixed `maxTokens`. |
+| `coverageTarget` | `0.80` | Fraction of source files to target (0.0–1.0). |
+| `modelContextLimit` | `128000` | Model context window size. Hard cap = `limit × maxTokensHeadroom`. |
+| `maxTokensHeadroom` | `0.20` | Fraction of the context window reserved for SigMap output (default: 25 600 tokens). |
+| `maxTokens` | `6000` | Used when `autoMaxTokens: false`, or as the fallback when no signatures are extracted. It is not the auto-scaling floor (that is a fixed 4 000 tokens). |
+The formula: `effective = clamp(ceil(totalSigTokens × coverageTarget), 4000, floor(modelContextLimit × maxTokensHeadroom))`.
 Exclusions go in `.contextignore` (gitignore syntax). Also reads `.repomixignore` if present.
 ```
@@ -752,11 +770,11 @@ Every run now prints a coverage line alongside token reduction:
 ```
 ───────────────────────────────────────────
- SigMap v4.0.0
+ SigMap v4.1.0
  Files scanned  : 76
  Symbols found  : 332
  Token reduction: 94%  (65,227 → 4,103)
- Coverage       : A (97%)  — 76 of 78 source files included
+ Coverage       : A (97%)  — 76 of 78 source files included  [budget: 4000 auto-scaled]
  Output         : .github/copilot-instructions.md
 ───────────────────────────────────────────
 ```
@@ -771,7 +789,7 @@ sigmap --report
 ```
 [sigmap] report:
-  version         : 4.0.0
+  version         : 4.1.0
   files processed : 76
   reduction       : 93.7%
   coverage        : A (97%)  — 76 of 78 source files included

package/gen-context.config.json.example CHANGED Viewed

@@ -20,8 +20,23 @@
   "maxSigsPerFile": 25,
+  "_maxTokens_comment": "Used only when autoMaxTokens is false. Override to pin a fixed budget.",
   "maxTokens": 6000,
+  "_autoMaxTokens_comment": "Auto-scale budget based on repo size. Default: true.",
+  "_autoMaxTokens_formula": "effective = clamp(ceil(totalSigTokens × coverageTarget), 4000, floor(modelContextLimit × maxTokensHeadroom))",
+  "autoMaxTokens": true,
+  "_coverageTarget_comment": "Fraction of source files to target for inclusion (0.0–1.0). Default: 0.80 = 80%.",
+  "coverageTarget": 0.80,
+  "_modelContextLimit_comment": "Model context window size (tokens). Hard cap = modelContextLimit × maxTokensHeadroom.",
+  "_modelContextLimit_examples": "128000 = GPT-4o/Claude (default)  |  200000 = Claude max  |  1000000 = Gemini 1M",
+  "modelContextLimit": 128000,
+  "_maxTokensHeadroom_comment": "Fraction of model context reserved for SigMap output. 0.20 = 25,600 token hard cap.",
+  "maxTokensHeadroom": 0.20,
   "secretScan": true,
   "monorepo": false,

package/gen-context.js CHANGED Viewed

@@ -59,9 +59,22 @@ __factories["./src/config/defaults"] = function(module, exports) {
     // Maximum signatures extracted per file
     maxSigsPerFile: 25,
-    // Maximum tokens in final output before budget enforcement kicks in
+    // Maximum tokens in final output before budget enforcement kicks in.
+    // Used when autoMaxTokens is false, or as the fallback when no signatures are extracted (the auto-scaling floor is a fixed 4000).
     maxTokens: 6000,
+    // Automatically scale the token budget based on repo size.
+    autoMaxTokens: true,
+    // Fraction of source files to target for inclusion (0.0–1.0).
+    coverageTarget: 0.80,
+    // Model context window size (tokens). Used to compute the hard cap.
+    modelContextLimit: 128000,
+    // Fraction of the model context window reserved for SigMap output.
+    maxTokensHeadroom: 0.20,
     // Scan signatures for secrets and redact matches
     secretScan: true,
@@ -4641,7 +4654,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
   const SERVER_INFO = {
     name: 'sigmap',
-    version: '4.0.2',
+    version: '4.1.0',
     description: 'SigMap MCP server — code signatures on demand',
   };
@@ -5436,12 +5449,24 @@ __factories["./src/retrieval/ranker"] = function(module, exports) {
     scored.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
     return scored.slice(0, topK);
   }
-  function buildSigIndex(cwd) {
-    const fs = require('fs'); const path = require('path');
-    const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
+  const ADAPTER_OUTPUT_PATHS = [ // probe order used by buildSigIndex; each entry is a list of path segments joined onto cwd
+    ['.github', 'copilot-instructions.md'], // copilot (default)
+    ['CLAUDE.md'], // claude
+    ['AGENTS.md'], // codex
+    ['.cursorrules'], // cursor
+    ['.windsurfrules'], // windsurf
+    ['.github', 'openai-context.md'], // openai
+    ['.github', 'gemini-context.md'], // gemini
+    ['llm-full.txt'], // llm-full
+    ['llm.txt'], // llm
+  ];
+  function _parseContextFile(contextPath) {
+    const fs = require('fs');
     const index = new Map();
     if (!fs.existsSync(contextPath)) return index;
-    const content = fs.readFileSync(contextPath, 'utf8');
+    let content = fs.readFileSync(contextPath, 'utf8');
+    const markerIdx = content.indexOf('## Auto-generated signatures');
+    if (markerIdx !== -1) content = content.slice(markerIdx);
     const lines = content.split('\n');
     let currentFile = null; let inBlock = false; let sigs = [];
     for (const line of lines) {
@@ -5453,6 +5478,16 @@ __factories["./src/retrieval/ranker"] = function(module, exports) {
     if (currentFile !== null) index.set(currentFile, sigs);
     return index;
   }
+  function buildSigIndex(cwd, opts) { // returns the Map produced by _parseContextFile for the chosen context file
+    const path = require('path');
+    if (opts && opts.contextPath) return _parseContextFile(opts.contextPath); // explicit path is used as-is; no fallback probing even if its index is empty
+    for (const parts of ADAPTER_OUTPUT_PATHS) {
+      const contextPath = path.join(cwd, ...parts);
+      const index = _parseContextFile(contextPath);
+      if (index.size > 0) return index; // first path that yields at least one entry wins
+    }
+    return new Map(); // nothing produced an index; callers check index.size === 0
+  }
   function formatRankTable(results, query) {
     if (!results || results.length === 0) return `No matching files found for query: "${query}"\n`;
     const lines = [`## Query: ${query}`, '', '| Rank | File | Score | Sigs | Tokens |', '|------|------|-------|------|--------|',
@@ -6203,7 +6238,7 @@ const path = require('path');
 const os = require('os');
 const { execSync } = require('child_process');
-const VERSION = '4.0.2';
+const VERSION = '4.1.0';
 const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
 function requireSourceOrBundled(key) {
@@ -6423,6 +6458,61 @@ function isMockFile(filePath) {
     /mock\.(ts|js|tsx|jsx)$/.test(p);
 }
+/**
+ * Compute the effective token budget based on repo size and config.
+ *
+ * Formula:
+ *   totalSigTokens  = sum of estimated tokens for all extracted sig blocks
+ *   needed          = ceil(totalSigTokens * coverageTarget)   // tokens for target% coverage
+ *   hardCap         = floor(modelContextLimit * maxTokensHeadroom)
+ *   effective       = min(max(4000, needed), hardCap)
+ *
+ * The cap is applied after the floor, so a hardCap below 4000 wins over
+ * the 4000 minimum.
+ *
+ * The configured maxTokens is returned unchanged in two cases: when
+ * autoMaxTokens is exactly `false`, and when no signature tokens were
+ * counted (totalSigTokens === 0).
+ *
+ * Side effect: emits three console.warn lines when `needed` exceeds the hard
+ * cap and the estimated coverage (hardCap / totalSigTokens, rounded to a
+ * percentage) is more than 10 points below the rounded target percentage.
+ *
+ * @param {Array}  fileEntries - All file entries BEFORE budget enforcement.
+ *   Each entry's `sigs` array (treated as empty when missing) is joined with
+ *   '\n' and passed to estimateTokens, which is defined elsewhere in the file.
+ * @param {object} config - Reads autoMaxTokens, maxTokens, coverageTarget,
+ *   modelContextLimit and maxTokensHeadroom. The last three fall back to
+ *   0.80, 128000 and 0.20 when null or undefined.
+ * @returns {number} effective token budget
+ */
+function computeEffectiveMaxTokens(fileEntries, config) {
+  if (config.autoMaxTokens === false) return config.maxTokens;
+  const coverageTarget    = (config.coverageTarget    != null) ? config.coverageTarget    : 0.80;
+  const modelContextLimit = (config.modelContextLimit != null) ? config.modelContextLimit : 128000;
+  const maxTokensHeadroom = (config.maxTokensHeadroom != null) ? config.maxTokensHeadroom : 0.20;
+  const totalSigTokens = fileEntries.reduce(
+    (s, e) => s + estimateTokens((e.sigs || []).join('\n')), 0
+  );
+  if (totalSigTokens === 0) return config.maxTokens; // nothing to scale against; keep the configured budget
+  const hardCap  = Math.floor(modelContextLimit * maxTokensHeadroom);
+  const needed   = Math.ceil(totalSigTokens * coverageTarget);
+  const MIN      = 4000;
+  const effective = Math.min(Math.max(MIN, needed), hardCap); // floor first, then cap: hardCap has the final say
+  // Warn when the hard cap is below what the coverage target needs and the shortfall exceeds 10 percentage points
+  if (needed > hardCap) {
+    const estimatedCovPct = Math.round((hardCap / totalSigTokens) * 100);
+    const targetPct       = Math.round(coverageTarget * 100);
+    if (estimatedCovPct < targetPct - 10) {
+      console.warn(
+        `[sigmap] auto-budget: ${fileEntries.length} files need ~${Math.round(needed / 1000)}K tokens ` +
+        `for ${targetPct}% coverage`
+      );
+      console.warn(
+        `[sigmap] auto-budget: capped at ${hardCap} ` +
+        `(${Math.round(maxTokensHeadroom * 100)}% of ${Math.round(modelContextLimit / 1000)}K model limit) ` +
+        `→ est. ${estimatedCovPct}% coverage`
+      );
+      console.warn(
+        `[sigmap] auto-budget: tip — set strategy:"per-module" for full coverage on large repos`
+      );
+    }
+  }
+  return effective;
+}
 function applyTokenBudget(fileEntries, maxTokens) {
   // fileEntries: [{ filePath, sigs, mtime }]
   // Reserve ~10% for formatting overhead (section headers, code fences, top-level header)
@@ -6889,7 +6979,7 @@ function _coverageBar(pct, width) {
   return '\u2588'.repeat(filled) + '\u2591'.repeat(width - filled);
 }
-function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson, budgetLimit, coverageResult) {
+function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson, budgetLimit, coverageResult, isAutoBudget) {
   const reduction = inputTokens > 0 ? (100 - (finalTokens / inputTokens) * 100).toFixed(1) : 0;
   const overBudget = finalTokens > (budgetLimit || 6000);
   if (asJson) {
@@ -6904,6 +6994,7 @@ function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson,
       reductionPct: parseFloat(reduction),
       overBudget,
       budgetLimit: budgetLimit || 6000,
+      autoBudget: !!isAutoBudget,
     };
     if (coverageResult) {
       payload.coverage = {
@@ -6923,13 +7014,16 @@ function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson,
     // Exit 1 in CI if over budget — lets pipelines fail fast
     if (overBudget) process.exitCode = 1;
   } else {
+    const budgetLabel = isAutoBudget
+      ? `${budgetLimit || 6000} (auto-scaled)`
+      : `${budgetLimit || 6000} (fixed)`;
     console.log(`[sigmap] report:`);
     console.log(`  version         : ${VERSION}`);
     console.log(`  files processed : ${fileCount}`);
     console.log(`  files dropped   : ${droppedCount}`);
     console.log(`  input tokens    : ~${inputTokens}`);
     console.log(`  output tokens   : ~${finalTokens}`);
-    console.log(`  budget limit    : ${budgetLimit || 6000}`);
+    console.log(`  budget limit    : ${budgetLabel}`);
     console.log(`  reduction       : ${reduction}%`);
     if (coverageResult) {
       console.log(`  coverage        : ${coverageResult.grade} (${coverageResult.score}%)  — ${coverageResult.included} of ${coverageResult.total} source files included`);
@@ -7088,8 +7182,9 @@ function runPerModuleStrategy(cwd, config, fileEntries, inputTokenTotal) {
     const outPath = path.join(cwd, '.github', outName);
     const modEntries = modules[mod];
-    // Per-module budget: proportional share of maxTokens
-    const modBudget = Math.max(1000, Math.floor(config.maxTokens / moduleNames.length));
+    // Per-module budget: each module gets its own full effective budget
+    // (per-module strategy is the recommended path for large repos — no sharing needed)
+    const modBudget = Math.max(1000, config.maxTokens);
     const budgeted = applyTokenBudget(modEntries, modBudget);
     const content = formatOutput(budgeted, cwd, false, config, null);
@@ -7358,15 +7453,22 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
     });
   }
+  // v4.1: compute effective budget once; used by all strategies
+  const effectiveMaxTokens = computeEffectiveMaxTokens(fileEntries, config);
+  // Propagate to config so per-module / hot-cold strategies pick it up
+  const configWithBudget = effectiveMaxTokens !== config.maxTokens
+    ? Object.assign({}, config, { maxTokens: effectiveMaxTokens, _autoMaxTokens: effectiveMaxTokens })
+    : config;
   let result;
   if (!reportMode) {
     if (strategy === 'per-module') {
-      result = runPerModuleStrategy(cwd, config, fileEntries, inputTokenTotal);
+      result = runPerModuleStrategy(cwd, configWithBudget, fileEntries, inputTokenTotal);
     } else if (strategy === 'hot-cold') {
-      result = runHotColdStrategy(cwd, config, fileEntries, recentFiles, inputTokenTotal);
+      result = runHotColdStrategy(cwd, configWithBudget, fileEntries, recentFiles, inputTokenTotal);
     } else {
       // 'full' — original behaviour
-      fileEntries = applyTokenBudget(fileEntries, config.maxTokens);
+      fileEntries = applyTokenBudget(fileEntries, effectiveMaxTokens);
       const droppedCount = beforeCount - fileEntries.length;
       const routingEnabled = !!(config.routing || process.argv.includes('--routing'));
       const content = formatOutput(fileEntries, cwd, routingEnabled, config, null);
@@ -7409,21 +7511,21 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
     }
   } else {
     // report mode: always run full pipeline for accurate stats
-    const budgeted = applyTokenBudget([...fileEntries], config.maxTokens);
+    const budgeted = applyTokenBudget([...fileEntries], effectiveMaxTokens);
     const droppedCount = beforeCount - budgeted.length;
-    const content = formatOutput(budgeted, cwd, false, config, null);
+    const content = formatOutput(budgeted, cwd, false, configWithBudget, null);
     const finalTokens = estimateTokens(content);
     // v4.0: compute coverage score for --report heatmap
     let coverageResult = null;
     try {
       const { coverageScore } = requireSourceOrBundled('./src/analysis/coverage-score');
-      coverageResult = coverageScore(cwd, budgeted, config);
+      coverageResult = coverageScore(cwd, budgeted, configWithBudget);
     } catch (_) {}
     result = { inputTokenTotal, finalTokens, fileCount: beforeCount, droppedCount, coverageResult };
   }
   if (reportMode || process.argv.includes('--report')) {
-    printReport(result.inputTokenTotal, result.finalTokens, result.fileCount, result.droppedCount, reportJson, config.maxTokens, result.coverageResult);
+    printReport(result.inputTokenTotal, result.finalTokens, result.fileCount, result.droppedCount, reportJson, effectiveMaxTokens, result.coverageResult, config.autoMaxTokens !== false && effectiveMaxTokens !== config.maxTokens);
   }
   // Usage tracking (v0.9) — optional append-only NDJSON log
@@ -7437,8 +7539,9 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
         droppedCount: result.droppedCount,
         rawTokens: result.inputTokenTotal,
         finalTokens: result.finalTokens,
-        overBudget: result.finalTokens > config.maxTokens,
-        budgetLimit: config.maxTokens,
+        overBudget: result.finalTokens > effectiveMaxTokens,
+        budgetLimit: effectiveMaxTokens,
+        autoBudget: config.autoMaxTokens !== false && effectiveMaxTokens !== config.maxTokens,
       }, cwd);
     } catch (err) {
       console.warn(`[sigmap] tracking: ${err.message}`);
@@ -7459,8 +7562,15 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
     let coverageLine = '';
     try {
       const { coverageScore } = requireSourceOrBundled('./src/analysis/coverage-score');
-      const cov = coverageScore(cwd, fileEntries, config);
-      coverageLine = ` Coverage       : ${cov.grade} (${cov.score}%)  \u2014 ${cov.included} of ${cov.total} source files included`;
+      const cov = coverageScore(cwd, fileEntries, configWithBudget);
+      const autoBudgetNote = (config.autoMaxTokens !== false && effectiveMaxTokens !== config.maxTokens)
+        ? `  [budget: ${effectiveMaxTokens} auto-scaled]`
+        : '';
+      coverageLine = ` Coverage       : ${cov.grade} (${cov.score}%)  \u2014 ${cov.included} of ${cov.total} source files included${autoBudgetNote}`;
+      // Extra warning line when coverage is still poor despite auto-scaling
+      if (cov.score < 40 && config.strategy !== 'per-module' && config.strategy !== 'hot-cold') {
+        coverageLine += '\n [sigmap] tip: large repo — consider strategy:"per-module" for full coverage';
+      }
     } catch (_) {}
     const lines = [
       bar,
@@ -8132,7 +8242,13 @@ function main() {
       const stats = analyzeFiles(allFiles, cwd, { slow, maxSigs: cfg.maxSigsPerFile || 25 });
       if (args.includes('--json')) {
-        process.stdout.write(JSON.stringify(formatAnalysisJSON(stats)) + '\n');
+        const out = JSON.stringify(formatAnalysisJSON(stats)) + '\n';
+        // Use the write callback to exit only after the OS has accepted all
+        // bytes. Calling process.exit(0) synchronously after write() truncates
+        // large outputs because the underlying pipe write is asynchronous even
+        // when write() returns true.
+        process.stdout.write(out, 'utf8', () => process.exit(0));
+        return; // exit is handled by the callback above
       } else {
         const table = formatAnalysisTable(stats, slow);
         process.stdout.write(table);
@@ -8238,9 +8354,29 @@ function main() {
         process.exit(1);
       }
       const { rank, buildSigIndex, formatRankTable, formatRankJSON } = requireSourceOrBundled('./src/retrieval/ranker');
-      const index = buildSigIndex(cwd);
+      // Resolve an explicit context file path when --adapter is present.
+      // This lets `--adapter claude --query "..."` read CLAUDE.md instead of
+      // falling through to the default copilot-instructions.md probe.
+      let queryOpts;
+      const adpIdx = args.indexOf('--adapter');
+      if (adpIdx >= 0) {
+        const adapterName = (args[adpIdx + 1] || '').trim().toLowerCase();
+        const VALID_ADAPTERS = ['copilot', 'claude', 'cursor', 'windsurf', 'openai', 'gemini', 'codex'];
+        if (VALID_ADAPTERS.includes(adapterName)) {
+          try {
+            const adapterMod = __require('./packages/adapters/' + adapterName);
+            queryOpts = { contextPath: adapterMod.outputPath(cwd) };
+          } catch (_) {}
+        }
+      }
+      const index = buildSigIndex(cwd, queryOpts);
       if (index.size === 0) {
         console.error('[sigmap] no context file found. Run: node gen-context.js');
+        if (adpIdx >= 0) {
+          console.error('  (tried the path for --adapter ' + (args[adpIdx + 1] || '') + ')');
+        }
         process.exit(1);
       }
       const topIdx = args.indexOf('--top');

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap",
-  "version": "4.0.2",
+  "version": "4.1.1",
   "description": "Zero-dependency AI context engine — 97% token reduction. No npm install. Runs on Node 18+.",
   "main": "gen-context.js",
   "exports": {
@@ -15,7 +15,7 @@
   },
   "scripts": {
     "test": "node test/run.js",
-    "test:integration": "node test/integration/strategy.test.js && node test/integration/secret-scan.test.js && node test/integration/token-budget.test.js && node test/integration/mcp-server.test.js",
+    "test:integration": "node test/integration/strategy.test.js && node test/integration/secret-scan.test.js && node test/integration/token-budget.test.js && node test/integration/auto-budget.test.js && node test/integration/mcp-server.test.js",
     "test:integration:all": "node test/integration/all.js",
     "test:all": "node test/run.js && node test/integration/strategy.test.js && node test/integration/secret-scan.test.js",
     "generate": "node gen-context.js",

package/packages/cli/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-cli",
-  "version": "4.0.2",
+  "version": "4.1.0",
   "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
   "main": "index.js",
   "keywords": [

package/packages/core/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sigmap-core",
-  "version": "4.0.2",
+  "version": "4.1.0",
   "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
   "main": "index.js",
   "keywords": [

package/src/config/defaults.js CHANGED Viewed

@@ -47,9 +47,30 @@ const DEFAULTS = {
   // Maximum signatures extracted per file
   maxSigsPerFile: 25,
-  // Maximum tokens in final output before budget enforcement kicks in
+  // Maximum tokens in final output before budget enforcement kicks in.
+  // Used when autoMaxTokens is false, or as the fallback when no signatures are extracted (the auto-scaling floor is a fixed 4000).
   maxTokens: 6000,
+  // Automatically scale the token budget based on repo size.
+  // When true, SigMap targets `coverageTarget` fraction of source files and
+  // raises the budget up to `modelContextLimit * maxTokensHeadroom`.
+  // Set to false (or set maxTokens explicitly) to pin the budget.
+  autoMaxTokens: true,
+  // Fraction of source files to target for inclusion (0.0–1.0).
+  // 0.80 = include at least 80% of source files in the context output.
+  coverageTarget: 0.80,
+  // Model context window size (tokens). Used to compute the hard cap:
+  //   hardCap = modelContextLimit × maxTokensHeadroom
+  // Default: GPT-4o / Claude Sonnet (128K). Set higher for Gemini 1M etc.
+  modelContextLimit: 128000,
+  // Fraction of the model context window reserved for SigMap output.
+  // Leaves the remaining fraction for the conversation, system prompt, etc.
+  // Default 0.20 = 20% of 128K = 25,600 token hard cap.
+  maxTokensHeadroom: 0.20,
   // Scan signatures for secrets and redact matches
   secretScan: true,

package/src/mcp/server.js CHANGED Viewed

@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
 const SERVER_INFO = {
   name: 'sigmap',
-  version: '4.0.2',
+  version: '4.1.0',
   description: 'SigMap MCP server — code signatures on demand',
 };

package/src/retrieval/ranker.js CHANGED Viewed

@@ -141,24 +141,45 @@ function rank(query, sigIndex, opts) {
 }
 /**
- * Build a signature index from the generated context file.
- * Returns Map<filePath, string[]> where filePath is the relative path
- * as it appears in the ### headers of copilot-instructions.md.
+ * All paths where sigmap adapters write their context files, in probe order.
+ * The first existing file with a non-empty index wins when no explicit path
+ * is supplied.
+ *
+ * Each entry is an array of path segments; buildSigIndex spreads it into
+ * path.join(cwd, ...segments) to get the file to parse.
+ */
+const ADAPTER_OUTPUT_PATHS = [
+  ['.github', 'copilot-instructions.md'], // copilot (default)
+  ['CLAUDE.md'],                           // claude
+  ['AGENTS.md'],                           // codex
+  ['.cursorrules'],                        // cursor
+  ['.windsurfrules'],                      // windsurf
+  ['.github', 'openai-context.md'],        // openai
+  ['.github', 'gemini-context.md'],        // gemini
+  ['llm-full.txt'],                        // llm-full
+  ['llm.txt'],                             // llm
+];
+/**
+ * Parse a single context file into a Map<filePath, string[]>.
  *
- * @param {string} cwd
+ * Files that contain human-written content before an
+ * "## Auto-generated signatures" marker (e.g. CLAUDE.md) are handled
+ * by skipping everything above the marker before scanning for ### headers.
+ *
+ * @param {string} contextPath  - absolute path to the context file
  * @returns {Map<string, string[]>}
  */
-function buildSigIndex(cwd) {
-  const fs   = require('fs');
-  const path = require('path');
-  const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
+function _parseContextFile(contextPath) {
+  const fs = require('fs');
   const index = new Map();
   if (!fs.existsSync(contextPath)) return index;
-  const content = fs.readFileSync(contextPath, 'utf8');
-  const lines = content.split('\n');
+  let content = fs.readFileSync(contextPath, 'utf8');
+  // Skip any human-written preamble that sits above the auto-generated block.
+  const markerIdx = content.indexOf('## Auto-generated signatures');
+  if (markerIdx !== -1) content = content.slice(markerIdx);
+  const lines = content.split('\n');
   let currentFile = null;
   let inBlock = false;
   let sigs = [];
@@ -180,6 +201,40 @@ function buildSigIndex(cwd) {
   return index;
 }
+/**
+ * Build a signature index from the generated context file.
+ * Returns Map<filePath, string[]> where filePath is the relative path
+ * as it appears in the ### headers of the context file.
+ *
+ * When `opts.contextPath` is provided, that specific file is used.
+ * This is the case when the caller already knows the path (e.g. via
+ * --adapter <name> or --output <file>).
+ * The path is handed to the parser as-is (it is not joined onto `cwd`), and
+ * its result is returned even when empty — no fallback probing happens.
+ *
+ * Otherwise all known adapter output paths are probed in order and the
+ * first file that produces a non-empty index is returned.
+ * A file that yields an empty index is passed over in favour of the next
+ * path in the list.
+ *
+ * @param {string} cwd - base directory the adapter output paths are joined onto
+ * @param {{ contextPath?: string }} [opts]
+ * @returns {Map<string, string[]>} the parsed index, or an empty Map when no
+ *   probed path produced any entries
+ */
+function buildSigIndex(cwd, opts) {
+  const path = require('path');
+  // Caller supplied an explicit path — use it directly.
+  if (opts && opts.contextPath) {
+    return _parseContextFile(opts.contextPath);
+  }
+  // Probe all known adapter output paths; return first non-empty index.
+  for (const parts of ADAPTER_OUTPUT_PATHS) {
+    const contextPath = path.join(cwd, ...parts);
+    const index = _parseContextFile(contextPath);
+    if (index.size > 0) return index;
+  }
+  return new Map();
+}
 /**
  * Format ranked results as a markdown table string.
  *