sigmap 4.0.2 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -14
- package/CHANGELOG.md +88 -0
- package/README.md +26 -8
- package/gen-context.config.json.example +15 -0
- package/gen-context.js +161 -25
- package/package.json +2 -2
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/config/defaults.js +22 -1
- package/src/mcp/server.js +1 -1
- package/src/retrieval/ranker.js +65 -10
package/AGENTS.md
CHANGED
|
@@ -12,29 +12,17 @@ Use this marker block for all appendable context files:
|
|
|
12
12
|
## Auto-generated signatures
|
|
13
13
|
<!-- Updated by gen-context.js -->
|
|
14
14
|
You are a coding assistant with full knowledge of this codebase.
|
|
15
|
-
Below are the code signatures extracted by SigMap v4.0
|
|
15
|
+
Below are the code signatures extracted by SigMap v4.1.0 on 2026-04-15T08:05:43.080Z.
|
|
16
16
|
|
|
17
17
|
Use these signatures to answer questions about the code accurately.
|
|
18
18
|
|
|
19
19
|
## Code Signatures
|
|
20
20
|
|
|
21
|
-
<!-- Generated by SigMap gen-context.js v4.0
|
|
21
|
+
<!-- Generated by SigMap gen-context.js v4.1.0 -->
|
|
22
22
|
<!-- DO NOT EDIT below the marker line — run gen-context.js to regenerate -->
|
|
23
23
|
|
|
24
24
|
# Code signatures
|
|
25
25
|
|
|
26
|
-
## changes (last 5 commits — 77 minutes ago)
|
|
27
|
-
```
|
|
28
|
-
src/analysis/coverage-score.js +coverageScore +_walk
|
|
29
|
-
src/eval/analyzer.js ~analyzeFiles
|
|
30
|
-
packages/adapters/claude.js +_confidenceMeta ~format
|
|
31
|
-
packages/adapters/copilot.js +_confidenceMeta ~format
|
|
32
|
-
packages/adapters/cursor.js +_confidenceMeta ~format
|
|
33
|
-
packages/adapters/gemini.js +_confidenceMeta ~format ~write
|
|
34
|
-
packages/adapters/openai.js +_confidenceMeta ~format ~outputPath
|
|
35
|
-
packages/adapters/windsurf.js +_confidenceMeta ~format
|
|
36
|
-
```
|
|
37
|
-
|
|
38
26
|
## packages
|
|
39
27
|
|
|
40
28
|
### packages/adapters/claude.js
|
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,94 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [4.1.1] — 2026-04-16 — Fix: --query works with any adapter output
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
|
|
17
|
+
- **`--query` fails after `--adapter` generation** (`[sigmap] no context file found`):
|
|
18
|
+
`buildSigIndex` hardcoded `.github/copilot-instructions.md` as the only
|
|
19
|
+
context file path, so `--query` always failed when any adapter other than
|
|
20
|
+
`copilot` wrote to a different location (`CLAUDE.md`, `AGENTS.md`,
|
|
21
|
+
`.cursorrules`, `.windsurfrules`, etc.).
|
|
22
|
+
|
|
23
|
+
`buildSigIndex` now probes all nine known adapter output paths in priority
|
|
24
|
+
order and returns the first non-empty index:
|
|
25
|
+
```
|
|
26
|
+
copilot → claude → codex → cursor → windsurf → openai → gemini → llm-full → llm
|
|
27
|
+
```
|
|
28
|
+
Human-written preamble before the `## Auto-generated signatures` marker
|
|
29
|
+
(e.g. custom content in `CLAUDE.md`) is skipped so those `###` sections
|
|
30
|
+
don't pollute the signature index.
|
|
31
|
+
|
|
32
|
+
- **`--adapter <name> --query "..."` combination ignored the adapter flag**:
|
|
33
|
+
The `--query` handler now detects a co-present `--adapter` flag, resolves
|
|
34
|
+
that adapter's output path, and reads from it directly — so both forms work:
|
|
35
|
+
```bash
|
|
36
|
+
# generate with claude adapter, then query without re-specifying adapter
|
|
37
|
+
node gen-context.js --adapter claude
|
|
38
|
+
node gen-context.js --query "add a new extractor"
|
|
39
|
+
|
|
40
|
+
# or pin explicitly in one command
|
|
41
|
+
node gen-context.js --adapter claude --query "add a new extractor"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
- **`--analyze --json` output truncated at ~8 KB on macOS**:
|
|
45
|
+
Calling `process.exit(0)` immediately after `process.stdout.write(largeJson)`
|
|
46
|
+
truncated output because the underlying pipe write is asynchronous even
|
|
47
|
+
when `write()` returns `true`. Fixed by using the write callback so the
|
|
48
|
+
process exits only after the OS has accepted all bytes.
|
|
49
|
+
|
|
50
|
+
### Tests
|
|
51
|
+
|
|
52
|
+
- Added `test/integration/query-adapter.test.js` (17 tests) covering every
|
|
53
|
+
adapter output path (unit + CLI), probe order, marker-skipping, explicit
|
|
54
|
+
`opts.contextPath` override, and empty-project fallback.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## [4.1.0] — 2026-04-15 — Smart Budget: auto-scaling token budget
|
|
59
|
+
|
|
60
|
+
### Added
|
|
61
|
+
|
|
62
|
+
- **Auto-scaling token budget** (`autoMaxTokens: true`, default on):
|
|
63
|
+
Replaces the old fixed 6 000-token default with a formula that sizes the budget to your repo:
|
|
64
|
+
```
|
|
65
|
+
effective = clamp(ceil(totalSigTokens × coverageTarget), 4000, floor(modelContextLimit × maxTokensHeadroom))
|
|
66
|
+
```
|
|
67
|
+
- `coverageTarget` (default `0.80`) — target coverage fraction; applied to the estimated total signature tokens as a proxy for the share of source files included
|
|
68
|
+
- `modelContextLimit` (default `128000`) — model context window size; hard cap = `limit × headroom`
|
|
69
|
+
- `maxTokensHeadroom` (default `0.20`) — fraction of the model window reserved for SigMap output (default hard cap: **25 600 tokens**)
|
|
70
|
+
- Minimum floor: **4 000 tokens** (prevents tiny repos from being under-budgeted)
|
|
71
|
+
- When the hard cap prevents hitting the coverage target by more than 10 percentage points, SigMap warns and suggests `strategy: "per-module"`
|
|
72
|
+
|
|
73
|
+
- **Four new config keys** (all optional, documented in `gen-context.config.json.example`):
|
|
74
|
+
| Key | Default | Description |
|
|
75
|
+
|---|---|---|
|
|
76
|
+
| `autoMaxTokens` | `true` | Enable auto-scaling |
|
|
77
|
+
| `coverageTarget` | `0.80` | Target fraction of source files |
|
|
78
|
+
| `modelContextLimit` | `128000` | Model context window (tokens) |
|
|
79
|
+
| `maxTokensHeadroom` | `0.20` | Fraction of context for SigMap |
|
|
80
|
+
|
|
81
|
+
- **Post-run summary annotation**: coverage line now shows `[budget: N auto-scaled]` when the formula overrode the configured `maxTokens`.
|
|
82
|
+
|
|
83
|
+
- **Per-module strategy budget fix**: each module now gets its own full effective budget instead of a proportional slice, which was the limiting factor that made `per-module` less useful than advertised.
|
|
84
|
+
|
|
85
|
+
- **Tracking log fields**: `autoBudget: true/false` and `budgetLimit: N` added to `.context/usage.ndjson` entries.
|
|
86
|
+
|
|
87
|
+
- **12 new integration tests** (`test/integration/auto-budget.test.js`): cover MIN floor, proportional scaling, hard cap, disabled auto-scaling, custom `coverageTarget`/`modelContextLimit`/`maxTokensHeadroom`, warning emission, and empty-project edge case.
|
|
88
|
+
|
|
89
|
+
### Changed
|
|
90
|
+
|
|
91
|
+
- `autoMaxTokens: false` + explicit `maxTokens` preserves the old fixed-budget behaviour exactly — fully backwards compatible.
|
|
92
|
+
- `printReport` now labels the budget `(auto-scaled)` vs `(fixed)` in the report line.
|
|
93
|
+
|
|
94
|
+
### Benchmarks (v4.1.0)
|
|
95
|
+
- Token reduction: **97.6% average** across 18 repos ✅
|
|
96
|
+
- Retrieval hit@5: **84.4%** ✅
|
|
97
|
+
- With auto-scaling enabled, all 18 benchmark repos now stay within a sensible budget that targets ≥ 80% file coverage rather than the old 6 K ceiling.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
13
101
|
## [4.0.2] — 2026-04-15 — Bundle factory fix (re-release of 4.0.1)
|
|
14
102
|
|
|
15
103
|
### Fixed
|
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<h1>⚡ SigMap</h1>
|
|
6
6
|
|
|
7
7
|
<p><strong>WITHOUT SIGMAP, YOUR AI IS GUESSING.</strong><br>
|
|
8
|
-
<strong>
|
|
8
|
+
<strong>Without structured context, AI often reads the wrong file and fills the gaps with guesses.</strong></p>
|
|
9
9
|
|
|
10
10
|
<p><sub>Run one command. Force every answer to come from real code.</sub></p>
|
|
11
11
|
|
|
@@ -19,7 +19,13 @@
|
|
|
19
19
|
npx sigmap # 10 seconds. zero config. your AI never reads the wrong file again.
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
**What you get in ~10 seconds**
|
|
23
|
+
- A compact signature map of your codebase
|
|
24
|
+
- The right file in context far more often (84.4% hit@5 vs 13.6% random)
|
|
25
|
+
- Fewer retries (1.59 vs 2.84 prompts per task)
|
|
26
|
+
- Far smaller context (~2K–4K tokens instead of ~80K)
|
|
27
|
+
|
|
28
|
+
> Latest: **v4.1.0** — Smart Budget. Token budget now auto-scales to your repo size, targeting 80% source-file coverage by default. No config change needed — it just works.
|
|
23
29
|
|
|
24
30
|
<div align="center">
|
|
25
31
|
<img src="demo.gif" alt="SigMap demo — reducing 80K tokens to 4K in under 10 seconds" width="760" />
|
|
@@ -56,11 +62,13 @@ npx sigmap # 10 seconds. zero config. your AI never reads the wrong file again
|
|
|
56
62
|
| | Without SigMap | With SigMap |
|
|
57
63
|
|---|:---:|:---:|
|
|
58
64
|
| Task success | 10% | **59%** |
|
|
59
|
-
| Prompts per task | 2.84 | **1.
|
|
65
|
+
| Prompts per task | 2.84 | **1.59** |
|
|
60
66
|
| Tokens per session | ~80,000 | **~2,000** |
|
|
61
67
|
| Right file found | 13.6% | **84.4%** |
|
|
62
68
|
| Hallucination risk | 92% | **0%** |
|
|
63
69
|
|
|
70
|
+
Measured on 90 coding tasks across 18 real public repos. Full methodology and raw benchmark pages are linked below.
|
|
71
|
+
|
|
64
72
|
</details>
|
|
65
73
|
|
|
66
74
|
---
|
|
@@ -689,7 +697,6 @@ Copy `gen-context.config.json.example` to `gen-context.config.json`:
|
|
|
689
697
|
{
|
|
690
698
|
"output": ".github/copilot-instructions.md",
|
|
691
699
|
"srcDirs": ["src", "app", "lib"],
|
|
692
|
-
"maxTokens": 6000,
|
|
693
700
|
"outputs": ["copilot"],
|
|
694
701
|
"secretScan": true,
|
|
695
702
|
"strategy": "full",
|
|
@@ -703,10 +710,21 @@ Copy `gen-context.config.json.example` to `gen-context.config.json`:
|
|
|
703
710
|
- **`output`** — custom path for the primary markdown output file (used by `copilot` adapter). Default: `.github/copilot-instructions.md`
|
|
704
711
|
- **`outputs`** — which adapters to write to: `copilot` | `claude` | `cursor` | `windsurf`
|
|
705
712
|
- **`srcDirs`** — directories to scan (relative to project root)
|
|
706
|
-
- **`maxTokens`** — max tokens in final output before budget enforcement
|
|
707
713
|
- **`secretScan`** — redact secrets (AWS keys, tokens, etc.) from output
|
|
708
714
|
- **`strategy`** — output mode: `full` (default) | `per-module` | `hot-cold`
|
|
709
715
|
|
|
716
|
+
**Token budget (v4.1.0 — auto-scaling):**
|
|
717
|
+
|
|
718
|
+
| Key | Default | Description |
|
|
719
|
+
|---|---|---|
|
|
720
|
+
| `autoMaxTokens` | `true` | Auto-scale budget to repo size. Set `false` to pin a fixed `maxTokens`. |
|
|
721
|
+
| `coverageTarget` | `0.80` | Fraction of source files to target (0.0–1.0). |
|
|
722
|
+
| `modelContextLimit` | `128000` | Model context window size. Hard cap = `limit × maxTokensHeadroom`. |
|
|
723
|
+
| `maxTokensHeadroom` | `0.20` | Fraction of the context window reserved for SigMap output (default: 25 600 tokens). |
|
|
724
|
+
| `maxTokens` | `6000` | Used when `autoMaxTokens: false`, or as the fallback budget when no signature tokens are found. The auto-scaling floor itself is a fixed 4 000 tokens. |
|
|
725
|
+
|
|
726
|
+
The formula: `effective = clamp(ceil(totalSigTokens × coverageTarget), 4000, floor(modelContextLimit × maxTokensHeadroom))`.
|
|
727
|
+
|
|
710
728
|
Exclusions go in `.contextignore` (gitignore syntax). Also reads `.repomixignore` if present.
|
|
711
729
|
|
|
712
730
|
```
|
|
@@ -752,11 +770,11 @@ Every run now prints a coverage line alongside token reduction:
|
|
|
752
770
|
|
|
753
771
|
```
|
|
754
772
|
───────────────────────────────────────────
|
|
755
|
-
SigMap v4.
|
|
773
|
+
SigMap v4.1.0
|
|
756
774
|
Files scanned : 76
|
|
757
775
|
Symbols found : 332
|
|
758
776
|
Token reduction: 94% (65,227 → 4,103)
|
|
759
|
-
Coverage : A (97%) — 76 of 78 source files included
|
|
777
|
+
Coverage : A (97%) — 76 of 78 source files included [budget: 4000 auto-scaled]
|
|
760
778
|
Output : .github/copilot-instructions.md
|
|
761
779
|
───────────────────────────────────────────
|
|
762
780
|
```
|
|
@@ -771,7 +789,7 @@ sigmap --report
|
|
|
771
789
|
|
|
772
790
|
```
|
|
773
791
|
[sigmap] report:
|
|
774
|
-
version : 4.
|
|
792
|
+
version : 4.1.0
|
|
775
793
|
files processed : 76
|
|
776
794
|
reduction : 93.7%
|
|
777
795
|
coverage : A (97%) — 76 of 78 source files included
|
|
@@ -20,8 +20,23 @@
|
|
|
20
20
|
|
|
21
21
|
"maxSigsPerFile": 25,
|
|
22
22
|
|
|
23
|
+
"_maxTokens_comment": "Used only when autoMaxTokens is false. Override to pin a fixed budget.",
|
|
23
24
|
"maxTokens": 6000,
|
|
24
25
|
|
|
26
|
+
"_autoMaxTokens_comment": "Auto-scale budget based on repo size. Default: true.",
|
|
27
|
+
"_autoMaxTokens_formula": "effective = clamp(ceil(totalSigTokens × coverageTarget), 4000, floor(modelContextLimit × maxTokensHeadroom))",
|
|
28
|
+
"autoMaxTokens": true,
|
|
29
|
+
|
|
30
|
+
"_coverageTarget_comment": "Fraction of source files to target for inclusion (0.0–1.0). Default: 0.80 = 80%.",
|
|
31
|
+
"coverageTarget": 0.80,
|
|
32
|
+
|
|
33
|
+
"_modelContextLimit_comment": "Model context window size (tokens). Hard cap = modelContextLimit × maxTokensHeadroom.",
|
|
34
|
+
"_modelContextLimit_examples": "128000 = GPT-4o/Claude (default) | 200000 = Claude max | 1000000 = Gemini 1M",
|
|
35
|
+
"modelContextLimit": 128000,
|
|
36
|
+
|
|
37
|
+
"_maxTokensHeadroom_comment": "Fraction of model context reserved for SigMap output. 0.20 = 25,600 token hard cap.",
|
|
38
|
+
"maxTokensHeadroom": 0.20,
|
|
39
|
+
|
|
25
40
|
"secretScan": true,
|
|
26
41
|
|
|
27
42
|
"monorepo": false,
|
package/gen-context.js
CHANGED
|
@@ -59,9 +59,22 @@ __factories["./src/config/defaults"] = function(module, exports) {
|
|
|
59
59
|
// Maximum signatures extracted per file
|
|
60
60
|
maxSigsPerFile: 25,
|
|
61
61
|
|
|
62
|
-
// Maximum tokens in final output before budget enforcement kicks in
|
|
62
|
+
// Maximum tokens in final output before budget enforcement kicks in.
|
|
63
|
+
// Used when autoMaxTokens is false, or as the fallback when no signature tokens are found (the auto-scaling floor is a fixed 4000).
|
|
63
64
|
maxTokens: 6000,
|
|
64
|
-
|
|
65
|
+
|
|
66
|
+
// Automatically scale the token budget based on repo size.
|
|
67
|
+
autoMaxTokens: true,
|
|
68
|
+
|
|
69
|
+
// Fraction of source files to target for inclusion (0.0–1.0).
|
|
70
|
+
coverageTarget: 0.80,
|
|
71
|
+
|
|
72
|
+
// Model context window size (tokens). Used to compute the hard cap.
|
|
73
|
+
modelContextLimit: 128000,
|
|
74
|
+
|
|
75
|
+
// Fraction of the model context window reserved for SigMap output.
|
|
76
|
+
maxTokensHeadroom: 0.20,
|
|
77
|
+
|
|
65
78
|
// Scan signatures for secrets and redact matches
|
|
66
79
|
secretScan: true,
|
|
67
80
|
|
|
@@ -4641,7 +4654,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
4641
4654
|
|
|
4642
4655
|
const SERVER_INFO = {
|
|
4643
4656
|
name: 'sigmap',
|
|
4644
|
-
version: '4.0
|
|
4657
|
+
version: '4.1.0',
|
|
4645
4658
|
description: 'SigMap MCP server — code signatures on demand',
|
|
4646
4659
|
};
|
|
4647
4660
|
|
|
@@ -5436,12 +5449,24 @@ __factories["./src/retrieval/ranker"] = function(module, exports) {
|
|
|
5436
5449
|
scored.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
|
|
5437
5450
|
return scored.slice(0, topK);
|
|
5438
5451
|
}
|
|
5439
|
-
|
|
5440
|
-
|
|
5441
|
-
|
|
5452
|
+
/**
 * Known adapter output files, listed in the order buildSigIndex probes them.
 * Each entry is a list of path segments relative to the project root, so it
 * can be spread into path.join(cwd, ...parts).
 *
 * Probe order: copilot → claude → codex → cursor → windsurf → openai →
 * gemini → llm-full → llm.
 */
const ADAPTER_OUTPUT_PATHS = [
  ['.github', 'copilot-instructions.md'],
  ['CLAUDE.md'],
  ['AGENTS.md'],
  ['.cursorrules'],
  ['.windsurfrules'],
  ['.github', 'openai-context.md'],
  ['.github', 'gemini-context.md'],
  ['llm-full.txt'],
  ['llm.txt'],
];
|
|
5463
|
+
function _parseContextFile(contextPath) {
|
|
5464
|
+
const fs = require('fs');
|
|
5442
5465
|
const index = new Map();
|
|
5443
5466
|
if (!fs.existsSync(contextPath)) return index;
|
|
5444
|
-
|
|
5467
|
+
let content = fs.readFileSync(contextPath, 'utf8');
|
|
5468
|
+
const markerIdx = content.indexOf('## Auto-generated signatures');
|
|
5469
|
+
if (markerIdx !== -1) content = content.slice(markerIdx);
|
|
5445
5470
|
const lines = content.split('\n');
|
|
5446
5471
|
let currentFile = null; let inBlock = false; let sigs = [];
|
|
5447
5472
|
for (const line of lines) {
|
|
@@ -5453,6 +5478,16 @@ __factories["./src/retrieval/ranker"] = function(module, exports) {
|
|
|
5453
5478
|
if (currentFile !== null) index.set(currentFile, sigs);
|
|
5454
5479
|
return index;
|
|
5455
5480
|
}
|
|
5481
|
+
/**
 * Build the signature index used by --query.
 *
 * When opts.contextPath is given, only that file is parsed and its index is
 * returned as-is (even if empty); no other location is probed. Otherwise each
 * entry of ADAPTER_OUTPUT_PATHS is resolved against cwd, in order, and the
 * first non-empty index wins.
 *
 * @param {string} cwd - Project root the adapter paths are resolved against.
 * @param {{contextPath?: string}} [opts] - Optional explicit context file.
 * @returns {Map} Index produced by _parseContextFile; an empty Map when no
 *   probed file yields any entries.
 */
function buildSigIndex(cwd, opts) {
  const path = require('path');

  // An explicit path is authoritative: skip probing entirely.
  if (opts && opts.contextPath) return _parseContextFile(opts.contextPath);

  for (const parts of ADAPTER_OUTPUT_PATHS) {
    const index = _parseContextFile(path.join(cwd, ...parts));
    if (index.size > 0) return index;
  }
  return new Map();
}
|
|
5456
5491
|
function formatRankTable(results, query) {
|
|
5457
5492
|
if (!results || results.length === 0) return `No matching files found for query: "${query}"\n`;
|
|
5458
5493
|
const lines = [`## Query: ${query}`, '', '| Rank | File | Score | Sigs | Tokens |', '|------|------|-------|------|--------|',
|
|
@@ -6203,7 +6238,7 @@ const path = require('path');
|
|
|
6203
6238
|
const os = require('os');
|
|
6204
6239
|
const { execSync } = require('child_process');
|
|
6205
6240
|
|
|
6206
|
-
const VERSION = '4.0
|
|
6241
|
+
const VERSION = '4.1.0';
|
|
6207
6242
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
6208
6243
|
|
|
6209
6244
|
function requireSourceOrBundled(key) {
|
|
@@ -6423,6 +6458,61 @@ function isMockFile(filePath) {
|
|
|
6423
6458
|
/mock\.(ts|js|tsx|jsx)$/.test(p);
|
|
6424
6459
|
}
|
|
6425
6460
|
|
|
6461
|
+
/**
 * Compute the effective token budget based on repo size and config.
 *
 * Formula:
 *   totalSigTokens = sum of estimated tokens for all extracted sig blocks
 *   needed         = ceil(totalSigTokens * coverageTarget)
 *   hardCap        = floor(modelContextLimit * maxTokensHeadroom)
 *   effective      = min(max(4000, needed), hardCap)
 *
 * The hard cap is applied last, so it wins over the 4000-token floor when
 * hardCap is below 4000.
 *
 * The configured maxTokens is returned unchanged when autoMaxTokens is false
 * or when no signature tokens are found.
 *
 * Tuning values that are missing or coerce to NaN fall back to their defaults
 * (0.80 / 128000 / 0.20) so the result is never NaN.
 *
 * Side effect: emits three console.warn lines when the hard cap leaves the
 * estimated coverage more than 10 percentage points below the target.
 *
 * @param {Array} fileEntries - All file entries BEFORE budget enforcement;
 *   each entry's `sigs` array (if any) is joined and token-estimated.
 * @param {object} config
 * @returns {number} effective token budget
 */
function computeEffectiveMaxTokens(fileEntries, config) {
  if (config.autoMaxTokens === false) return config.maxTokens;

  // Nullish or NaN-coercing values would otherwise poison the Math.min/max
  // chain below and yield a NaN budget.
  const numberOr = (value, fallback) => {
    if (value == null) return fallback;
    const coerced = Number(value);
    return Number.isNaN(coerced) ? fallback : coerced;
  };

  const coverageTarget = numberOr(config.coverageTarget, 0.80);
  const modelContextLimit = numberOr(config.modelContextLimit, 128000);
  const maxTokensHeadroom = numberOr(config.maxTokensHeadroom, 0.20);

  const totalSigTokens = fileEntries.reduce(
    (sum, entry) => sum + estimateTokens((entry.sigs || []).join('\n')),
    0
  );
  if (totalSigTokens === 0) return config.maxTokens;

  const MIN = 4000;
  const hardCap = Math.floor(modelContextLimit * maxTokensHeadroom);
  const needed = Math.ceil(totalSigTokens * coverageTarget);
  const effective = Math.min(Math.max(MIN, needed), hardCap);

  // Warn when repo is so large the hard cap prevents hitting the coverage target
  if (needed > hardCap) {
    const estimatedCovPct = Math.round((hardCap / totalSigTokens) * 100);
    const targetPct = Math.round(coverageTarget * 100);
    if (estimatedCovPct < targetPct - 10) {
      console.warn(
        `[sigmap] auto-budget: ${fileEntries.length} files need ~${Math.round(needed / 1000)}K tokens ` +
        `for ${targetPct}% coverage`
      );
      console.warn(
        `[sigmap] auto-budget: capped at ${hardCap} ` +
        `(${Math.round(maxTokensHeadroom * 100)}% of ${Math.round(modelContextLimit / 1000)}K model limit) ` +
        `→ est. ${estimatedCovPct}% coverage`
      );
      console.warn(
        `[sigmap] auto-budget: tip — set strategy:"per-module" for full coverage on large repos`
      );
    }
  }

  return effective;
}
|
|
6515
|
+
|
|
6426
6516
|
function applyTokenBudget(fileEntries, maxTokens) {
|
|
6427
6517
|
// fileEntries: [{ filePath, sigs, mtime }]
|
|
6428
6518
|
// Reserve ~10% for formatting overhead (section headers, code fences, top-level header)
|
|
@@ -6889,7 +6979,7 @@ function _coverageBar(pct, width) {
|
|
|
6889
6979
|
return '\u2588'.repeat(filled) + '\u2591'.repeat(width - filled);
|
|
6890
6980
|
}
|
|
6891
6981
|
|
|
6892
|
-
function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson, budgetLimit, coverageResult) {
|
|
6982
|
+
function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson, budgetLimit, coverageResult, isAutoBudget) {
|
|
6893
6983
|
const reduction = inputTokens > 0 ? (100 - (finalTokens / inputTokens) * 100).toFixed(1) : 0;
|
|
6894
6984
|
const overBudget = finalTokens > (budgetLimit || 6000);
|
|
6895
6985
|
if (asJson) {
|
|
@@ -6904,6 +6994,7 @@ function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson,
|
|
|
6904
6994
|
reductionPct: parseFloat(reduction),
|
|
6905
6995
|
overBudget,
|
|
6906
6996
|
budgetLimit: budgetLimit || 6000,
|
|
6997
|
+
autoBudget: !!isAutoBudget,
|
|
6907
6998
|
};
|
|
6908
6999
|
if (coverageResult) {
|
|
6909
7000
|
payload.coverage = {
|
|
@@ -6923,13 +7014,16 @@ function printReport(inputTokens, finalTokens, fileCount, droppedCount, asJson,
|
|
|
6923
7014
|
// Exit 1 in CI if over budget — lets pipelines fail fast
|
|
6924
7015
|
if (overBudget) process.exitCode = 1;
|
|
6925
7016
|
} else {
|
|
7017
|
+
const budgetLabel = isAutoBudget
|
|
7018
|
+
? `${budgetLimit || 6000} (auto-scaled)`
|
|
7019
|
+
: `${budgetLimit || 6000} (fixed)`;
|
|
6926
7020
|
console.log(`[sigmap] report:`);
|
|
6927
7021
|
console.log(` version : ${VERSION}`);
|
|
6928
7022
|
console.log(` files processed : ${fileCount}`);
|
|
6929
7023
|
console.log(` files dropped : ${droppedCount}`);
|
|
6930
7024
|
console.log(` input tokens : ~${inputTokens}`);
|
|
6931
7025
|
console.log(` output tokens : ~${finalTokens}`);
|
|
6932
|
-
console.log(` budget limit : ${
|
|
7026
|
+
console.log(` budget limit : ${budgetLabel}`);
|
|
6933
7027
|
console.log(` reduction : ${reduction}%`);
|
|
6934
7028
|
if (coverageResult) {
|
|
6935
7029
|
console.log(` coverage : ${coverageResult.grade} (${coverageResult.score}%) — ${coverageResult.included} of ${coverageResult.total} source files included`);
|
|
@@ -7088,8 +7182,9 @@ function runPerModuleStrategy(cwd, config, fileEntries, inputTokenTotal) {
|
|
|
7088
7182
|
const outPath = path.join(cwd, '.github', outName);
|
|
7089
7183
|
const modEntries = modules[mod];
|
|
7090
7184
|
|
|
7091
|
-
// Per-module budget:
|
|
7092
|
-
|
|
7185
|
+
// Per-module budget: each module gets its own full effective budget
|
|
7186
|
+
// (per-module strategy is the recommended path for large repos — no sharing needed)
|
|
7187
|
+
const modBudget = Math.max(1000, config.maxTokens);
|
|
7093
7188
|
const budgeted = applyTokenBudget(modEntries, modBudget);
|
|
7094
7189
|
|
|
7095
7190
|
const content = formatOutput(budgeted, cwd, false, config, null);
|
|
@@ -7358,15 +7453,22 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
|
|
|
7358
7453
|
});
|
|
7359
7454
|
}
|
|
7360
7455
|
|
|
7456
|
+
// v4.1: compute effective budget once; used by all strategies
|
|
7457
|
+
const effectiveMaxTokens = computeEffectiveMaxTokens(fileEntries, config);
|
|
7458
|
+
// Propagate to config so per-module / hot-cold strategies pick it up
|
|
7459
|
+
const configWithBudget = effectiveMaxTokens !== config.maxTokens
|
|
7460
|
+
? Object.assign({}, config, { maxTokens: effectiveMaxTokens, _autoMaxTokens: effectiveMaxTokens })
|
|
7461
|
+
: config;
|
|
7462
|
+
|
|
7361
7463
|
let result;
|
|
7362
7464
|
if (!reportMode) {
|
|
7363
7465
|
if (strategy === 'per-module') {
|
|
7364
|
-
result = runPerModuleStrategy(cwd,
|
|
7466
|
+
result = runPerModuleStrategy(cwd, configWithBudget, fileEntries, inputTokenTotal);
|
|
7365
7467
|
} else if (strategy === 'hot-cold') {
|
|
7366
|
-
result = runHotColdStrategy(cwd,
|
|
7468
|
+
result = runHotColdStrategy(cwd, configWithBudget, fileEntries, recentFiles, inputTokenTotal);
|
|
7367
7469
|
} else {
|
|
7368
7470
|
// 'full' — original behaviour
|
|
7369
|
-
fileEntries = applyTokenBudget(fileEntries,
|
|
7471
|
+
fileEntries = applyTokenBudget(fileEntries, effectiveMaxTokens);
|
|
7370
7472
|
const droppedCount = beforeCount - fileEntries.length;
|
|
7371
7473
|
const routingEnabled = !!(config.routing || process.argv.includes('--routing'));
|
|
7372
7474
|
const content = formatOutput(fileEntries, cwd, routingEnabled, config, null);
|
|
@@ -7409,21 +7511,21 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
|
|
|
7409
7511
|
}
|
|
7410
7512
|
} else {
|
|
7411
7513
|
// report mode: always run full pipeline for accurate stats
|
|
7412
|
-
const budgeted = applyTokenBudget([...fileEntries],
|
|
7514
|
+
const budgeted = applyTokenBudget([...fileEntries], effectiveMaxTokens);
|
|
7413
7515
|
const droppedCount = beforeCount - budgeted.length;
|
|
7414
|
-
const content = formatOutput(budgeted, cwd, false,
|
|
7516
|
+
const content = formatOutput(budgeted, cwd, false, configWithBudget, null);
|
|
7415
7517
|
const finalTokens = estimateTokens(content);
|
|
7416
7518
|
// v4.0: compute coverage score for --report heatmap
|
|
7417
7519
|
let coverageResult = null;
|
|
7418
7520
|
try {
|
|
7419
7521
|
const { coverageScore } = requireSourceOrBundled('./src/analysis/coverage-score');
|
|
7420
|
-
coverageResult = coverageScore(cwd, budgeted,
|
|
7522
|
+
coverageResult = coverageScore(cwd, budgeted, configWithBudget);
|
|
7421
7523
|
} catch (_) {}
|
|
7422
7524
|
result = { inputTokenTotal, finalTokens, fileCount: beforeCount, droppedCount, coverageResult };
|
|
7423
7525
|
}
|
|
7424
7526
|
|
|
7425
7527
|
if (reportMode || process.argv.includes('--report')) {
|
|
7426
|
-
printReport(result.inputTokenTotal, result.finalTokens, result.fileCount, result.droppedCount, reportJson,
|
|
7528
|
+
printReport(result.inputTokenTotal, result.finalTokens, result.fileCount, result.droppedCount, reportJson, effectiveMaxTokens, result.coverageResult, config.autoMaxTokens !== false && effectiveMaxTokens !== config.maxTokens);
|
|
7427
7529
|
}
|
|
7428
7530
|
|
|
7429
7531
|
// Usage tracking (v0.9) — optional append-only NDJSON log
|
|
@@ -7437,8 +7539,9 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
|
|
|
7437
7539
|
droppedCount: result.droppedCount,
|
|
7438
7540
|
rawTokens: result.inputTokenTotal,
|
|
7439
7541
|
finalTokens: result.finalTokens,
|
|
7440
|
-
overBudget: result.finalTokens >
|
|
7441
|
-
budgetLimit:
|
|
7542
|
+
overBudget: result.finalTokens > effectiveMaxTokens,
|
|
7543
|
+
budgetLimit: effectiveMaxTokens,
|
|
7544
|
+
autoBudget: config.autoMaxTokens !== false && effectiveMaxTokens !== config.maxTokens,
|
|
7442
7545
|
}, cwd);
|
|
7443
7546
|
} catch (err) {
|
|
7444
7547
|
console.warn(`[sigmap] tracking: ${err.message}`);
|
|
@@ -7459,8 +7562,15 @@ function runGenerate(cwd, config, reportMode, reportJson = false) {
|
|
|
7459
7562
|
let coverageLine = '';
|
|
7460
7563
|
try {
|
|
7461
7564
|
const { coverageScore } = requireSourceOrBundled('./src/analysis/coverage-score');
|
|
7462
|
-
const cov = coverageScore(cwd, fileEntries,
|
|
7463
|
-
|
|
7565
|
+
const cov = coverageScore(cwd, fileEntries, configWithBudget);
|
|
7566
|
+
const autoBudgetNote = (config.autoMaxTokens !== false && effectiveMaxTokens !== config.maxTokens)
|
|
7567
|
+
? ` [budget: ${effectiveMaxTokens} auto-scaled]`
|
|
7568
|
+
: '';
|
|
7569
|
+
coverageLine = ` Coverage : ${cov.grade} (${cov.score}%) \u2014 ${cov.included} of ${cov.total} source files included${autoBudgetNote}`;
|
|
7570
|
+
// Extra warning line when coverage is still poor despite auto-scaling
|
|
7571
|
+
if (cov.score < 40 && config.strategy !== 'per-module' && config.strategy !== 'hot-cold') {
|
|
7572
|
+
coverageLine += '\n [sigmap] tip: large repo — consider strategy:"per-module" for full coverage';
|
|
7573
|
+
}
|
|
7464
7574
|
} catch (_) {}
|
|
7465
7575
|
const lines = [
|
|
7466
7576
|
bar,
|
|
@@ -8132,7 +8242,13 @@ function main() {
|
|
|
8132
8242
|
const stats = analyzeFiles(allFiles, cwd, { slow, maxSigs: cfg.maxSigsPerFile || 25 });
|
|
8133
8243
|
|
|
8134
8244
|
if (args.includes('--json')) {
|
|
8135
|
-
|
|
8245
|
+
const out = JSON.stringify(formatAnalysisJSON(stats)) + '\n';
|
|
8246
|
+
// Use the write callback to exit only after the OS has accepted all
|
|
8247
|
+
// bytes. Calling process.exit(0) synchronously after write() truncates
|
|
8248
|
+
// large outputs because the underlying pipe write is asynchronous even
|
|
8249
|
+
// when write() returns true.
|
|
8250
|
+
process.stdout.write(out, 'utf8', () => process.exit(0));
|
|
8251
|
+
return; // exit is handled by the callback above
|
|
8136
8252
|
} else {
|
|
8137
8253
|
const table = formatAnalysisTable(stats, slow);
|
|
8138
8254
|
process.stdout.write(table);
|
|
@@ -8238,9 +8354,29 @@ function main() {
|
|
|
8238
8354
|
process.exit(1);
|
|
8239
8355
|
}
|
|
8240
8356
|
const { rank, buildSigIndex, formatRankTable, formatRankJSON } = requireSourceOrBundled('./src/retrieval/ranker');
|
|
8241
|
-
|
|
8357
|
+
|
|
8358
|
+
// Resolve an explicit context file path when --adapter is present.
|
|
8359
|
+
// This lets `--adapter claude --query "..."` read CLAUDE.md instead of
|
|
8360
|
+
// falling through to the default copilot-instructions.md probe.
|
|
8361
|
+
let queryOpts;
|
|
8362
|
+
const adpIdx = args.indexOf('--adapter');
|
|
8363
|
+
if (adpIdx >= 0) {
|
|
8364
|
+
const adapterName = (args[adpIdx + 1] || '').trim().toLowerCase();
|
|
8365
|
+
const VALID_ADAPTERS = ['copilot', 'claude', 'cursor', 'windsurf', 'openai', 'gemini', 'codex'];
|
|
8366
|
+
if (VALID_ADAPTERS.includes(adapterName)) {
|
|
8367
|
+
try {
|
|
8368
|
+
const adapterMod = __require('./packages/adapters/' + adapterName);
|
|
8369
|
+
queryOpts = { contextPath: adapterMod.outputPath(cwd) };
|
|
8370
|
+
} catch (_) {}
|
|
8371
|
+
}
|
|
8372
|
+
}
|
|
8373
|
+
|
|
8374
|
+
const index = buildSigIndex(cwd, queryOpts);
|
|
8242
8375
|
if (index.size === 0) {
|
|
8243
8376
|
console.error('[sigmap] no context file found. Run: node gen-context.js');
|
|
8377
|
+
if (adpIdx >= 0) {
|
|
8378
|
+
console.error(' (tried the path for --adapter ' + (args[adpIdx + 1] || '') + ')');
|
|
8379
|
+
}
|
|
8244
8380
|
process.exit(1);
|
|
8245
8381
|
}
|
|
8246
8382
|
const topIdx = args.indexOf('--top');
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sigmap",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.1.1",
|
|
4
4
|
"description": "Zero-dependency AI context engine — 97% token reduction. No npm install. Runs on Node 18+.",
|
|
5
5
|
"main": "gen-context.js",
|
|
6
6
|
"exports": {
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
},
|
|
16
16
|
"scripts": {
|
|
17
17
|
"test": "node test/run.js",
|
|
18
|
-
"test:integration": "node test/integration/strategy.test.js && node test/integration/secret-scan.test.js && node test/integration/token-budget.test.js && node test/integration/mcp-server.test.js",
|
|
18
|
+
"test:integration": "node test/integration/strategy.test.js && node test/integration/secret-scan.test.js && node test/integration/token-budget.test.js && node test/integration/auto-budget.test.js && node test/integration/mcp-server.test.js",
|
|
19
19
|
"test:integration:all": "node test/integration/all.js",
|
|
20
20
|
"test:all": "node test/run.js && node test/integration/strategy.test.js && node test/integration/secret-scan.test.js",
|
|
21
21
|
"generate": "node gen-context.js",
|
package/src/config/defaults.js
CHANGED
|
@@ -47,9 +47,30 @@ const DEFAULTS = {
|
|
|
47
47
|
// Maximum signatures extracted per file
|
|
48
48
|
maxSigsPerFile: 25,
|
|
49
49
|
|
|
50
|
-
// Maximum tokens in final output before budget enforcement kicks in
|
|
50
|
+
// Maximum tokens in final output before budget enforcement kicks in.
|
|
51
|
+
// Used only when autoMaxTokens is false, or as a floor for auto-scaling.
|
|
51
52
|
maxTokens: 6000,
|
|
52
53
|
|
|
54
|
+
// Automatically scale the token budget based on repo size.
|
|
55
|
+
// When true, SigMap targets `coverageTarget` fraction of source files and
|
|
56
|
+
// raises the budget up to `modelContextLimit * maxTokensHeadroom`.
|
|
57
|
+
// Set to false (or set maxTokens explicitly) to pin the budget.
|
|
58
|
+
autoMaxTokens: true,
|
|
59
|
+
|
|
60
|
+
// Fraction of source files to target for inclusion (0.0–1.0).
|
|
61
|
+
// 0.80 = include at least 80% of source files in the context output.
|
|
62
|
+
coverageTarget: 0.80,
|
|
63
|
+
|
|
64
|
+
// Model context window size (tokens). Used to compute the hard cap:
|
|
65
|
+
// hardCap = modelContextLimit × maxTokensHeadroom
|
|
66
|
+
// Default: GPT-4o / Claude Sonnet (128K). Set higher for Gemini 1M etc.
|
|
67
|
+
modelContextLimit: 128000,
|
|
68
|
+
|
|
69
|
+
// Fraction of the model context window reserved for SigMap output.
|
|
70
|
+
// Leaves the remaining fraction for the conversation, system prompt, etc.
|
|
71
|
+
// Default 0.20 = 20% of 128K = 25,600 token hard cap.
|
|
72
|
+
maxTokensHeadroom: 0.20,
|
|
73
|
+
|
|
53
74
|
// Scan signatures for secrets and redact matches
|
|
54
75
|
secretScan: true,
|
|
55
76
|
|
package/src/mcp/server.js
CHANGED
package/src/retrieval/ranker.js
CHANGED
|
@@ -141,24 +141,45 @@ function rank(query, sigIndex, opts) {
|
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
/**
|
|
144
|
-
*
|
|
145
|
-
*
|
|
146
|
-
*
|
|
144
|
+
* All paths where sigmap adapters write their context files, in probe order.
|
|
145
|
+
* The first existing file with a non-empty index wins when no explicit path
|
|
146
|
+
* is supplied.
|
|
147
|
+
*/
|
|
148
|
+
const ADAPTER_OUTPUT_PATHS = [
|
|
149
|
+
['.github', 'copilot-instructions.md'], // copilot (default)
|
|
150
|
+
['CLAUDE.md'], // claude
|
|
151
|
+
['AGENTS.md'], // codex
|
|
152
|
+
['.cursorrules'], // cursor
|
|
153
|
+
['.windsurfrules'], // windsurf
|
|
154
|
+
['.github', 'openai-context.md'], // openai
|
|
155
|
+
['.github', 'gemini-context.md'], // gemini
|
|
156
|
+
['llm-full.txt'], // llm-full
|
|
157
|
+
['llm.txt'], // llm
|
|
158
|
+
];
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Parse a single context file into a Map<filePath, string[]>.
|
|
147
162
|
*
|
|
148
|
-
*
|
|
163
|
+
* Files that contain human-written content before an
|
|
164
|
+
* "## Auto-generated signatures" marker (e.g. CLAUDE.md) are handled
|
|
165
|
+
* by skipping everything above the marker before scanning for ### headers.
|
|
166
|
+
*
|
|
167
|
+
* @param {string} contextPath - absolute path to the context file
|
|
149
168
|
* @returns {Map<string, string[]>}
|
|
150
169
|
*/
|
|
151
|
-
function buildSigIndex(cwd) {
|
|
152
|
-
const fs = require('fs');
|
|
153
|
-
const path = require('path');
|
|
154
|
-
const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
|
|
170
|
+
function _parseContextFile(contextPath) {
|
|
171
|
+
const fs = require('fs');
|
|
155
172
|
const index = new Map();
|
|
156
173
|
|
|
157
174
|
if (!fs.existsSync(contextPath)) return index;
|
|
158
175
|
|
|
159
|
-
const content = fs.readFileSync(contextPath, 'utf8');
|
|
160
|
-
const lines = content.split('\n');
|
|
176
|
+
let content = fs.readFileSync(contextPath, 'utf8');
|
|
161
177
|
|
|
178
|
+
// Skip any human-written preamble that sits above the auto-generated block.
|
|
179
|
+
const markerIdx = content.indexOf('## Auto-generated signatures');
|
|
180
|
+
if (markerIdx !== -1) content = content.slice(markerIdx);
|
|
181
|
+
|
|
182
|
+
const lines = content.split('\n');
|
|
162
183
|
let currentFile = null;
|
|
163
184
|
let inBlock = false;
|
|
164
185
|
let sigs = [];
|
|
@@ -180,6 +201,40 @@ function buildSigIndex(cwd) {
|
|
|
180
201
|
return index;
|
|
181
202
|
}
|
|
182
203
|
|
|
204
|
+
/**
|
|
205
|
+
* Build a signature index from the generated context file.
|
|
206
|
+
* Returns Map<filePath, string[]> where filePath is the relative path
|
|
207
|
+
* as it appears in the ### headers of the context file.
|
|
208
|
+
*
|
|
209
|
+
* When `opts.contextPath` is provided, that specific file is used.
|
|
210
|
+
* This is the case when the caller already knows the path (e.g. via
|
|
211
|
+
* --adapter <name> or --output <file>).
|
|
212
|
+
*
|
|
213
|
+
* Otherwise all known adapter output paths are probed in order and the
|
|
214
|
+
* first file that produces a non-empty index is returned.
|
|
215
|
+
*
|
|
216
|
+
* @param {string} cwd
|
|
217
|
+
* @param {{ contextPath?: string }} [opts]
|
|
218
|
+
* @returns {Map<string, string[]>}
|
|
219
|
+
*/
|
|
220
|
+
function buildSigIndex(cwd, opts) {
|
|
221
|
+
const path = require('path');
|
|
222
|
+
|
|
223
|
+
// Caller supplied an explicit path — use it directly.
|
|
224
|
+
if (opts && opts.contextPath) {
|
|
225
|
+
return _parseContextFile(opts.contextPath);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// Probe all known adapter output paths; return first non-empty index.
|
|
229
|
+
for (const parts of ADAPTER_OUTPUT_PATHS) {
|
|
230
|
+
const contextPath = path.join(cwd, ...parts);
|
|
231
|
+
const index = _parseContextFile(contextPath);
|
|
232
|
+
if (index.size > 0) return index;
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
return new Map();
|
|
236
|
+
}
|
|
237
|
+
|
|
183
238
|
/**
|
|
184
239
|
* Format ranked results as a markdown table string.
|
|
185
240
|
*
|