@ricky-stevens/context-guardian 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/CLAUDE.md +11 -1
- package/README.md +26 -39
- package/hooks/session-start.mjs +16 -6
- package/hooks/stop.mjs +34 -50
- package/hooks/submit.mjs +34 -31
- package/lib/checkpoint.mjs +14 -4
- package/lib/config.mjs +37 -10
- package/lib/handoff.mjs +12 -2
- package/lib/statusline.mjs +104 -54
- package/lib/tokens.mjs +2 -16
- package/package.json +1 -1
- package/skills/config/SKILL.md +1 -1
- package/skills/stats/SKILL.md +7 -28
- package/test/checkpoint.test.mjs +2 -2
- package/test/config.test.mjs +39 -0
- package/test/integration.test.mjs +4 -1
- package/test/statusline.test.mjs +116 -6
- package/test/submit.test.mjs +3 -9
- package/test/tokens.test.mjs +2 -40
- package/lib/estimate.mjs +0 -254
- package/test/estimate.test.mjs +0 -262
package/CLAUDE.md
CHANGED
|
@@ -92,11 +92,21 @@ Never chop at a point. Start+end trim: keep first N chars (intent) + last N char
|
|
|
92
92
|
- Compaction checkpoints are also copied to `.context-guardian/cg-checkpoint-*.md` for user visibility
|
|
93
93
|
- `rotateFiles` sorts by mtime (not filename) because label-prefixed filenames break alphabetical chronological ordering
|
|
94
94
|
|
|
95
|
+
## Model & Token Limit Detection
|
|
96
|
+
|
|
97
|
+
The statusline receives the authoritative `context_window_size` and `model.id` directly from Claude Code's session JSON. It persists these to the per-session state file in `~/.claude/cg/`. Hooks read from this file as the primary source — values update immediately after `/model` switches.
|
|
98
|
+
|
|
99
|
+
Fallback chain when the statusline hasn't fired yet (first message): `config.max_tokens` → `200000`.
|
|
100
|
+
|
|
101
|
+
## Adaptive Threshold
|
|
102
|
+
|
|
103
|
+
The compaction threshold scales with context window size: 55% at 200K, falling to 30% at 1M (linear in window size, clamped to [25%, 55%]). Computed by `computeAdaptiveThreshold()` in `lib/config.mjs`. If the user explicitly sets a threshold via `/cg:config threshold X`, the explicit value wins.
|
|
104
|
+
|
|
95
105
|
## Token Counting
|
|
96
106
|
|
|
97
107
|
1. **Real counts (preferred):** `input_tokens + cache_creation_input_tokens + cache_read_input_tokens` from `message.usage` in transcript JSONL. Written by both submit and stop hooks.
|
|
98
108
|
2. **Byte estimation (fallback):** First message only. Content bytes / 4.
|
|
99
|
-
3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in
|
|
109
|
+
3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in compaction stats and session size calculation.
|
|
100
110
|
|
|
101
111
|
## Session Size (API Payload Monitoring)
|
|
102
112
|
|
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Context Guardian
|
|
2
2
|
|
|
3
3
|
[](https://github.com/Ricky-Stevens/context-guardian/actions/workflows/ci.yml)
|
|
4
|
-
[](https://github.com/Ricky-Stevens/context-guardian/releases)
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
[](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
|
|
7
7
|
[](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
|
|
@@ -53,7 +53,7 @@ Context Guardian adds five slash commands:
|
|
|
53
53
|
|
|
54
54
|
### `/cg:stats`
|
|
55
55
|
|
|
56
|
-
Shows current token usage, session size, compaction estimates, and recommendations.
|
|
56
|
+
Shows current token usage, session size, threshold, and recommendations.
|
|
57
57
|
|
|
58
58
|
```
|
|
59
59
|
┌─────────────────────────────────────────────────
|
|
@@ -61,16 +61,12 @@ Shows current token usage, session size, compaction estimates, and recommendatio
|
|
|
61
61
|
│
|
|
62
62
|
│ Current usage: 372,000 / 1,000,000 tokens (37.2%)
|
|
63
63
|
│ Session size: 8.4MB / 20MB
|
|
64
|
-
│ Threshold:
|
|
65
|
-
│ Data source: real counts
|
|
66
|
-
│
|
|
64
|
+
│ Threshold: 30% (0% remaining to warning)
|
|
67
65
|
│ Model: claude-opus-4-6 / 1,000,000 tokens
|
|
68
|
-
│ Last updated: 12 seconds ago
|
|
69
|
-
│
|
|
70
|
-
│ /cg:compact ~37.2% → ~5%
|
|
71
|
-
│ /cg:prune ~37.2% → ~3%
|
|
72
66
|
│
|
|
73
|
-
│ /cg:
|
|
67
|
+
│ /cg:compact smart compact — strips file reads, system noise
|
|
68
|
+
│ /cg:prune keep last 10 exchanges only
|
|
69
|
+
│ /cg:handoff [name] save session for later
|
|
74
70
|
│
|
|
75
71
|
└─────────────────────────────────────────────────
|
|
76
72
|
```
|
|
@@ -78,10 +74,10 @@ Shows current token usage, session size, compaction estimates, and recommendatio
|
|
|
78
74
|
### `/cg:config`
|
|
79
75
|
|
|
80
76
|
```bash
|
|
81
|
-
/cg:config # show current config +
|
|
82
|
-
/cg:config threshold 0.50 #
|
|
83
|
-
/cg:config max_tokens 1000000 # override token limit
|
|
84
|
-
/cg:config reset # restore defaults
|
|
77
|
+
/cg:config # show current config + detected model/limit
|
|
78
|
+
/cg:config threshold 0.50 # override adaptive threshold with fixed 50%
|
|
79
|
+
/cg:config max_tokens 1000000 # override detected token limit
|
|
80
|
+
/cg:config reset # restore adaptive defaults
|
|
85
81
|
```
|
|
86
82
|
|
|
87
83
|
### `/cg:compact`
|
|
@@ -143,13 +139,21 @@ The 1M window is powerful, but it requires active management. Context Guardian p
|
|
|
143
139
|
|
|
144
140
|
---
|
|
145
141
|
|
|
146
|
-
##
|
|
142
|
+
## Adaptive Threshold
|
|
143
|
+
|
|
144
|
+
Context Guardian's compaction threshold **scales automatically with the context window size**. Different window sizes need different thresholds — 35% of 200K (70K tokens) is very different from 35% of 1M (350K tokens).
|
|
145
|
+
|
|
146
|
+
| Window | Default Threshold | Alert At | Rationale |
|
|
147
|
+
|--------|------------------|----------|-----------|
|
|
148
|
+
| **200K** | 55% | ~110K tokens | System overhead is 25-45K tokens, so a higher threshold maximises usable conversation space |
|
|
149
|
+
| **500K** | 46% | ~230K tokens | Balanced — quality is still strong, plenty of room before auto-compact |
|
|
150
|
+
| **1M** | 30% | ~300K tokens | Context rot research shows measurable quality degradation at 80-150K tokens regardless of window size. A lower threshold catches this earlier. |
|
|
147
151
|
|
|
148
|
-
|
|
152
|
+
Override with `/cg:config threshold <value>` if the adaptive default doesn't suit your workflow.
|
|
149
153
|
|
|
150
|
-
###
|
|
154
|
+
### Why These Numbers?
|
|
151
155
|
|
|
152
|
-
|
|
156
|
+
Research on LLM attention patterns shows a **U-shaped attention curve** — models attend strongly to the beginning and end of context, with weaker attention in the middle. Quality degrades gradually, not at a cliff:
|
|
153
157
|
|
|
154
158
|
| Usage Range | Model Behavior |
|
|
155
159
|
|-------------|---------------|
|
|
@@ -160,7 +164,7 @@ Context Guardian triggers at **35% usage** by default. This is deliberately cons
|
|
|
160
164
|
| **80-95%** | Critical zone. Effective context is much smaller than the raw number suggests. |
|
|
161
165
|
| **95%+** | Emergency auto-compact fires. Everything reduced to a brief summary. |
|
|
162
166
|
|
|
163
|
-
|
|
167
|
+
The adaptive threshold places the alert at the boundary between "strong recall" and "beginning to degrade" for each window size.
|
|
164
168
|
|
|
165
169
|
### What Actually Fills the Context
|
|
166
170
|
|
|
@@ -250,19 +254,9 @@ Skills invoke `compact-cli.mjs` via Bash (since skills don't fire `UserPromptSub
|
|
|
250
254
|
| `/cg:prune` | `lib/compact-cli.mjs recent` → `checkpoint.mjs:performCompaction()` |
|
|
251
255
|
| `/cg:handoff` | `lib/compact-cli.mjs handoff` → `handoff.mjs:performHandoff()` |
|
|
252
256
|
|
|
253
|
-
### Token Counting
|
|
254
|
-
|
|
255
|
-
Two methods, preferring the more accurate. State is written by **both** the submit hook (before the response) and the stop hook (after the response), so `/cg:stats` always reflects the latest counts.
|
|
256
|
-
|
|
257
|
-
1. **Real counts (preferred):** Reads `message.usage` from the most recent assistant message in the transcript JSONL. Calculates `input_tokens + cache_creation_input_tokens + cache_read_input_tokens`. Also detects the model name for auto-detecting max_tokens.
|
|
258
|
-
|
|
259
|
-
2. **Byte estimation (fallback):** Only used on the very first message of a session (before any assistant response). Counts content bytes after the most recent compact marker and divides by 4.
|
|
260
|
-
|
|
261
|
-
3. **Post-compaction estimates:** After compaction or checkpoint restore, a state file is written with estimated post-compaction token counts so `/cg:stats` works immediately.
|
|
262
|
-
|
|
263
257
|
### Baseline Overhead
|
|
264
258
|
|
|
265
|
-
On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` - at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in
|
|
259
|
+
On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` — at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in compaction stats and session size calculations.
|
|
266
260
|
|
|
267
261
|
### Statusline
|
|
268
262
|
|
|
@@ -285,16 +279,9 @@ In green/yellow states, labels are dim/grey with only the numbers colored. At re
|
|
|
285
279
|
|
|
286
280
|
The session-start hook **reclaims the statusline** if another tool overwrites it, logging a warning and notifying the user via `additionalContext`.
|
|
287
281
|
|
|
288
|
-
### Model & Token Limit
|
|
289
|
-
|
|
290
|
-
Every assistant message in the transcript includes a `model` field (e.g., `"claude-opus-4-6"`). Context Guardian uses this to set the token limit:
|
|
291
|
-
|
|
292
|
-
- **Opus 4.6+** (major >= 4, minor >= 6): **1,000,000 tokens**
|
|
293
|
-
- **Everything else** (Sonnet, Haiku, older Opus): **200,000 tokens**
|
|
294
|
-
|
|
295
|
-
This is imperfect - I haven't found a better way to do this yet. Contributions or ideas welcome.
|
|
282
|
+
### Model & Token Limit Detection
|
|
296
283
|
|
|
297
|
-
You can override
|
|
284
|
+
Context Guardian automatically detects the actual context window size and model for the current session. The detected values update immediately when you switch models via `/model`. You can override with `/cg:config max_tokens <value>` if needed.
|
|
298
285
|
|
|
299
286
|
### Data Storage
|
|
300
287
|
|
package/hooks/session-start.mjs
CHANGED
|
@@ -3,7 +3,11 @@ import fs from "node:fs";
|
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { log } from "../lib/logger.mjs";
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
atomicWriteFileSync,
|
|
8
|
+
resolveDataDir,
|
|
9
|
+
STATUSLINE_STATE_DIR,
|
|
10
|
+
} from "../lib/paths.mjs";
|
|
7
11
|
|
|
8
12
|
let input;
|
|
9
13
|
try {
|
|
@@ -15,22 +19,28 @@ try {
|
|
|
15
19
|
|
|
16
20
|
const STALE_MS = 30 * 60 * 1000;
|
|
17
21
|
|
|
18
|
-
// Clean up stale session-scoped state files (state-*.json) in
|
|
19
|
-
//
|
|
22
|
+
// Clean up stale session-scoped state files (state-*.json) in both the primary
|
|
23
|
+
// data dir and the statusline fallback dir (~/.claude/cg/).
|
|
20
24
|
const dataDir = resolveDataDir();
|
|
21
|
-
|
|
25
|
+
for (const dir of new Set([dataDir, STATUSLINE_STATE_DIR])) {
|
|
26
|
+
if (!fs.existsSync(dir)) continue;
|
|
22
27
|
try {
|
|
23
28
|
const now3 = Date.now();
|
|
24
29
|
for (const f of fs
|
|
25
|
-
.readdirSync(
|
|
30
|
+
.readdirSync(dir)
|
|
26
31
|
.filter((f) => f.startsWith("state-") && f.endsWith(".json"))) {
|
|
27
|
-
const filePath = path.join(
|
|
32
|
+
const filePath = path.join(dir, f);
|
|
28
33
|
try {
|
|
29
34
|
if (now3 - fs.statSync(filePath).mtimeMs > STALE_MS) {
|
|
30
35
|
fs.unlinkSync(filePath);
|
|
31
36
|
}
|
|
32
37
|
} catch {}
|
|
33
38
|
}
|
|
39
|
+
// Remove legacy cc-context-window.json (context_window_size is now in state files)
|
|
40
|
+
const legacyFile = path.join(dir, "cc-context-window.json");
|
|
41
|
+
try {
|
|
42
|
+
fs.unlinkSync(legacyFile);
|
|
43
|
+
} catch {}
|
|
34
44
|
} catch {}
|
|
35
45
|
}
|
|
36
46
|
|
package/hooks/stop.mjs
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import fs from "node:fs";
|
|
3
|
-
import {
|
|
4
|
-
import { estimateSavings } from "../lib/estimate.mjs";
|
|
3
|
+
import { adaptiveThreshold, resolveMaxTokens } from "../lib/config.mjs";
|
|
5
4
|
import { log } from "../lib/logger.mjs";
|
|
6
5
|
import {
|
|
7
6
|
atomicWriteFileSync,
|
|
@@ -15,10 +14,8 @@ import { estimateTokens, getTokenUsage } from "../lib/tokens.mjs";
|
|
|
15
14
|
// ---------------------------------------------------------------------------
|
|
16
15
|
// Stop hook — writes fresh token counts after each assistant response.
|
|
17
16
|
//
|
|
18
|
-
//
|
|
19
|
-
//
|
|
20
|
-
// updates the token counts (cheap — tail-reads 32KB) and carries forward the
|
|
21
|
-
// existing savings estimates from the state file.
|
|
17
|
+
// Lightweight: tail-reads 32KB of the transcript for token counts, captures
|
|
18
|
+
// baseline overhead on the first 2 responses, and writes state.
|
|
22
19
|
// ---------------------------------------------------------------------------
|
|
23
20
|
let input;
|
|
24
21
|
try {
|
|
@@ -39,29 +36,12 @@ try {
|
|
|
39
36
|
payloadBytes = fs.statSync(transcript_path).size;
|
|
40
37
|
} catch {}
|
|
41
38
|
|
|
42
|
-
const cfg = loadConfig();
|
|
43
|
-
const threshold = cfg.threshold ?? 0.35;
|
|
44
|
-
|
|
45
39
|
const realUsage = getTokenUsage(transcript_path);
|
|
46
40
|
const currentTokens = realUsage
|
|
47
41
|
? realUsage.current_tokens
|
|
48
42
|
: estimateTokens(transcript_path);
|
|
49
|
-
const maxTokens = realUsage?.max_tokens || resolveMaxTokens() || 200000;
|
|
50
|
-
const pct = currentTokens / maxTokens;
|
|
51
43
|
const source = realUsage ? "real" : "estimated";
|
|
52
44
|
|
|
53
|
-
const headroom = Math.max(0, Math.round(maxTokens * threshold - currentTokens));
|
|
54
|
-
const pctDisplay = (pct * 100).toFixed(1);
|
|
55
|
-
const thresholdDisplay = Math.round(threshold * 100);
|
|
56
|
-
let recommendation;
|
|
57
|
-
if (pct < threshold * 0.5)
|
|
58
|
-
recommendation = "All clear. Plenty of context remaining.";
|
|
59
|
-
else if (pct < threshold)
|
|
60
|
-
recommendation = "Approaching threshold. Consider wrapping up complex tasks.";
|
|
61
|
-
else
|
|
62
|
-
recommendation =
|
|
63
|
-
"At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
|
|
64
|
-
|
|
65
45
|
// Don't overwrite a recent state file with estimated data — checkpoint writes
|
|
66
46
|
// or the submit hook may have written accurate post-compaction counts that we'd clobber.
|
|
67
47
|
if (source === "estimated") {
|
|
@@ -77,24 +57,47 @@ if (source === "estimated") {
|
|
|
77
57
|
} catch {}
|
|
78
58
|
}
|
|
79
59
|
|
|
80
|
-
//
|
|
81
|
-
// This avoids re-reading and re-parsing the full transcript (~50MB at scale).
|
|
82
|
-
let smartEstimatePct = 0;
|
|
83
|
-
let recentEstimatePct = 0;
|
|
60
|
+
// Read previous state for carry-forward values.
|
|
84
61
|
let baselineOverhead = 0;
|
|
85
62
|
let baselineResponseCount = 0;
|
|
86
63
|
try {
|
|
87
64
|
const sf = stateFile(session_id);
|
|
88
65
|
if (fs.existsSync(sf)) {
|
|
89
66
|
const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
|
|
90
|
-
smartEstimatePct = prev.smart_estimate_pct ?? 0;
|
|
91
|
-
recentEstimatePct = prev.recent_estimate_pct ?? 0;
|
|
92
67
|
baselineOverhead = prev.baseline_overhead ?? 0;
|
|
93
68
|
baselineResponseCount = prev.baseline_response_count ?? 0;
|
|
94
69
|
}
|
|
95
70
|
} catch (e) {
|
|
96
71
|
log(`state-read-error session=${session_id}: ${e.message}`);
|
|
97
72
|
}
|
|
73
|
+
// The statusline state file (~/.claude/cg/) is the primary source for
|
|
74
|
+
// context_window_size and model — the statusline receives these directly
|
|
75
|
+
// from Claude Code and is always authoritative, including after /model switches.
|
|
76
|
+
let ccContextWindowSize = null;
|
|
77
|
+
let ccModelId = null;
|
|
78
|
+
try {
|
|
79
|
+
const slFile = statuslineStateFile(session_id);
|
|
80
|
+
if (fs.existsSync(slFile)) {
|
|
81
|
+
const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
|
|
82
|
+
ccContextWindowSize = slState.context_window_size ?? null;
|
|
83
|
+
ccModelId = slState.cc_model_id ?? null;
|
|
84
|
+
}
|
|
85
|
+
} catch {}
|
|
86
|
+
const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
|
|
87
|
+
const threshold = adaptiveThreshold(maxTokens);
|
|
88
|
+
const pct = currentTokens / maxTokens;
|
|
89
|
+
|
|
90
|
+
const headroom = Math.max(0, Math.round(maxTokens * threshold - currentTokens));
|
|
91
|
+
const pctDisplay = (pct * 100).toFixed(1);
|
|
92
|
+
const thresholdDisplay = Math.round(threshold * 100);
|
|
93
|
+
let recommendation;
|
|
94
|
+
if (pct < threshold * 0.5)
|
|
95
|
+
recommendation = "All clear. Plenty of context remaining.";
|
|
96
|
+
else if (pct < threshold)
|
|
97
|
+
recommendation = "Approaching threshold. Consider wrapping up complex tasks.";
|
|
98
|
+
else
|
|
99
|
+
recommendation =
|
|
100
|
+
"At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
|
|
98
101
|
|
|
99
102
|
if (baselineResponseCount < 2 && currentTokens > 0) {
|
|
100
103
|
if (baselineOverhead) {
|
|
@@ -106,24 +109,6 @@ if (baselineResponseCount < 2 && currentTokens > 0) {
|
|
|
106
109
|
log(
|
|
107
110
|
`baseline-overhead session=${session_id} tokens=${baselineOverhead} response=${baselineResponseCount}`,
|
|
108
111
|
);
|
|
109
|
-
|
|
110
|
-
// Recompute estimates now that we have the baseline — the submit hook ran
|
|
111
|
-
// before us and wrote 0 estimates because it didn't have the baseline yet.
|
|
112
|
-
try {
|
|
113
|
-
const savings = estimateSavings(
|
|
114
|
-
transcript_path,
|
|
115
|
-
currentTokens,
|
|
116
|
-
maxTokens,
|
|
117
|
-
baselineOverhead,
|
|
118
|
-
);
|
|
119
|
-
smartEstimatePct = savings.smartPct;
|
|
120
|
-
recentEstimatePct = savings.recentPct;
|
|
121
|
-
log(
|
|
122
|
-
`baseline-recompute session=${session_id} smart=${smartEstimatePct}% recent=${recentEstimatePct}%`,
|
|
123
|
-
);
|
|
124
|
-
} catch (e) {
|
|
125
|
-
log(`baseline-recompute-error: ${e.message}`);
|
|
126
|
-
}
|
|
127
112
|
}
|
|
128
113
|
|
|
129
114
|
try {
|
|
@@ -135,6 +120,7 @@ try {
|
|
|
135
120
|
const stateJson = JSON.stringify({
|
|
136
121
|
current_tokens: currentTokens,
|
|
137
122
|
max_tokens: maxTokens,
|
|
123
|
+
context_window_size: ccContextWindowSize,
|
|
138
124
|
pct,
|
|
139
125
|
pct_display: pctDisplay,
|
|
140
126
|
threshold,
|
|
@@ -143,9 +129,7 @@ try {
|
|
|
143
129
|
headroom,
|
|
144
130
|
recommendation,
|
|
145
131
|
source,
|
|
146
|
-
model: realUsage?.model || "unknown",
|
|
147
|
-
smart_estimate_pct: smartEstimatePct,
|
|
148
|
-
recent_estimate_pct: recentEstimatePct,
|
|
132
|
+
model: ccModelId || realUsage?.model || "unknown",
|
|
149
133
|
baseline_overhead: baselineOverhead,
|
|
150
134
|
baseline_response_count: baselineResponseCount,
|
|
151
135
|
payload_bytes: payloadBytes,
|
package/hooks/submit.mjs
CHANGED
|
@@ -8,8 +8,7 @@
|
|
|
8
8
|
* @module submit-hook
|
|
9
9
|
*/
|
|
10
10
|
import fs from "node:fs";
|
|
11
|
-
import {
|
|
12
|
-
import { estimateSavings } from "../lib/estimate.mjs";
|
|
11
|
+
import { adaptiveThreshold, resolveMaxTokens } from "../lib/config.mjs";
|
|
13
12
|
import { log } from "../lib/logger.mjs";
|
|
14
13
|
import {
|
|
15
14
|
atomicWriteFileSync,
|
|
@@ -45,17 +44,40 @@ try {
|
|
|
45
44
|
payloadBytes = fs.statSync(transcript_path).size;
|
|
46
45
|
} catch {}
|
|
47
46
|
|
|
48
|
-
const cfg = loadConfig();
|
|
49
|
-
const threshold = cfg.threshold ?? 0.35;
|
|
50
|
-
|
|
51
47
|
const realUsage = getTokenUsage(transcript_path);
|
|
52
48
|
const currentTokens = realUsage
|
|
53
49
|
? realUsage.current_tokens
|
|
54
50
|
: estimateTokens(transcript_path);
|
|
55
|
-
const maxTokens = realUsage?.max_tokens || resolveMaxTokens() || 200000;
|
|
56
|
-
const pct = currentTokens / maxTokens;
|
|
57
51
|
const source = realUsage ? "real" : "estimated";
|
|
58
52
|
|
|
53
|
+
// Read previous state for baseline overhead.
|
|
54
|
+
let baselineOverhead = 0;
|
|
55
|
+
try {
|
|
56
|
+
const sf = stateFile(session_id);
|
|
57
|
+
if (fs.existsSync(sf)) {
|
|
58
|
+
const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
|
|
59
|
+
baselineOverhead = prev.baseline_overhead ?? 0;
|
|
60
|
+
}
|
|
61
|
+
} catch (e) {
|
|
62
|
+
log(`state-read-error session=${session_id}: ${e.message}`);
|
|
63
|
+
}
|
|
64
|
+
// The statusline state file (~/.claude/cg/) is the primary source for
|
|
65
|
+
// context_window_size and model — the statusline receives these directly
|
|
66
|
+
// from Claude Code and is always authoritative, including after /model switches.
|
|
67
|
+
let ccContextWindowSize = null;
|
|
68
|
+
let ccModelId = null;
|
|
69
|
+
try {
|
|
70
|
+
const slFile = statuslineStateFile(session_id);
|
|
71
|
+
if (fs.existsSync(slFile)) {
|
|
72
|
+
const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
|
|
73
|
+
ccContextWindowSize = slState.context_window_size ?? null;
|
|
74
|
+
ccModelId = slState.cc_model_id ?? null;
|
|
75
|
+
}
|
|
76
|
+
} catch {}
|
|
77
|
+
const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
|
|
78
|
+
const threshold = adaptiveThreshold(maxTokens);
|
|
79
|
+
const pct = currentTokens / maxTokens;
|
|
80
|
+
|
|
59
81
|
log(
|
|
60
82
|
`check session=${session_id} tokens=${currentTokens}/${maxTokens} pct=${(pct * 100).toFixed(1)}% threshold=${(threshold * 100).toFixed(0)}% source=${source}`,
|
|
61
83
|
);
|
|
@@ -73,34 +95,16 @@ else
|
|
|
73
95
|
recommendation =
|
|
74
96
|
"At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
|
|
75
97
|
|
|
76
|
-
// Read measured baseline overhead from state (captured by stop hook on first response)
|
|
77
|
-
let baselineOverhead = 0;
|
|
78
|
-
try {
|
|
79
|
-
const sf = stateFile(session_id);
|
|
80
|
-
if (fs.existsSync(sf)) {
|
|
81
|
-
const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
|
|
82
|
-
baselineOverhead = prev.baseline_overhead ?? 0;
|
|
83
|
-
}
|
|
84
|
-
} catch (e) {
|
|
85
|
-
log(`state-read-error session=${session_id}: ${e.message}`);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
const savings = estimateSavings(
|
|
89
|
-
transcript_path,
|
|
90
|
-
currentTokens,
|
|
91
|
-
maxTokens,
|
|
92
|
-
baselineOverhead,
|
|
93
|
-
);
|
|
94
|
-
|
|
95
98
|
try {
|
|
96
99
|
ensureDataDir();
|
|
97
100
|
const remaining = Math.max(
|
|
98
101
|
0,
|
|
99
102
|
Math.round(thresholdDisplay - Number.parseFloat(pctDisplay)),
|
|
100
103
|
);
|
|
101
|
-
const
|
|
104
|
+
const stateObj = {
|
|
102
105
|
current_tokens: currentTokens,
|
|
103
106
|
max_tokens: maxTokens,
|
|
107
|
+
context_window_size: ccContextWindowSize,
|
|
104
108
|
pct,
|
|
105
109
|
pct_display: pctDisplay,
|
|
106
110
|
threshold,
|
|
@@ -109,15 +113,14 @@ try {
|
|
|
109
113
|
headroom,
|
|
110
114
|
recommendation,
|
|
111
115
|
source,
|
|
112
|
-
model: realUsage?.model || "unknown",
|
|
113
|
-
smart_estimate_pct: savings.smartPct,
|
|
114
|
-
recent_estimate_pct: savings.recentPct,
|
|
116
|
+
model: ccModelId || realUsage?.model || "unknown",
|
|
115
117
|
baseline_overhead: baselineOverhead,
|
|
116
118
|
payload_bytes: payloadBytes,
|
|
117
119
|
session_id,
|
|
118
120
|
transcript_path,
|
|
119
121
|
ts: Date.now(),
|
|
120
|
-
}
|
|
122
|
+
};
|
|
123
|
+
const stateJson = JSON.stringify(stateObj);
|
|
121
124
|
atomicWriteFileSync(stateFile(session_id), stateJson);
|
|
122
125
|
|
|
123
126
|
// Also write to fixed fallback location so the statusline can find it
|
package/lib/checkpoint.mjs
CHANGED
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
ensureDataDir,
|
|
18
18
|
rotateCheckpoints,
|
|
19
19
|
stateFile,
|
|
20
|
+
statuslineStateFile,
|
|
20
21
|
} from "./paths.mjs";
|
|
21
22
|
import { formatCompactionStats } from "./stats.mjs";
|
|
22
23
|
import { estimateOverhead, estimateTokens, getTokenUsage } from "./tokens.mjs";
|
|
@@ -127,8 +128,6 @@ export function writeCompactionState(
|
|
|
127
128
|
recommendation: rec,
|
|
128
129
|
source: "estimated",
|
|
129
130
|
model: "unknown",
|
|
130
|
-
smart_estimate_pct: 0,
|
|
131
|
-
recent_estimate_pct: 0,
|
|
132
131
|
baseline_overhead: baselineOverhead,
|
|
133
132
|
payload_bytes: payloadBytes,
|
|
134
133
|
session_id: sessionId,
|
|
@@ -175,7 +174,17 @@ export function performCompaction(opts) {
|
|
|
175
174
|
|
|
176
175
|
// Extract and cap content
|
|
177
176
|
const usage = getTokenUsage(transcriptPath);
|
|
178
|
-
|
|
177
|
+
|
|
178
|
+
// Read authoritative context_window_size from statusline state file.
|
|
179
|
+
let ccContextWindowSize = null;
|
|
180
|
+
try {
|
|
181
|
+
const slFile = statuslineStateFile(sessionId);
|
|
182
|
+
if (fs.existsSync(slFile)) {
|
|
183
|
+
const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
|
|
184
|
+
ccContextWindowSize = slState.context_window_size ?? null;
|
|
185
|
+
}
|
|
186
|
+
} catch {}
|
|
187
|
+
const capMax = ccContextWindowSize || resolveMaxTokens() || 200000;
|
|
179
188
|
let content =
|
|
180
189
|
mode === "smart"
|
|
181
190
|
? extractConversation(transcriptPath)
|
|
@@ -217,7 +226,8 @@ export function performCompaction(opts) {
|
|
|
217
226
|
preStats?.currentTokens ||
|
|
218
227
|
usage?.current_tokens ||
|
|
219
228
|
estimateTokens(transcriptPath);
|
|
220
|
-
const preMax =
|
|
229
|
+
const preMax =
|
|
230
|
+
preStats?.maxTokens || ccContextWindowSize || resolveMaxTokens();
|
|
221
231
|
|
|
222
232
|
// Read baseline overhead from state file if available
|
|
223
233
|
let baselineOverhead = 0;
|
package/lib/config.mjs
CHANGED
|
@@ -14,14 +14,14 @@ let _cachedConfig = null;
|
|
|
14
14
|
|
|
15
15
|
export function loadConfig() {
|
|
16
16
|
if (_cachedConfig) return _cachedConfig;
|
|
17
|
+
let raw = {};
|
|
17
18
|
try {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}
|
|
19
|
+
raw = JSON.parse(fs.readFileSync(CONFIG_FILE, "utf8"));
|
|
20
|
+
} catch {}
|
|
21
|
+
_cachedConfig = { ...DEFAULT_CONFIG, ...raw };
|
|
22
|
+
// Track whether the user explicitly set a threshold via /cg:config.
|
|
23
|
+
// If not, hooks and statusline use the adaptive threshold instead.
|
|
24
|
+
_cachedConfig._thresholdExplicit = "threshold" in raw;
|
|
25
25
|
return _cachedConfig;
|
|
26
26
|
}
|
|
27
27
|
|
|
@@ -30,11 +30,38 @@ export function loadConfig() {
|
|
|
30
30
|
// 1. Explicit max_tokens in config (covers most cases)
|
|
31
31
|
// 2. Safe default (200K)
|
|
32
32
|
//
|
|
33
|
-
// The
|
|
34
|
-
//
|
|
35
|
-
//
|
|
33
|
+
// The statusline writes the authoritative context_window_size to the
|
|
34
|
+
// per-session state file. This config value is the fallback before the
|
|
35
|
+
// statusline has fired.
|
|
36
36
|
// ---------------------------------------------------------------------------
|
|
37
37
|
export function resolveMaxTokens() {
|
|
38
38
|
const cfg = loadConfig();
|
|
39
39
|
return cfg.max_tokens ?? 200000;
|
|
40
40
|
}
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Adaptive threshold — scales with context window size.
|
|
44
|
+
//
|
|
45
|
+
// Context rot research shows quality degrades measurably at 80-150K tokens
|
|
46
|
+
// regardless of window size. A 200K window needs a higher threshold (alert
|
|
47
|
+
// later as a %) because system overhead eats a large share. A 1M window
|
|
48
|
+
// needs a lower threshold so the alert fires before quality degrades.
|
|
49
|
+
//
|
|
50
|
+
// 200K → 55% (alert at 110K tokens)
|
|
51
|
+
// 500K → 46% (alert at 230K tokens)
|
|
52
|
+
// 1M → 30% (alert at 300K tokens)
|
|
53
|
+
//
|
|
54
|
+
// If the user explicitly set a threshold via /cg:config, that wins.
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
export function adaptiveThreshold(maxTokens) {
|
|
57
|
+
const cfg = loadConfig();
|
|
58
|
+
if (cfg._thresholdExplicit) return cfg.threshold;
|
|
59
|
+
return computeAdaptiveThreshold(maxTokens ?? cfg.max_tokens ?? 200000);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export function computeAdaptiveThreshold(maxTokens) {
|
|
63
|
+
return Math.min(
|
|
64
|
+
0.55,
|
|
65
|
+
Math.max(0.25, 0.55 - ((maxTokens - 200000) * 0.25) / 800000),
|
|
66
|
+
);
|
|
67
|
+
}
|
package/lib/handoff.mjs
CHANGED
|
@@ -13,7 +13,7 @@ import fs from "node:fs";
|
|
|
13
13
|
import path from "node:path";
|
|
14
14
|
import { resolveMaxTokens } from "./config.mjs";
|
|
15
15
|
import { log } from "./logger.mjs";
|
|
16
|
-
import { stateFile } from "./paths.mjs";
|
|
16
|
+
import { stateFile, statuslineStateFile } from "./paths.mjs";
|
|
17
17
|
import { estimateOverhead, getTokenUsage } from "./tokens.mjs";
|
|
18
18
|
import { extractConversation } from "./transcript.mjs";
|
|
19
19
|
|
|
@@ -71,7 +71,17 @@ export function performHandoff({ transcriptPath, sessionId, label = "" }) {
|
|
|
71
71
|
const usage = getTokenUsage(transcriptPath);
|
|
72
72
|
const preTokens =
|
|
73
73
|
usage?.current_tokens || Math.round(Buffer.byteLength(content, "utf8") / 4);
|
|
74
|
-
|
|
74
|
+
|
|
75
|
+
// Read authoritative context_window_size from statusline state file.
|
|
76
|
+
let ccContextWindowSize = null;
|
|
77
|
+
try {
|
|
78
|
+
const slFile = statuslineStateFile(sessionId);
|
|
79
|
+
if (fs.existsSync(slFile)) {
|
|
80
|
+
const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
|
|
81
|
+
ccContextWindowSize = slState.context_window_size ?? null;
|
|
82
|
+
}
|
|
83
|
+
} catch {}
|
|
84
|
+
const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
|
|
75
85
|
const postTokens = Math.round(Buffer.byteLength(fullContent, "utf8") / 4);
|
|
76
86
|
|
|
77
87
|
let baselineOverhead = 0;
|