@ricky-stevens/context-guardian 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@
9
9
  "name": "cg",
10
10
  "source": "./",
11
11
  "description": "Automatic context window monitoring and smart compaction for Claude Code",
12
- "version": "2.1.0",
12
+ "version": "2.2.0",
13
13
  "author": {
14
14
  "name": "Ricky"
15
15
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cg",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "Automatic context window monitoring and smart compaction for Claude Code",
5
5
  "author": {
6
6
  "name": "Ricky Stevens",
package/CLAUDE.md CHANGED
@@ -92,11 +92,21 @@ Never chop at a point. Start+end trim: keep first N chars (intent) + last N char
92
92
  - Compaction checkpoints are also copied to `.context-guardian/cg-checkpoint-*.md` for user visibility
93
93
  - `rotateFiles` sorts by mtime (not filename) because label-prefixed filenames break alphabetical chronological ordering
94
94
 
95
+ ## Model & Token Limit Detection
96
+
97
+ The statusline receives the authoritative `context_window_size` and `model.id` directly from Claude Code's session JSON. It persists these to the per-session state file in `~/.claude/cg/`. Hooks read from this file as the primary source — values update immediately after `/model` switches.
98
+
99
+ Fallback chain when the statusline hasn't fired yet (first message): `config.max_tokens` → `200000`.
100
+
101
+ ## Adaptive Threshold
102
+
103
+ The compaction threshold scales with context window size: 55% at 200K, 30% at 1M (linear interpolation, clamped [25%, 55%]). Computed by `computeAdaptiveThreshold()` in `lib/config.mjs`. If the user explicitly sets a threshold via `/cg:config threshold X`, the explicit value wins.
104
+
95
105
  ## Token Counting
96
106
 
97
107
  1. **Real counts (preferred):** `input_tokens + cache_creation_input_tokens + cache_read_input_tokens` from `message.usage` in transcript JSONL. Written by both submit and stop hooks.
98
108
  2. **Byte estimation (fallback):** First message only. Content bytes / 4.
99
- 3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in all savings estimates and session size calculation.
109
+ 3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in compaction stats and session size calculation.
100
110
 
101
111
  ## Session Size (API Payload Monitoring)
102
112
 
package/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Context Guardian
2
2
 
3
3
  [![CI](https://github.com/Ricky-Stevens/context-guardian/actions/workflows/ci.yml/badge.svg)](https://github.com/Ricky-Stevens/context-guardian/actions/workflows/ci.yml)
4
- [![Version](https://img.shields.io/badge/version-2.1.0-blue)](https://github.com/Ricky-Stevens/context-guardian/releases)
4
+ [![Version](https://img.shields.io/badge/version-2.2.0-blue)](https://github.com/Ricky-Stevens/context-guardian/releases)
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
6
  [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=Ricky-Stevens_context-guardian&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
7
7
  [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=Ricky-Stevens_context-guardian&metric=coverage)](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
@@ -53,7 +53,7 @@ Context Guardian adds five slash commands:
53
53
 
54
54
  ### `/cg:stats`
55
55
 
56
- Shows current token usage, session size, compaction estimates, and recommendations.
56
+ Shows current token usage, session size, threshold, and recommendations.
57
57
 
58
58
  ```
59
59
  ┌─────────────────────────────────────────────────
@@ -61,16 +61,12 @@ Shows current token usage, session size, compaction estimates, and recommendatio
61
61
 
62
62
  │ Current usage: 372,000 / 1,000,000 tokens (37.2%)
63
63
  │ Session size: 8.4MB / 20MB
64
- │ Threshold: 35% (0% remaining to warning)
65
- │ Data source: real counts
66
-
64
+ │ Threshold: 30% (0% remaining to warning)
67
65
  │ Model: claude-opus-4-6 / 1,000,000 tokens
68
- │ Last updated: 12 seconds ago
69
-
70
- │ /cg:compact ~37.2% → ~5%
71
- │ /cg:prune ~37.2% → ~3%
72
66
 
73
- │ /cg:handoff [name] save session for later
67
+ │ /cg:compact smart compact strips file reads, system noise
68
+ │ /cg:prune keep last 10 exchanges only
69
+ │ /cg:handoff [name] save session for later
74
70
 
75
71
  └─────────────────────────────────────────────────
76
72
  ```
@@ -78,10 +74,10 @@ Shows current token usage, session size, compaction estimates, and recommendatio
78
74
  ### `/cg:config`
79
75
 
80
76
  ```bash
81
- /cg:config # show current config + auto-detected model/limit
82
- /cg:config threshold 0.50 # trigger at 50%
83
- /cg:config max_tokens 1000000 # override token limit
84
- /cg:config reset # restore defaults
77
+ /cg:config # show current config + detected model/limit
78
+ /cg:config threshold 0.50 # override adaptive threshold with fixed 50%
79
+ /cg:config max_tokens 1000000 # override detected token limit
80
+ /cg:config reset # restore adaptive defaults
85
81
  ```
86
82
 
87
83
  ### `/cg:compact`
@@ -143,13 +139,21 @@ The 1M window is powerful, but it requires active management. Context Guardian p
143
139
 
144
140
  ---
145
141
 
146
- ## Why 35%?
142
+ ## Adaptive Threshold
143
+
144
+ Context Guardian's compaction threshold **scales automatically with the context window size**. Different window sizes need different thresholds — 35% of 200K is very different from 35% of 1M.
145
+
146
+ | Window | Default Threshold | Alert At | Rationale |
147
+ |--------|------------------|----------|-----------|
148
+ | **200K** | 55% | ~110K tokens | System overhead is 25-45K tokens, so a higher threshold maximises usable conversation space |
149
+ | **500K** | 46% | ~230K tokens | Balanced — quality is still strong, plenty of room before auto-compact |
150
+ | **1M** | 30% | ~300K tokens | Context rot research shows measurable quality degradation at 80-150K tokens regardless of window size. A lower threshold catches this earlier. |
147
151
 
148
- Context Guardian triggers at **35% usage** by default. This is deliberately conservative.
152
+ Override with `/cg:config threshold <value>` if the adaptive default doesn't suit your workflow.
149
153
 
150
- ### The Sweet Spot for Model Recall
154
+ ### Why These Numbers?
151
155
 
152
- [Research](https://news.mit.edu/2025/unpacking-large-language-model-bias-0617) on LLM attention patterns shows that models have a **U-shaped attention curve** - they attend strongly to the beginning and end of context, with weaker attention in the middle. As context grows:
156
+ Research on LLM attention patterns shows a **U-shaped attention curve** — models attend strongly to the beginning and end of context, with weaker attention in the middle. Quality degrades gradually, not at a cliff:
153
157
 
154
158
  | Usage Range | Model Behavior |
155
159
  |-------------|---------------|
@@ -160,7 +164,7 @@ Context Guardian triggers at **35% usage** by default. This is deliberately cons
160
164
  | **80-95%** | Critical zone. Effective context is much smaller than the raw number suggests. |
161
165
  | **95%+** | Emergency auto-compact fires. Everything reduced to a brief summary. |
162
166
 
163
- **35% sits at the boundary between "full recall" and "beginning to degrade."** It's the last point where you can extract with full confidence that the output will be accurate, because Claude still has strong attention over the entire conversation.
167
+ The adaptive threshold places the alert at the boundary between "strong recall" and "beginning to degrade" for each window size.
164
168
 
165
169
  ### What Actually Fills the Context
166
170
 
@@ -250,19 +254,9 @@ Skills invoke `compact-cli.mjs` via Bash (since skills don't fire `UserPromptSub
250
254
  | `/cg:prune` | `lib/compact-cli.mjs recent` → `checkpoint.mjs:performCompaction()` |
251
255
  | `/cg:handoff` | `lib/compact-cli.mjs handoff` → `handoff.mjs:performHandoff()` |
252
256
 
253
- ### Token Counting
254
-
255
- Two methods, preferring the more accurate. State is written by **both** the submit hook (before the response) and the stop hook (after the response), so `/cg:stats` always reflects the latest counts.
256
-
257
- 1. **Real counts (preferred):** Reads `message.usage` from the most recent assistant message in the transcript JSONL. Calculates `input_tokens + cache_creation_input_tokens + cache_read_input_tokens`. Also detects the model name for auto-detecting max_tokens.
258
-
259
- 2. **Byte estimation (fallback):** Only used on the very first message of a session (before any assistant response). Counts content bytes after the most recent compact marker and divides by 4.
260
-
261
- 3. **Post-compaction estimates:** After compaction or checkpoint restore, a state file is written with estimated post-compaction token counts so `/cg:stats` works immediately.
262
-
263
257
  ### Baseline Overhead
264
258
 
265
- On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` - at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in all compaction savings estimates.
259
+ On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` - at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in compaction stats and session size calculations.
266
260
 
267
261
  ### Statusline
268
262
 
@@ -285,16 +279,9 @@ In green/yellow states, labels are dim/grey with only the numbers colored. At re
285
279
 
286
280
  The session-start hook **reclaims the statusline** if another tool overwrites it, logging a warning and notifying the user via `additionalContext`.
287
281
 
288
- ### Model & Token Limit Auto-Detection
289
-
290
- Every assistant message in the transcript includes a `model` field (e.g., `"claude-opus-4-6"`). Context Guardian uses this to set the token limit:
291
-
292
- - **Opus 4.6+** (major >= 4, minor >= 6): **1,000,000 tokens**
293
- - **Everything else** (Sonnet, Haiku, older Opus): **200,000 tokens**
294
-
295
- This is imperfect - I haven't found a better way to do this yet. Contributions or ideas welcome.
282
+ ### Model & Token Limit Detection
296
283
 
297
- You can override this with `/cg:config max_tokens <value>` if the auto-detection doesn't match your setup.
284
+ Context Guardian automatically detects the actual context window size and model for the current session. The detected values update immediately when you switch models via `/model`. You can override with `/cg:config max_tokens <value>` if needed.
298
285
 
299
286
  ### Data Storage
300
287
 
@@ -3,7 +3,11 @@ import fs from "node:fs";
3
3
  import os from "node:os";
4
4
  import path from "node:path";
5
5
  import { log } from "../lib/logger.mjs";
6
- import { atomicWriteFileSync, resolveDataDir } from "../lib/paths.mjs";
6
+ import {
7
+ atomicWriteFileSync,
8
+ resolveDataDir,
9
+ STATUSLINE_STATE_DIR,
10
+ } from "../lib/paths.mjs";
7
11
 
8
12
  let input;
9
13
  try {
@@ -15,22 +19,28 @@ try {
15
19
 
16
20
  const STALE_MS = 30 * 60 * 1000;
17
21
 
18
- // Clean up stale session-scoped state files (state-*.json) in data dir.
19
- // Each session writes its own state file; old ones accumulate.
22
+ // Clean up stale session-scoped state files (state-*.json) in both the primary
23
+ // data dir and the statusline fallback dir (~/.claude/cg/).
20
24
  const dataDir = resolveDataDir();
21
- if (fs.existsSync(dataDir)) {
25
+ for (const dir of new Set([dataDir, STATUSLINE_STATE_DIR])) {
26
+ if (!fs.existsSync(dir)) continue;
22
27
  try {
23
28
  const now3 = Date.now();
24
29
  for (const f of fs
25
- .readdirSync(dataDir)
30
+ .readdirSync(dir)
26
31
  .filter((f) => f.startsWith("state-") && f.endsWith(".json"))) {
27
- const filePath = path.join(dataDir, f);
32
+ const filePath = path.join(dir, f);
28
33
  try {
29
34
  if (now3 - fs.statSync(filePath).mtimeMs > STALE_MS) {
30
35
  fs.unlinkSync(filePath);
31
36
  }
32
37
  } catch {}
33
38
  }
39
+ // Remove legacy cc-context-window.json (context_window_size is now in state files)
40
+ const legacyFile = path.join(dir, "cc-context-window.json");
41
+ try {
42
+ fs.unlinkSync(legacyFile);
43
+ } catch {}
34
44
  } catch {}
35
45
  }
36
46
 
package/hooks/stop.mjs CHANGED
@@ -1,7 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import fs from "node:fs";
3
- import { loadConfig, resolveMaxTokens } from "../lib/config.mjs";
4
- import { estimateSavings } from "../lib/estimate.mjs";
3
+ import { adaptiveThreshold, resolveMaxTokens } from "../lib/config.mjs";
5
4
  import { log } from "../lib/logger.mjs";
6
5
  import {
7
6
  atomicWriteFileSync,
@@ -15,10 +14,8 @@ import { estimateTokens, getTokenUsage } from "../lib/tokens.mjs";
15
14
  // ---------------------------------------------------------------------------
16
15
  // Stop hook — writes fresh token counts after each assistant response.
17
16
  //
18
- // PERFORMANCE: Does NOT call estimateSavings (which reads the full transcript).
19
- // The submit hook already computed and saved savings estimates. This hook only
20
- // updates the token counts (cheap — tail-reads 32KB) and carries forward the
21
- // existing savings estimates from the state file.
17
+ // Lightweight: tail-reads 32KB of the transcript for token counts, captures
18
+ // baseline overhead on the first 2 responses, and writes state.
22
19
  // ---------------------------------------------------------------------------
23
20
  let input;
24
21
  try {
@@ -39,29 +36,12 @@ try {
39
36
  payloadBytes = fs.statSync(transcript_path).size;
40
37
  } catch {}
41
38
 
42
- const cfg = loadConfig();
43
- const threshold = cfg.threshold ?? 0.35;
44
-
45
39
  const realUsage = getTokenUsage(transcript_path);
46
40
  const currentTokens = realUsage
47
41
  ? realUsage.current_tokens
48
42
  : estimateTokens(transcript_path);
49
- const maxTokens = realUsage?.max_tokens || resolveMaxTokens() || 200000;
50
- const pct = currentTokens / maxTokens;
51
43
  const source = realUsage ? "real" : "estimated";
52
44
 
53
- const headroom = Math.max(0, Math.round(maxTokens * threshold - currentTokens));
54
- const pctDisplay = (pct * 100).toFixed(1);
55
- const thresholdDisplay = Math.round(threshold * 100);
56
- let recommendation;
57
- if (pct < threshold * 0.5)
58
- recommendation = "All clear. Plenty of context remaining.";
59
- else if (pct < threshold)
60
- recommendation = "Approaching threshold. Consider wrapping up complex tasks.";
61
- else
62
- recommendation =
63
- "At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
64
-
65
45
  // Don't overwrite a recent state file with estimated data — checkpoint writes
66
46
  // or the submit hook may have written accurate post-compaction counts that we'd clobber.
67
47
  if (source === "estimated") {
@@ -77,24 +57,47 @@ if (source === "estimated") {
77
57
  } catch {}
78
58
  }
79
59
 
80
- // Carry forward savings estimates and baseline overhead from the existing state file.
81
- // This avoids re-reading and re-parsing the full transcript (~50MB at scale).
82
- let smartEstimatePct = 0;
83
- let recentEstimatePct = 0;
60
+ // Read previous state for carry-forward values.
84
61
  let baselineOverhead = 0;
85
62
  let baselineResponseCount = 0;
86
63
  try {
87
64
  const sf = stateFile(session_id);
88
65
  if (fs.existsSync(sf)) {
89
66
  const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
90
- smartEstimatePct = prev.smart_estimate_pct ?? 0;
91
- recentEstimatePct = prev.recent_estimate_pct ?? 0;
92
67
  baselineOverhead = prev.baseline_overhead ?? 0;
93
68
  baselineResponseCount = prev.baseline_response_count ?? 0;
94
69
  }
95
70
  } catch (e) {
96
71
  log(`state-read-error session=${session_id}: ${e.message}`);
97
72
  }
73
+ // The statusline state file (~/.claude/cg/) is the primary source for
74
+ // context_window_size and model — the statusline receives these directly
75
+ // from Claude Code and is always authoritative, including after /model switches.
76
+ let ccContextWindowSize = null;
77
+ let ccModelId = null;
78
+ try {
79
+ const slFile = statuslineStateFile(session_id);
80
+ if (fs.existsSync(slFile)) {
81
+ const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
82
+ ccContextWindowSize = slState.context_window_size ?? null;
83
+ ccModelId = slState.cc_model_id ?? null;
84
+ }
85
+ } catch {}
86
+ const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
87
+ const threshold = adaptiveThreshold(maxTokens);
88
+ const pct = currentTokens / maxTokens;
89
+
90
+ const headroom = Math.max(0, Math.round(maxTokens * threshold - currentTokens));
91
+ const pctDisplay = (pct * 100).toFixed(1);
92
+ const thresholdDisplay = Math.round(threshold * 100);
93
+ let recommendation;
94
+ if (pct < threshold * 0.5)
95
+ recommendation = "All clear. Plenty of context remaining.";
96
+ else if (pct < threshold)
97
+ recommendation = "Approaching threshold. Consider wrapping up complex tasks.";
98
+ else
99
+ recommendation =
100
+ "At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
98
101
 
99
102
  if (baselineResponseCount < 2 && currentTokens > 0) {
100
103
  if (baselineOverhead) {
@@ -106,24 +109,6 @@ if (baselineResponseCount < 2 && currentTokens > 0) {
106
109
  log(
107
110
  `baseline-overhead session=${session_id} tokens=${baselineOverhead} response=${baselineResponseCount}`,
108
111
  );
109
-
110
- // Recompute estimates now that we have the baseline — the submit hook ran
111
- // before us and wrote 0 estimates because it didn't have the baseline yet.
112
- try {
113
- const savings = estimateSavings(
114
- transcript_path,
115
- currentTokens,
116
- maxTokens,
117
- baselineOverhead,
118
- );
119
- smartEstimatePct = savings.smartPct;
120
- recentEstimatePct = savings.recentPct;
121
- log(
122
- `baseline-recompute session=${session_id} smart=${smartEstimatePct}% recent=${recentEstimatePct}%`,
123
- );
124
- } catch (e) {
125
- log(`baseline-recompute-error: ${e.message}`);
126
- }
127
112
  }
128
113
 
129
114
  try {
@@ -135,6 +120,7 @@ try {
135
120
  const stateJson = JSON.stringify({
136
121
  current_tokens: currentTokens,
137
122
  max_tokens: maxTokens,
123
+ context_window_size: ccContextWindowSize,
138
124
  pct,
139
125
  pct_display: pctDisplay,
140
126
  threshold,
@@ -143,9 +129,7 @@ try {
143
129
  headroom,
144
130
  recommendation,
145
131
  source,
146
- model: realUsage?.model || "unknown",
147
- smart_estimate_pct: smartEstimatePct,
148
- recent_estimate_pct: recentEstimatePct,
132
+ model: ccModelId || realUsage?.model || "unknown",
149
133
  baseline_overhead: baselineOverhead,
150
134
  baseline_response_count: baselineResponseCount,
151
135
  payload_bytes: payloadBytes,
package/hooks/submit.mjs CHANGED
@@ -8,8 +8,7 @@
8
8
  * @module submit-hook
9
9
  */
10
10
  import fs from "node:fs";
11
- import { loadConfig, resolveMaxTokens } from "../lib/config.mjs";
12
- import { estimateSavings } from "../lib/estimate.mjs";
11
+ import { adaptiveThreshold, resolveMaxTokens } from "../lib/config.mjs";
13
12
  import { log } from "../lib/logger.mjs";
14
13
  import {
15
14
  atomicWriteFileSync,
@@ -45,17 +44,40 @@ try {
45
44
  payloadBytes = fs.statSync(transcript_path).size;
46
45
  } catch {}
47
46
 
48
- const cfg = loadConfig();
49
- const threshold = cfg.threshold ?? 0.35;
50
-
51
47
  const realUsage = getTokenUsage(transcript_path);
52
48
  const currentTokens = realUsage
53
49
  ? realUsage.current_tokens
54
50
  : estimateTokens(transcript_path);
55
- const maxTokens = realUsage?.max_tokens || resolveMaxTokens() || 200000;
56
- const pct = currentTokens / maxTokens;
57
51
  const source = realUsage ? "real" : "estimated";
58
52
 
53
+ // Read previous state for baseline overhead.
54
+ let baselineOverhead = 0;
55
+ try {
56
+ const sf = stateFile(session_id);
57
+ if (fs.existsSync(sf)) {
58
+ const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
59
+ baselineOverhead = prev.baseline_overhead ?? 0;
60
+ }
61
+ } catch (e) {
62
+ log(`state-read-error session=${session_id}: ${e.message}`);
63
+ }
64
+ // The statusline state file (~/.claude/cg/) is the primary source for
65
+ // context_window_size and model — the statusline receives these directly
66
+ // from Claude Code and is always authoritative, including after /model switches.
67
+ let ccContextWindowSize = null;
68
+ let ccModelId = null;
69
+ try {
70
+ const slFile = statuslineStateFile(session_id);
71
+ if (fs.existsSync(slFile)) {
72
+ const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
73
+ ccContextWindowSize = slState.context_window_size ?? null;
74
+ ccModelId = slState.cc_model_id ?? null;
75
+ }
76
+ } catch {}
77
+ const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
78
+ const threshold = adaptiveThreshold(maxTokens);
79
+ const pct = currentTokens / maxTokens;
80
+
59
81
  log(
60
82
  `check session=${session_id} tokens=${currentTokens}/${maxTokens} pct=${(pct * 100).toFixed(1)}% threshold=${(threshold * 100).toFixed(0)}% source=${source}`,
61
83
  );
@@ -73,34 +95,16 @@ else
73
95
  recommendation =
74
96
  "At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
75
97
 
76
- // Read measured baseline overhead from state (captured by stop hook on first response)
77
- let baselineOverhead = 0;
78
- try {
79
- const sf = stateFile(session_id);
80
- if (fs.existsSync(sf)) {
81
- const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
82
- baselineOverhead = prev.baseline_overhead ?? 0;
83
- }
84
- } catch (e) {
85
- log(`state-read-error session=${session_id}: ${e.message}`);
86
- }
87
-
88
- const savings = estimateSavings(
89
- transcript_path,
90
- currentTokens,
91
- maxTokens,
92
- baselineOverhead,
93
- );
94
-
95
98
  try {
96
99
  ensureDataDir();
97
100
  const remaining = Math.max(
98
101
  0,
99
102
  Math.round(thresholdDisplay - Number.parseFloat(pctDisplay)),
100
103
  );
101
- const stateJson = JSON.stringify({
104
+ const stateObj = {
102
105
  current_tokens: currentTokens,
103
106
  max_tokens: maxTokens,
107
+ context_window_size: ccContextWindowSize,
104
108
  pct,
105
109
  pct_display: pctDisplay,
106
110
  threshold,
@@ -109,15 +113,14 @@ try {
109
113
  headroom,
110
114
  recommendation,
111
115
  source,
112
- model: realUsage?.model || "unknown",
113
- smart_estimate_pct: savings.smartPct,
114
- recent_estimate_pct: savings.recentPct,
116
+ model: ccModelId || realUsage?.model || "unknown",
115
117
  baseline_overhead: baselineOverhead,
116
118
  payload_bytes: payloadBytes,
117
119
  session_id,
118
120
  transcript_path,
119
121
  ts: Date.now(),
120
- });
122
+ };
123
+ const stateJson = JSON.stringify(stateObj);
121
124
  atomicWriteFileSync(stateFile(session_id), stateJson);
122
125
 
123
126
  // Also write to fixed fallback location so the statusline can find it
@@ -17,6 +17,7 @@ import {
17
17
  ensureDataDir,
18
18
  rotateCheckpoints,
19
19
  stateFile,
20
+ statuslineStateFile,
20
21
  } from "./paths.mjs";
21
22
  import { formatCompactionStats } from "./stats.mjs";
22
23
  import { estimateOverhead, estimateTokens, getTokenUsage } from "./tokens.mjs";
@@ -127,8 +128,6 @@ export function writeCompactionState(
127
128
  recommendation: rec,
128
129
  source: "estimated",
129
130
  model: "unknown",
130
- smart_estimate_pct: 0,
131
- recent_estimate_pct: 0,
132
131
  baseline_overhead: baselineOverhead,
133
132
  payload_bytes: payloadBytes,
134
133
  session_id: sessionId,
@@ -175,7 +174,17 @@ export function performCompaction(opts) {
175
174
 
176
175
  // Extract and cap content
177
176
  const usage = getTokenUsage(transcriptPath);
178
- const capMax = usage?.max_tokens || resolveMaxTokens() || 200000;
177
+
178
+ // Read authoritative context_window_size from statusline state file.
179
+ let ccContextWindowSize = null;
180
+ try {
181
+ const slFile = statuslineStateFile(sessionId);
182
+ if (fs.existsSync(slFile)) {
183
+ const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
184
+ ccContextWindowSize = slState.context_window_size ?? null;
185
+ }
186
+ } catch {}
187
+ const capMax = ccContextWindowSize || resolveMaxTokens() || 200000;
179
188
  let content =
180
189
  mode === "smart"
181
190
  ? extractConversation(transcriptPath)
@@ -217,7 +226,8 @@ export function performCompaction(opts) {
217
226
  preStats?.currentTokens ||
218
227
  usage?.current_tokens ||
219
228
  estimateTokens(transcriptPath);
220
- const preMax = preStats?.maxTokens || usage?.max_tokens || resolveMaxTokens();
229
+ const preMax =
230
+ preStats?.maxTokens || ccContextWindowSize || resolveMaxTokens();
221
231
 
222
232
  // Read baseline overhead from state file if available
223
233
  let baselineOverhead = 0;
package/lib/config.mjs CHANGED
@@ -14,14 +14,14 @@ let _cachedConfig = null;
14
14
 
15
15
  export function loadConfig() {
16
16
  if (_cachedConfig) return _cachedConfig;
17
+ let raw = {};
17
18
  try {
18
- _cachedConfig = {
19
- ...DEFAULT_CONFIG,
20
- ...JSON.parse(fs.readFileSync(CONFIG_FILE, "utf8")),
21
- };
22
- } catch {
23
- _cachedConfig = { ...DEFAULT_CONFIG };
24
- }
19
+ raw = JSON.parse(fs.readFileSync(CONFIG_FILE, "utf8"));
20
+ } catch {}
21
+ _cachedConfig = { ...DEFAULT_CONFIG, ...raw };
22
+ // Track whether the user explicitly set a threshold via /cg:config.
23
+ // If not, hooks and statusline use the adaptive threshold instead.
24
+ _cachedConfig._thresholdExplicit = "threshold" in raw;
25
25
  return _cachedConfig;
26
26
  }
27
27
 
@@ -30,11 +30,38 @@ export function loadConfig() {
30
30
  // 1. Explicit max_tokens in config (covers most cases)
31
31
  // 2. Safe default (200K)
32
32
  //
33
- // The submit hook detects max_tokens from the model name in the transcript
34
- // (getTokenUsage in tokens.mjs). This config value is the initial fallback
35
- // before any assistant response provides real model info.
33
+ // The statusline writes the authoritative context_window_size to the
34
+ // per-session state file. This config value is the fallback before the
35
+ // statusline has fired.
36
36
  // ---------------------------------------------------------------------------
37
37
  export function resolveMaxTokens() {
38
38
  const cfg = loadConfig();
39
39
  return cfg.max_tokens ?? 200000;
40
40
  }
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Adaptive threshold — scales with context window size.
44
+ //
45
+ // Context rot research shows quality degrades measurably at 80-150K tokens
46
+ // regardless of window size. A 200K window needs a higher threshold (alert
47
+ // earlier as a %) because system overhead eats a large share. A 1M window
48
+ // needs a lower threshold so the alert fires before quality degrades.
49
+ //
50
+ // 200K → 55% (alert at 110K tokens)
51
+ // 500K → 46% (alert at 230K tokens)
52
+ // 1M → 30% (alert at 300K tokens)
53
+ //
54
+ // If the user explicitly set a threshold via /cg:config, that wins.
55
+ // ---------------------------------------------------------------------------
56
+ export function adaptiveThreshold(maxTokens) {
57
+ const cfg = loadConfig();
58
+ if (cfg._thresholdExplicit) return cfg.threshold;
59
+ return computeAdaptiveThreshold(maxTokens ?? cfg.max_tokens ?? 200000);
60
+ }
61
+
62
+ export function computeAdaptiveThreshold(maxTokens) {
63
+ return Math.min(
64
+ 0.55,
65
+ Math.max(0.25, 0.55 - ((maxTokens - 200000) * 0.25) / 800000),
66
+ );
67
+ }
package/lib/handoff.mjs CHANGED
@@ -13,7 +13,7 @@ import fs from "node:fs";
13
13
  import path from "node:path";
14
14
  import { resolveMaxTokens } from "./config.mjs";
15
15
  import { log } from "./logger.mjs";
16
- import { stateFile } from "./paths.mjs";
16
+ import { stateFile, statuslineStateFile } from "./paths.mjs";
17
17
  import { estimateOverhead, getTokenUsage } from "./tokens.mjs";
18
18
  import { extractConversation } from "./transcript.mjs";
19
19
 
@@ -71,7 +71,17 @@ export function performHandoff({ transcriptPath, sessionId, label = "" }) {
71
71
  const usage = getTokenUsage(transcriptPath);
72
72
  const preTokens =
73
73
  usage?.current_tokens || Math.round(Buffer.byteLength(content, "utf8") / 4);
74
- const maxTokens = usage?.max_tokens || resolveMaxTokens() || 200000;
74
+
75
+ // Read authoritative context_window_size from statusline state file.
76
+ let ccContextWindowSize = null;
77
+ try {
78
+ const slFile = statuslineStateFile(sessionId);
79
+ if (fs.existsSync(slFile)) {
80
+ const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
81
+ ccContextWindowSize = slState.context_window_size ?? null;
82
+ }
83
+ } catch {}
84
+ const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
75
85
  const postTokens = Math.round(Buffer.byteLength(fullContent, "utf8") / 4);
76
86
 
77
87
  let baselineOverhead = 0;