npm - @ricky-stevens/context-guardian - Versions diffs - 2.1.0 → 2.2.0 - Mend

@ricky-stevens/context-guardian 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/CLAUDE.md +11 -1
package/README.md +26 -39
package/hooks/session-start.mjs +16 -6
package/hooks/stop.mjs +34 -50
package/hooks/submit.mjs +34 -31
package/lib/checkpoint.mjs +14 -4
package/lib/config.mjs +37 -10
package/lib/handoff.mjs +12 -2
package/lib/statusline.mjs +104 -54
package/lib/tokens.mjs +2 -16
package/package.json +1 -1
package/skills/config/SKILL.md +1 -1
package/skills/stats/SKILL.md +7 -28
package/test/checkpoint.test.mjs +2 -2
package/test/config.test.mjs +39 -0
package/test/integration.test.mjs +4 -1
package/test/statusline.test.mjs +116 -6
package/test/submit.test.mjs +3 -9
package/test/tokens.test.mjs +2 -40
package/lib/estimate.mjs +0 -254
package/test/estimate.test.mjs +0 -262

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -9,7 +9,7 @@
 			"name": "cg",
 			"source": "./",
 			"description": "Automatic context window monitoring and smart compaction for Claude Code",
-			"version": "2.1.0",
+			"version": "2.2.0",
 			"author": {
 				"name": "Ricky"
 			},

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "cg",
-	"version": "2.1.0",
+	"version": "2.2.0",
 	"description": "Automatic context window monitoring and smart compaction for Claude Code",
 	"author": {
 		"name": "Ricky Stevens",

package/CLAUDE.md CHANGED Viewed

@@ -92,11 +92,21 @@ Never chop at a point. Start+end trim: keep first N chars (intent) + last N char
 - Compaction checkpoints are also copied to `.context-guardian/cg-checkpoint-*.md` for user visibility
 - `rotateFiles` sorts by mtime (not filename) because label-prefixed filenames break alphabetical chronological ordering
+## Model & Token Limit Detection
+The statusline receives the authoritative `context_window_size` and `model.id` directly from Claude Code's session JSON. It persists these to the per-session state file in `~/.claude/cg/`. Hooks read from this file as the primary source — values update immediately after `/model` switches.
+Fallback chain when the statusline hasn't fired yet (first message): `config.max_tokens` → `200000`.
+## Adaptive Threshold
+The compaction threshold scales with context window size: 55% at 200K, 30% at 1M (linear interpolation between those two points, clamped to [25%, 55%]). Computed by `computeAdaptiveThreshold()` in `lib/config.mjs`. If the user explicitly sets a threshold via `/cg:config threshold X`, the explicit value wins.
 ## Token Counting
 1. **Real counts (preferred):** `input_tokens + cache_creation_input_tokens + cache_read_input_tokens` from `message.usage` in transcript JSONL. Written by both submit and stop hooks.
 2. **Byte estimation (fallback):** First message only. Content bytes / 4.
-3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in all savings estimates and session size calculation.
+3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in compaction stats and session size calculation.
 ## Session Size (API Payload Monitoring)

package/README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # Context Guardian
 [![CI](https://github.com/Ricky-Stevens/context-guardian/actions/workflows/ci.yml/badge.svg)](https://github.com/Ricky-Stevens/context-guardian/actions/workflows/ci.yml)
-[![Version](https://img.shields.io/badge/version-2.1.0-blue)](https://github.com/Ricky-Stevens/context-guardian/releases)
+[![Version](https://img.shields.io/badge/version-2.2.0-blue)](https://github.com/Ricky-Stevens/context-guardian/releases)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=Ricky-Stevens_context-guardian&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
 [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=Ricky-Stevens_context-guardian&metric=coverage)](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
@@ -53,7 +53,7 @@ Context Guardian adds five slash commands:
 ### `/cg:stats`
-Shows current token usage, session size, compaction estimates, and recommendations.
+Shows current token usage, session size, threshold, and recommendations.
 ```
 ┌─────────────────────────────────────────────────
@@ -61,16 +61,12 @@ Shows current token usage, session size, compaction estimates, and recommendatio
 │
 │  Current usage:   372,000 / 1,000,000 tokens (37.2%)
 │  Session size:    8.4MB / 20MB
-│  Threshold:       35% (0% remaining to warning)
-│  Data source:     real counts
-│
+│  Threshold:       30% (0% remaining to warning)
 │  Model:           claude-opus-4-6 / 1,000,000 tokens
-│  Last updated:    12 seconds ago
-│
-│  /cg:compact         ~37.2% → ~5%
-│  /cg:prune           ~37.2% → ~3%
 │
-│  /cg:handoff [name]  save session for later
+│  /cg:compact        smart compact — strips file reads, system noise
+│  /cg:prune          keep last 10 exchanges only
+│  /cg:handoff [name] save session for later
 │
 └─────────────────────────────────────────────────
 ```
@@ -78,10 +74,10 @@ Shows current token usage, session size, compaction estimates, and recommendatio
 ### `/cg:config`
 ```bash
-/cg:config                     # show current config + auto-detected model/limit
-/cg:config threshold 0.50      # trigger at 50%
-/cg:config max_tokens 1000000  # override token limit
-/cg:config reset               # restore defaults
+/cg:config                     # show current config + detected model/limit
+/cg:config threshold 0.50      # override adaptive threshold with fixed 50%
+/cg:config max_tokens 1000000  # override detected token limit
+/cg:config reset               # restore adaptive defaults
 ```
 ### `/cg:compact`
@@ -143,13 +139,21 @@ The 1M window is powerful, but it requires active management. Context Guardian p
 ---
-## Why 35%?
+## Adaptive Threshold
+Context Guardian's compaction threshold **scales automatically with the context window size**. Different window sizes need different thresholds — 35% of 200K is very different from 35% of 1M.
+| Window | Default Threshold | Alert At | Rationale |
+|--------|------------------|----------|-----------|
+| **200K** | 55% | ~110K tokens | System overhead is 25-45K tokens, so a higher threshold maximises usable conversation space |
+| **500K** | 46% | ~230K tokens | Balanced — quality is still strong, plenty of room before auto-compact |
+| **1M** | 30% | ~300K tokens | Context rot research shows measurable quality degradation at 80-150K tokens regardless of window size. A lower threshold catches this earlier. |
-Context Guardian triggers at **35% usage** by default. This is deliberately conservative.
+Override with `/cg:config threshold <value>` if the adaptive default doesn't suit your workflow.
-### The Sweet Spot for Model Recall
+### Why These Numbers?
-[Research](https://news.mit.edu/2025/unpacking-large-language-model-bias-0617) on LLM attention patterns shows that models have a **U-shaped attention curve** - they attend strongly to the beginning and end of context, with weaker attention in the middle. As context grows:
+Research on LLM attention patterns shows a **U-shaped attention curve** — models attend strongly to the beginning and end of context, with weaker attention in the middle. Quality degrades gradually, not at a cliff:
 | Usage Range | Model Behavior |
 |-------------|---------------|
@@ -160,7 +164,7 @@ Context Guardian triggers at **35% usage** by default. This is deliberately cons
 | **80-95%** | Critical zone. Effective context is much smaller than the raw number suggests. |
 | **95%+** | Emergency auto-compact fires. Everything reduced to a brief summary. |
-**35% sits at the boundary between "full recall" and "beginning to degrade."** It's the last point where you can extract with full confidence that the output will be accurate, because Claude still has strong attention over the entire conversation.
+The adaptive threshold places the alert at the boundary between "strong recall" and "beginning to degrade" for each window size.
 ### What Actually Fills the Context
@@ -250,19 +254,9 @@ Skills invoke `compact-cli.mjs` via Bash (since skills don't fire `UserPromptSub
 | `/cg:prune` | `lib/compact-cli.mjs recent` → `checkpoint.mjs:performCompaction()` |
 | `/cg:handoff` | `lib/compact-cli.mjs handoff` → `handoff.mjs:performHandoff()` |
-### Token Counting
-Two methods, preferring the more accurate. State is written by **both** the submit hook (before the response) and the stop hook (after the response), so `/cg:stats` always reflects the latest counts.
-1. **Real counts (preferred):** Reads `message.usage` from the most recent assistant message in the transcript JSONL. Calculates `input_tokens + cache_creation_input_tokens + cache_read_input_tokens`. Also detects the model name for auto-detecting max_tokens.
-2. **Byte estimation (fallback):** Only used on the very first message of a session (before any assistant response). Counts content bytes after the most recent compact marker and divides by 4.
-3. **Post-compaction estimates:** After compaction or checkpoint restore, a state file is written with estimated post-compaction token counts so `/cg:stats` works immediately.
 ### Baseline Overhead
-On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` - at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in all compaction savings estimates.
+On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` - at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in compaction stats and session size calculations.
 ### Statusline
@@ -285,16 +279,9 @@ In green/yellow states, labels are dim/grey with only the numbers colored. At re
 The session-start hook **reclaims the statusline** if another tool overwrites it, logging a warning and notifying the user via `additionalContext`.
-### Model & Token Limit Auto-Detection
-Every assistant message in the transcript includes a `model` field (e.g., `"claude-opus-4-6"`). Context Guardian uses this to set the token limit:
-- **Opus 4.6+** (major >= 4, minor >= 6): **1,000,000 tokens**
-- **Everything else** (Sonnet, Haiku, older Opus): **200,000 tokens**
-This is imperfect - I haven't found a better way to do this yet. Contributions or ideas welcome.
+### Model & Token Limit Detection
-You can override this with `/cg:config max_tokens <value>` if the auto-detection doesn't match your setup.
+Context Guardian automatically detects the actual context window size and model for the current session. The detected values update immediately when you switch models via `/model`. You can override with `/cg:config max_tokens <value>` if needed.
 ### Data Storage

package/hooks/session-start.mjs CHANGED Viewed

@@ -3,7 +3,11 @@ import fs from "node:fs";
 import os from "node:os";
 import path from "node:path";
 import { log } from "../lib/logger.mjs";
-import { atomicWriteFileSync, resolveDataDir } from "../lib/paths.mjs";
+import {
+	atomicWriteFileSync,
+	resolveDataDir,
+	STATUSLINE_STATE_DIR,
+} from "../lib/paths.mjs";
 let input;
 try {
@@ -15,22 +19,28 @@ try {
 const STALE_MS = 30 * 60 * 1000;
-// Clean up stale session-scoped state files (state-*.json) in data dir.
-// Each session writes its own state file; old ones accumulate.
+// Clean up stale session-scoped state files (state-*.json) in both the primary
+// data dir and the statusline fallback dir (~/.claude/cg/).
 const dataDir = resolveDataDir();
-if (fs.existsSync(dataDir)) {
+for (const dir of new Set([dataDir, STATUSLINE_STATE_DIR])) {
+	if (!fs.existsSync(dir)) continue;
 	try {
 		const now3 = Date.now();
 		for (const f of fs
-			.readdirSync(dataDir)
+			.readdirSync(dir)
 			.filter((f) => f.startsWith("state-") && f.endsWith(".json"))) {
-			const filePath = path.join(dataDir, f);
+			const filePath = path.join(dir, f);
 			try {
 				if (now3 - fs.statSync(filePath).mtimeMs > STALE_MS) {
 					fs.unlinkSync(filePath);
 				}
 			} catch {}
 		}
+		// Remove legacy cc-context-window.json (context_window_size is now in state files)
+		const legacyFile = path.join(dir, "cc-context-window.json");
+		try {
+			fs.unlinkSync(legacyFile);
+		} catch {}
 	} catch {}
 }

package/hooks/stop.mjs CHANGED Viewed

@@ -1,7 +1,6 @@
 #!/usr/bin/env node
 import fs from "node:fs";
-import { loadConfig, resolveMaxTokens } from "../lib/config.mjs";
-import { estimateSavings } from "../lib/estimate.mjs";
+import { adaptiveThreshold, resolveMaxTokens } from "../lib/config.mjs";
 import { log } from "../lib/logger.mjs";
 import {
 	atomicWriteFileSync,
@@ -15,10 +14,8 @@ import { estimateTokens, getTokenUsage } from "../lib/tokens.mjs";
 // ---------------------------------------------------------------------------
 // Stop hook — writes fresh token counts after each assistant response.
 //
-// PERFORMANCE: Does NOT call estimateSavings (which reads the full transcript).
-// The submit hook already computed and saved savings estimates. This hook only
-// updates the token counts (cheap — tail-reads 32KB) and carries forward the
-// existing savings estimates from the state file.
+// Lightweight: tail-reads 32KB of the transcript for token counts, captures
+// baseline overhead on the first 2 responses, and writes state.
 // ---------------------------------------------------------------------------
 let input;
 try {
@@ -39,29 +36,12 @@ try {
 	payloadBytes = fs.statSync(transcript_path).size;
 } catch {}
-const cfg = loadConfig();
-const threshold = cfg.threshold ?? 0.35;
 const realUsage = getTokenUsage(transcript_path);
 const currentTokens = realUsage
 	? realUsage.current_tokens
 	: estimateTokens(transcript_path);
-const maxTokens = realUsage?.max_tokens || resolveMaxTokens() || 200000;
-const pct = currentTokens / maxTokens;
 const source = realUsage ? "real" : "estimated";
-const headroom = Math.max(0, Math.round(maxTokens * threshold - currentTokens));
-const pctDisplay = (pct * 100).toFixed(1);
-const thresholdDisplay = Math.round(threshold * 100);
-let recommendation;
-if (pct < threshold * 0.5)
-	recommendation = "All clear. Plenty of context remaining.";
-else if (pct < threshold)
-	recommendation = "Approaching threshold. Consider wrapping up complex tasks.";
-else
-	recommendation =
-		"At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
 // Don't overwrite a recent state file with estimated data — checkpoint writes
 // or the submit hook may have written accurate post-compaction counts that we'd clobber.
 if (source === "estimated") {
@@ -77,24 +57,47 @@ if (source === "estimated") {
 	} catch {}
 }
-// Carry forward savings estimates and baseline overhead from the existing state file.
-// This avoids re-reading and re-parsing the full transcript (~50MB at scale).
-let smartEstimatePct = 0;
-let recentEstimatePct = 0;
+// Read previous state for carry-forward values.
 let baselineOverhead = 0;
 let baselineResponseCount = 0;
 try {
 	const sf = stateFile(session_id);
 	if (fs.existsSync(sf)) {
 		const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
-		smartEstimatePct = prev.smart_estimate_pct ?? 0;
-		recentEstimatePct = prev.recent_estimate_pct ?? 0;
 		baselineOverhead = prev.baseline_overhead ?? 0;
 		baselineResponseCount = prev.baseline_response_count ?? 0;
 	}
 } catch (e) {
 	log(`state-read-error session=${session_id}: ${e.message}`);
 }
+// The statusline state file (~/.claude/cg/) is the primary source for
+// context_window_size and model — the statusline receives these directly
+// from Claude Code and is always authoritative, including after /model switches.
+let ccContextWindowSize = null;
+let ccModelId = null;
+try {
+	const slFile = statuslineStateFile(session_id);
+	if (fs.existsSync(slFile)) {
+		const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
+		ccContextWindowSize = slState.context_window_size ?? null;
+		ccModelId = slState.cc_model_id ?? null;
+	}
+} catch {}
+const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
+const threshold = adaptiveThreshold(maxTokens);
+const pct = currentTokens / maxTokens;
+const headroom = Math.max(0, Math.round(maxTokens * threshold - currentTokens));
+const pctDisplay = (pct * 100).toFixed(1);
+const thresholdDisplay = Math.round(threshold * 100);
+let recommendation;
+if (pct < threshold * 0.5)
+	recommendation = "All clear. Plenty of context remaining.";
+else if (pct < threshold)
+	recommendation = "Approaching threshold. Consider wrapping up complex tasks.";
+else
+	recommendation =
+		"At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
 if (baselineResponseCount < 2 && currentTokens > 0) {
 	if (baselineOverhead) {
@@ -106,24 +109,6 @@ if (baselineResponseCount < 2 && currentTokens > 0) {
 	log(
 		`baseline-overhead session=${session_id} tokens=${baselineOverhead} response=${baselineResponseCount}`,
 	);
-	// Recompute estimates now that we have the baseline — the submit hook ran
-	// before us and wrote 0 estimates because it didn't have the baseline yet.
-	try {
-		const savings = estimateSavings(
-			transcript_path,
-			currentTokens,
-			maxTokens,
-			baselineOverhead,
-		);
-		smartEstimatePct = savings.smartPct;
-		recentEstimatePct = savings.recentPct;
-		log(
-			`baseline-recompute session=${session_id} smart=${smartEstimatePct}% recent=${recentEstimatePct}%`,
-		);
-	} catch (e) {
-		log(`baseline-recompute-error: ${e.message}`);
-	}
 }
 try {
@@ -135,6 +120,7 @@ try {
 	const stateJson = JSON.stringify({
 		current_tokens: currentTokens,
 		max_tokens: maxTokens,
+		context_window_size: ccContextWindowSize,
 		pct,
 		pct_display: pctDisplay,
 		threshold,
@@ -143,9 +129,7 @@ try {
 		headroom,
 		recommendation,
 		source,
-		model: realUsage?.model || "unknown",
-		smart_estimate_pct: smartEstimatePct,
-		recent_estimate_pct: recentEstimatePct,
+		model: ccModelId || realUsage?.model || "unknown",
 		baseline_overhead: baselineOverhead,
 		baseline_response_count: baselineResponseCount,
 		payload_bytes: payloadBytes,

package/hooks/submit.mjs CHANGED Viewed

@@ -8,8 +8,7 @@
  * @module submit-hook
  */
 import fs from "node:fs";
-import { loadConfig, resolveMaxTokens } from "../lib/config.mjs";
-import { estimateSavings } from "../lib/estimate.mjs";
+import { adaptiveThreshold, resolveMaxTokens } from "../lib/config.mjs";
 import { log } from "../lib/logger.mjs";
 import {
 	atomicWriteFileSync,
@@ -45,17 +44,40 @@ try {
 	payloadBytes = fs.statSync(transcript_path).size;
 } catch {}
-const cfg = loadConfig();
-const threshold = cfg.threshold ?? 0.35;
 const realUsage = getTokenUsage(transcript_path);
 const currentTokens = realUsage
 	? realUsage.current_tokens
 	: estimateTokens(transcript_path);
-const maxTokens = realUsage?.max_tokens || resolveMaxTokens() || 200000;
-const pct = currentTokens / maxTokens;
 const source = realUsage ? "real" : "estimated";
+// Read previous state for baseline overhead.
+let baselineOverhead = 0;
+try {
+	const sf = stateFile(session_id);
+	if (fs.existsSync(sf)) {
+		const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
+		baselineOverhead = prev.baseline_overhead ?? 0;
+	}
+} catch (e) {
+	log(`state-read-error session=${session_id}: ${e.message}`);
+}
+// The statusline state file (~/.claude/cg/) is the primary source for
+// context_window_size and model — the statusline receives these directly
+// from Claude Code and is always authoritative, including after /model switches.
+let ccContextWindowSize = null;
+let ccModelId = null;
+try {
+	const slFile = statuslineStateFile(session_id);
+	if (fs.existsSync(slFile)) {
+		const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
+		ccContextWindowSize = slState.context_window_size ?? null;
+		ccModelId = slState.cc_model_id ?? null;
+	}
+} catch {}
+const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
+const threshold = adaptiveThreshold(maxTokens);
+const pct = currentTokens / maxTokens;
 log(
 	`check session=${session_id} tokens=${currentTokens}/${maxTokens} pct=${(pct * 100).toFixed(1)}% threshold=${(threshold * 100).toFixed(0)}% source=${source}`,
 );
@@ -73,34 +95,16 @@ else
 	recommendation =
 		"At threshold. Compaction recommended — run /cg:compact or /cg:prune.";
-// Read measured baseline overhead from state (captured by stop hook on first response)
-let baselineOverhead = 0;
-try {
-	const sf = stateFile(session_id);
-	if (fs.existsSync(sf)) {
-		const prev = JSON.parse(fs.readFileSync(sf, "utf8"));
-		baselineOverhead = prev.baseline_overhead ?? 0;
-	}
-} catch (e) {
-	log(`state-read-error session=${session_id}: ${e.message}`);
-}
-const savings = estimateSavings(
-	transcript_path,
-	currentTokens,
-	maxTokens,
-	baselineOverhead,
-);
 try {
 	ensureDataDir();
 	const remaining = Math.max(
 		0,
 		Math.round(thresholdDisplay - Number.parseFloat(pctDisplay)),
 	);
-	const stateJson = JSON.stringify({
+	const stateObj = {
 		current_tokens: currentTokens,
 		max_tokens: maxTokens,
+		context_window_size: ccContextWindowSize,
 		pct,
 		pct_display: pctDisplay,
 		threshold,
@@ -109,15 +113,14 @@ try {
 		headroom,
 		recommendation,
 		source,
-		model: realUsage?.model || "unknown",
-		smart_estimate_pct: savings.smartPct,
-		recent_estimate_pct: savings.recentPct,
+		model: ccModelId || realUsage?.model || "unknown",
 		baseline_overhead: baselineOverhead,
 		payload_bytes: payloadBytes,
 		session_id,
 		transcript_path,
 		ts: Date.now(),
-	});
+	};
+	const stateJson = JSON.stringify(stateObj);
 	atomicWriteFileSync(stateFile(session_id), stateJson);
 	// Also write to fixed fallback location so the statusline can find it

package/lib/checkpoint.mjs CHANGED Viewed

@@ -17,6 +17,7 @@ import {
 	ensureDataDir,
 	rotateCheckpoints,
 	stateFile,
+	statuslineStateFile,
 } from "./paths.mjs";
 import { formatCompactionStats } from "./stats.mjs";
 import { estimateOverhead, estimateTokens, getTokenUsage } from "./tokens.mjs";
@@ -127,8 +128,6 @@ export function writeCompactionState(
 				recommendation: rec,
 				source: "estimated",
 				model: "unknown",
-				smart_estimate_pct: 0,
-				recent_estimate_pct: 0,
 				baseline_overhead: baselineOverhead,
 				payload_bytes: payloadBytes,
 				session_id: sessionId,
@@ -175,7 +174,17 @@ export function performCompaction(opts) {
 	// Extract and cap content
 	const usage = getTokenUsage(transcriptPath);
-	const capMax = usage?.max_tokens || resolveMaxTokens() || 200000;
+	// Read authoritative context_window_size from statusline state file.
+	let ccContextWindowSize = null;
+	try {
+		const slFile = statuslineStateFile(sessionId);
+		if (fs.existsSync(slFile)) {
+			const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
+			ccContextWindowSize = slState.context_window_size ?? null;
+		}
+	} catch {}
+	const capMax = ccContextWindowSize || resolveMaxTokens() || 200000;
 	let content =
 		mode === "smart"
 			? extractConversation(transcriptPath)
@@ -217,7 +226,8 @@ export function performCompaction(opts) {
 		preStats?.currentTokens ||
 		usage?.current_tokens ||
 		estimateTokens(transcriptPath);
-	const preMax = preStats?.maxTokens || usage?.max_tokens || resolveMaxTokens();
+	const preMax =
+		preStats?.maxTokens || ccContextWindowSize || resolveMaxTokens();
 	// Read baseline overhead from state file if available
 	let baselineOverhead = 0;

package/lib/config.mjs CHANGED Viewed

@@ -14,14 +14,14 @@ let _cachedConfig = null;
 export function loadConfig() {
 	if (_cachedConfig) return _cachedConfig;
+	let raw = {};
 	try {
-		_cachedConfig = {
-			...DEFAULT_CONFIG,
-			...JSON.parse(fs.readFileSync(CONFIG_FILE, "utf8")),
-		};
-	} catch {
-		_cachedConfig = { ...DEFAULT_CONFIG };
-	}
+		raw = JSON.parse(fs.readFileSync(CONFIG_FILE, "utf8"));
+	} catch {}
+	_cachedConfig = { ...DEFAULT_CONFIG, ...raw };
+	// Track whether the user explicitly set a threshold via /cg:config.
+	// If not, hooks and statusline use the adaptive threshold instead.
+	_cachedConfig._thresholdExplicit = "threshold" in raw;
 	return _cachedConfig;
 }
@@ -30,11 +30,38 @@ export function loadConfig() {
 //   1. Explicit max_tokens in config (covers most cases)
 //   2. Safe default (200K)
 //
-// The submit hook detects max_tokens from the model name in the transcript
-// (getTokenUsage in tokens.mjs). This config value is the initial fallback
-// before any assistant response provides real model info.
+// The statusline writes the authoritative context_window_size to the
+// per-session state file. This config value is the fallback before the
+// statusline has fired.
 // ---------------------------------------------------------------------------
 export function resolveMaxTokens() {
 	const cfg = loadConfig();
 	return cfg.max_tokens ?? 200000;
 }
+// ---------------------------------------------------------------------------
+// Adaptive threshold — scales with context window size.
+//
+// Context rot research shows quality degrades measurably at 80-150K tokens
+// regardless of window size. A 200K window needs a higher threshold (alert
+// later as a %) because system overhead eats a large share. A 1M window
+// needs a lower threshold so the alert fires before quality degrades.
+//
+//   200K → 55%  (alert at 110K tokens)
+//   500K → 46%  (alert at 230K tokens)
+//     1M → 30%  (alert at 300K tokens)
+//
+// If the user explicitly set a threshold via /cg:config, that wins.
+// ---------------------------------------------------------------------------
+export function adaptiveThreshold(maxTokens) {
+	const cfg = loadConfig();
+	if (cfg._thresholdExplicit) return cfg.threshold;
+	return computeAdaptiveThreshold(maxTokens ?? cfg.max_tokens ?? 200000);
+}
+export function computeAdaptiveThreshold(maxTokens) {
+	return Math.min(
+		0.55,
+		Math.max(0.25, 0.55 - ((maxTokens - 200000) * 0.25) / 800000),
+	);
+}

package/lib/handoff.mjs CHANGED Viewed

@@ -13,7 +13,7 @@ import fs from "node:fs";
 import path from "node:path";
 import { resolveMaxTokens } from "./config.mjs";
 import { log } from "./logger.mjs";
-import { stateFile } from "./paths.mjs";
+import { stateFile, statuslineStateFile } from "./paths.mjs";
 import { estimateOverhead, getTokenUsage } from "./tokens.mjs";
 import { extractConversation } from "./transcript.mjs";
@@ -71,7 +71,17 @@ export function performHandoff({ transcriptPath, sessionId, label = "" }) {
 	const usage = getTokenUsage(transcriptPath);
 	const preTokens =
 		usage?.current_tokens || Math.round(Buffer.byteLength(content, "utf8") / 4);
-	const maxTokens = usage?.max_tokens || resolveMaxTokens() || 200000;
+	// Read authoritative context_window_size from statusline state file.
+	let ccContextWindowSize = null;
+	try {
+		const slFile = statuslineStateFile(sessionId);
+		if (fs.existsSync(slFile)) {
+			const slState = JSON.parse(fs.readFileSync(slFile, "utf8"));
+			ccContextWindowSize = slState.context_window_size ?? null;
+		}
+	} catch {}
+	const maxTokens = ccContextWindowSize || resolveMaxTokens() || 200000;
 	const postTokens = Math.round(Buffer.byteLength(fullContent, "utf8") / 4);
 	let baselineOverhead = 0;