@ricky-stevens/context-guardian 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +29 -0
- package/.claude-plugin/plugin.json +63 -0
- package/.github/workflows/ci.yml +66 -0
- package/CLAUDE.md +132 -0
- package/LICENSE +21 -0
- package/README.md +362 -0
- package/biome.json +34 -0
- package/bun.lock +31 -0
- package/hooks/precompact.mjs +73 -0
- package/hooks/session-start.mjs +133 -0
- package/hooks/stop.mjs +172 -0
- package/hooks/submit.mjs +133 -0
- package/lib/checkpoint.mjs +258 -0
- package/lib/compact-cli.mjs +124 -0
- package/lib/compact-output.mjs +350 -0
- package/lib/config.mjs +40 -0
- package/lib/content.mjs +33 -0
- package/lib/diagnostics.mjs +221 -0
- package/lib/estimate.mjs +254 -0
- package/lib/extract-helpers.mjs +869 -0
- package/lib/handoff.mjs +329 -0
- package/lib/logger.mjs +34 -0
- package/lib/mcp-tools.mjs +200 -0
- package/lib/paths.mjs +90 -0
- package/lib/stats.mjs +81 -0
- package/lib/statusline.mjs +123 -0
- package/lib/synthetic-session.mjs +273 -0
- package/lib/tokens.mjs +170 -0
- package/lib/tool-summary.mjs +399 -0
- package/lib/transcript.mjs +939 -0
- package/lib/trim.mjs +158 -0
- package/package.json +22 -0
- package/skills/compact/SKILL.md +20 -0
- package/skills/config/SKILL.md +70 -0
- package/skills/handoff/SKILL.md +26 -0
- package/skills/prune/SKILL.md +20 -0
- package/skills/stats/SKILL.md +100 -0
- package/sonar-project.properties +12 -0
- package/test/checkpoint.test.mjs +171 -0
- package/test/compact-cli.test.mjs +230 -0
- package/test/compact-output.test.mjs +284 -0
- package/test/compaction-e2e.test.mjs +809 -0
- package/test/content.test.mjs +86 -0
- package/test/diagnostics.test.mjs +188 -0
- package/test/edge-cases.test.mjs +543 -0
- package/test/estimate.test.mjs +262 -0
- package/test/extract-helpers-coverage.test.mjs +333 -0
- package/test/extract-helpers.test.mjs +234 -0
- package/test/handoff.test.mjs +738 -0
- package/test/integration.test.mjs +582 -0
- package/test/logger.test.mjs +70 -0
- package/test/manual-compaction-test.md +426 -0
- package/test/mcp-tools.test.mjs +443 -0
- package/test/paths.test.mjs +250 -0
- package/test/quick-compaction-test.md +191 -0
- package/test/stats.test.mjs +88 -0
- package/test/statusline.test.mjs +222 -0
- package/test/submit.test.mjs +232 -0
- package/test/synthetic-session.test.mjs +600 -0
- package/test/tokens.test.mjs +293 -0
- package/test/tool-summary.test.mjs +771 -0
- package/test/transcript-coverage.test.mjs +369 -0
- package/test/transcript.test.mjs +596 -0
- package/test/trim.test.mjs +356 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "context-guardian",
|
|
3
|
+
"owner": {
|
|
4
|
+
"name": "Ricky Stevens",
|
|
5
|
+
"email": ""
|
|
6
|
+
},
|
|
7
|
+
"plugins": [
|
|
8
|
+
{
|
|
9
|
+
"name": "cg",
|
|
10
|
+
"source": "./",
|
|
11
|
+
"description": "Automatic context window monitoring and smart compaction for Claude Code",
|
|
12
|
+
"version": "2.1.0",
|
|
13
|
+
"author": {
|
|
14
|
+
"name": "Ricky"
|
|
15
|
+
},
|
|
16
|
+
"homepage": "https://github.com/Ricky-Stevens/context-guardian",
|
|
17
|
+
"repository": "https://github.com/Ricky-Stevens/context-guardian",
|
|
18
|
+
"license": "MIT",
|
|
19
|
+
"keywords": [
|
|
20
|
+
"context",
|
|
21
|
+
"tokens",
|
|
22
|
+
"compaction",
|
|
23
|
+
"memory",
|
|
24
|
+
"context-window"
|
|
25
|
+
],
|
|
26
|
+
"category": "productivity"
|
|
27
|
+
}
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "cg",
|
|
3
|
+
"version": "2.1.0",
|
|
4
|
+
"description": "Automatic context window monitoring and smart compaction for Claude Code",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Ricky Stevens",
|
|
7
|
+
"url": "https://github.com/Ricky-Stevens"
|
|
8
|
+
},
|
|
9
|
+
"repository": "https://github.com/Ricky-Stevens/context-guardian",
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"keywords": ["context", "tokens", "compaction", "memory", "context-window"],
|
|
12
|
+
"category": "productivity",
|
|
13
|
+
"tags": ["context-management", "smart-compact", "token-monitoring"],
|
|
14
|
+
|
|
15
|
+
"skills": "./skills/",
|
|
16
|
+
|
|
17
|
+
"hooks": {
|
|
18
|
+
"SessionStart": [
|
|
19
|
+
{
|
|
20
|
+
"matcher": "",
|
|
21
|
+
"hooks": [
|
|
22
|
+
{
|
|
23
|
+
"type": "command",
|
|
24
|
+
"command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/session-start.mjs"
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
],
|
|
29
|
+
"UserPromptSubmit": [
|
|
30
|
+
{
|
|
31
|
+
"matcher": "",
|
|
32
|
+
"hooks": [
|
|
33
|
+
{
|
|
34
|
+
"type": "command",
|
|
35
|
+
"command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/submit.mjs"
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
],
|
|
40
|
+
"PreCompact": [
|
|
41
|
+
{
|
|
42
|
+
"matcher": "",
|
|
43
|
+
"hooks": [
|
|
44
|
+
{
|
|
45
|
+
"type": "command",
|
|
46
|
+
"command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/precompact.mjs"
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
"Stop": [
|
|
52
|
+
{
|
|
53
|
+
"matcher": "",
|
|
54
|
+
"hooks": [
|
|
55
|
+
{
|
|
56
|
+
"type": "command",
|
|
57
|
+
"command": "node ${CLAUDE_PLUGIN_ROOT}/hooks/stop.mjs"
|
|
58
|
+
}
|
|
59
|
+
]
|
|
60
|
+
}
|
|
61
|
+
]
|
|
62
|
+
}
|
|
63
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
lint:
|
|
14
|
+
name: Lint
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- uses: oven-sh/setup-bun@v2
|
|
19
|
+
- run: bun install --frozen-lockfile
|
|
20
|
+
- run: bunx biome check hooks/ lib/
|
|
21
|
+
|
|
22
|
+
test:
|
|
23
|
+
name: Test (Bun)
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
- uses: oven-sh/setup-bun@v2
|
|
28
|
+
- run: bun install --frozen-lockfile
|
|
29
|
+
- run: bun test
|
|
30
|
+
- name: Generate coverage
|
|
31
|
+
run: bun test --coverage --coverage-reporter=lcov --coverage-dir=coverage
|
|
32
|
+
- uses: actions/upload-artifact@v4
|
|
33
|
+
with:
|
|
34
|
+
name: coverage
|
|
35
|
+
path: coverage/lcov.info
|
|
36
|
+
|
|
37
|
+
test-node:
|
|
38
|
+
name: Test (Node ${{ matrix.node-version }})
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
strategy:
|
|
41
|
+
matrix:
|
|
42
|
+
node-version: [20, 22, 24]
|
|
43
|
+
steps:
|
|
44
|
+
- uses: actions/checkout@v4
|
|
45
|
+
- uses: actions/setup-node@v4
|
|
46
|
+
with:
|
|
47
|
+
node-version: ${{ matrix.node-version }}
|
|
48
|
+
- uses: oven-sh/setup-bun@v2
|
|
49
|
+
- run: bun install --frozen-lockfile
|
|
50
|
+
- run: node --test test/*.test.mjs
|
|
51
|
+
|
|
52
|
+
sonarcloud:
|
|
53
|
+
name: SonarCloud Analysis
|
|
54
|
+
runs-on: ubuntu-latest
|
|
55
|
+
needs: test
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
with:
|
|
59
|
+
fetch-depth: 0
|
|
60
|
+
- uses: actions/download-artifact@v4
|
|
61
|
+
with:
|
|
62
|
+
name: coverage
|
|
63
|
+
path: coverage
|
|
64
|
+
- uses: SonarSource/sonarqube-scan-action@v6
|
|
65
|
+
env:
|
|
66
|
+
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Context Guardian
|
|
2
|
+
|
|
3
|
+
A Claude Code **plugin** — four hooks + five skills + a shared library. Monitors context window usage via a real-time statusline and provides on-demand compaction tools.
|
|
4
|
+
|
|
5
|
+
## Critical Rules
|
|
6
|
+
|
|
7
|
+
### Versioning — bump ALL FOUR files or marketplace updates break:
|
|
8
|
+
1. `package.json` → `"version"`
|
|
9
|
+
2. `.claude-plugin/plugin.json` → `"version"`
|
|
10
|
+
3. `.claude-plugin/marketplace.json` → `plugins[0].version`
|
|
11
|
+
4. `README.md` → the version badge (`[![Version](...)]`)
|
|
12
|
+
|
|
13
|
+
### Key Conventions
|
|
14
|
+
- Session flags (`.claude/cg-*`) live in the **project's** `.claude/` dir, not plugin data — they're project-scoped and cleaned by SessionStart.
|
|
15
|
+
- `.context-guardian/` at the project root holds user-visible artifacts (handoffs, checkpoint copies). Project-scoped, gitignored.
|
|
16
|
+
- `${CLAUDE_PLUGIN_DATA}` (fallback `~/.claude/cg/`) holds plugin-internal state (config, session state, checkpoints, synthetic session manifest).
|
|
17
|
+
- Skills invoke CLI entry points (`compact-cli.mjs`) via Bash because skills don't fire `UserPromptSubmit`.
|
|
18
|
+
- Compaction and handoff automatically create synthetic JSONL sessions so `/resume cg:{hash}` (compaction) or `/resume cg:{label}` (handoff) loads checkpoints as real conversation messages.
|
|
19
|
+
|
|
20
|
+
## Statusline — Primary UX
|
|
21
|
+
|
|
22
|
+
The statusline is CG's main communication channel. It shows real-time context usage and session size in the terminal status bar:
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
Context usage: 3% | Session size: 0.4/20MB | /cg:stats for more
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**Two independent metrics, two independent color schemes:**
|
|
29
|
+
|
|
30
|
+
| Metric | Green | Yellow | Red |
|
|
31
|
+
|--------|-------|--------|-----|
|
|
32
|
+
| Context usage | `pct < threshold × 0.7` | `threshold × 0.7 <= pct < threshold` | `pct >= threshold` |
|
|
33
|
+
| Session size | `< 10MB` | `10–15MB` | `>= 15MB` |
|
|
34
|
+
|
|
35
|
+
- **Green/Yellow:** labels are dim/grey, only the numbers are colored
|
|
36
|
+
- **Red:** entire label+number goes bold red for maximum visibility
|
|
37
|
+
|
|
38
|
+
**Session size** is the estimated API request payload — transcript file size + system overhead (baseline_overhead × 4). This is separate from the token context window. The ~20MB API payload limit can lock users out entirely (can't send messages, can't even compact). The statusline warns before that happens.
|
|
39
|
+
|
|
40
|
+
At threshold, the trailing hint changes to: `compaction recommended — /cg:compact`
|
|
41
|
+
|
|
42
|
+
The session-start hook auto-configures the statusline and **reclaims it** if overwritten by another tool. The diagnostics check flags a missing CG statusline as a failure.
|
|
43
|
+
|
|
44
|
+
## How The Submit Hook Works
|
|
45
|
+
|
|
46
|
+
1. Every user message → submit hook reads real token counts from `message.usage` in the transcript JSONL. Falls back to byte estimation only on the very first message.
|
|
47
|
+
2. Measures transcript file size (`fs.statSync`) as `payload_bytes` — proxy for the API request payload size.
|
|
48
|
+
3. Writes token state + payload bytes to `state-{sessionId}.json` — consumed by `/cg:stats` and the statusline.
|
|
49
|
+
4. Also writes state to the fixed fallback location (`~/.claude/cg/`) so the statusline can find it (the statusline process doesn't receive `CLAUDE_PLUGIN_DATA`).
|
|
50
|
+
5. Handles manual compaction (`/cg:compact`, `/cg:prune`) via flag files or direct command detection.
|
|
51
|
+
6. Messages beginning with `/` (slash commands) bypass the hook entirely.
|
|
52
|
+
|
|
53
|
+
No blocking, no menus, no cooldowns. The statusline handles all context pressure communication.
|
|
54
|
+
|
|
55
|
+
## Compaction Design
|
|
56
|
+
|
|
57
|
+
**Noise removal, not summarisation. Never lose context. No LLM involved — deterministic string processing.**
|
|
58
|
+
|
|
59
|
+
### What stays (decision-relevant)
|
|
60
|
+
- All user text (except affirmative confirmations like "yes", "ok")
|
|
61
|
+
- All assistant reasoning text
|
|
62
|
+
- Edit/Write diffs (start+end trim if >3K)
|
|
63
|
+
- Bash commands + output (start+end trim if >5K)
|
|
64
|
+
- AskUserQuestion answers, WebSearch results, Serena write tools, sequential thinking, agent results, all errors
|
|
65
|
+
|
|
66
|
+
### What's removed (re-obtainable / noise)
|
|
67
|
+
- File read results (Read/Grep/Glob) — dominant bloat, 30-50% of tokens
|
|
68
|
+
- Thinking/redacted_thinking blocks
|
|
69
|
+
- System/progress messages, Edit/Write success results
|
|
70
|
+
- Serena read/query results, context-mode results, Serena memory results
|
|
71
|
+
|
|
72
|
+
### Truncation
|
|
73
|
+
Never truncate at a single cut-off point. Start+end trim: keep first N chars (intent) + last N chars (outcome), mark middle with `[...N chars trimmed from middle...]`.
|
|
74
|
+
|
|
75
|
+
### Skip rules for user messages
|
|
76
|
+
- Slash commands, CG menu replies (0-4, cancel), compact markers, affirmative confirmations, system injections → skip
|
|
77
|
+
- "no", "n", bare numbers → KEEP (decisions)
|
|
78
|
+
- Long structured user messages → KEEP regardless of size
|
|
79
|
+
|
|
80
|
+
### Smart Compact vs Keep Recent
|
|
81
|
+
- Smart Compact: all messages after last compaction boundary, with tiered compression and edit coalescing
|
|
82
|
+
- Keep Recent: last 10 user exchanges (grouped with responses), same extraction engine
|
|
83
|
+
|
|
84
|
+
## Session Handoff & Resume
|
|
85
|
+
|
|
86
|
+
- `/cg:handoff [name]` → extracts conversation (same as Smart Compact), writes to `.context-guardian/cg-handoff-{slug}-{datetime}.md`
|
|
87
|
+
- Both `/cg:compact` and `/cg:handoff` automatically create synthetic JSONL sessions in Claude Code's session directory
|
|
88
|
+
- User restores via native `/resume cg:{hash}` (for compaction) or `/resume cg:{label}` (for handoff)
|
|
89
|
+
- No custom resume skill — leverages Claude Code's built-in `/resume` which calls `setMessages()` to replace the conversation
|
|
90
|
+
- The synthetic session contains the checkpoint as a real user message (not `additionalContext`), giving higher attention fidelity
|
|
91
|
+
- A manifest (`synthetic-sessions.json` in plugin data dir) tracks one synthetic session per title, cleaning up the previous one on each write
|
|
92
|
+
- Compaction checkpoints are also copied to `.context-guardian/cg-checkpoint-*.md` for user visibility
|
|
93
|
+
- `rotateFiles` sorts by mtime (not filename) because label-prefixed filenames break alphabetical chronological ordering
|
|
94
|
+
|
|
95
|
+
## Token Counting
|
|
96
|
+
|
|
97
|
+
1. **Real counts (preferred):** `input_tokens + cache_creation_input_tokens + cache_read_input_tokens` from `message.usage` in transcript JSONL. Written by both submit and stop hooks.
|
|
98
|
+
2. **Byte estimation (fallback):** First message only. Content bytes / 4.
|
|
99
|
+
3. **Baseline overhead:** Stop hook captures on first response — irreducible floor (system prompts, tools, CLAUDE.md). Used in all savings estimates and session size calculation.
|
|
100
|
+
|
|
101
|
+
## Session Size (API Payload Monitoring)
|
|
102
|
+
|
|
103
|
+
The ~20MB API payload limit is **separate from the token context window**. When the raw request body exceeds ~20MB, the API rejects it entirely — you can't send messages, can't compact, can't do anything except `/clear`.
|
|
104
|
+
|
|
105
|
+
Session size = `transcript file size` + `baseline_overhead × 4` (system overhead in bytes). This is tracked in `payload_bytes` in the state file, displayed in the statusline and `/cg:stats`, and shown as before/after in compaction results.
|
|
106
|
+
|
|
107
|
+
The statusline reads session size from a fixed fallback location (`~/.claude/cg/state-*.json`) because the statusline process doesn't receive `CLAUDE_PLUGIN_DATA`. Both hooks write to this fallback in addition to the primary data dir.
|
|
108
|
+
|
|
109
|
+
## Transcript JSONL Format
|
|
110
|
+
|
|
111
|
+
Each line is JSON. Types: `user`, `assistant`, `system`, `progress`. User messages have `message.role === "user"` and `message.content` (string or array of text/tool_result/image/document blocks). Assistant messages have content arrays (text/tool_use/thinking blocks). Tool results link back via `tool_use_id`. The extraction engine uses a `Map<tool_use_id, {name, input}>` to classify results by originating tool.
|
|
112
|
+
|
|
113
|
+
## Testing
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
bun test # all tests
|
|
117
|
+
bun test test/handoff.test.mjs # handoff/resume tests only
|
|
118
|
+
tail -f ~/.claude/logs/cg.log # watch hook activity
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Testing Requirements
|
|
122
|
+
1. All tests must pass before recommending a push
|
|
123
|
+
2. Code must remain above 80% code coverage on lines, functions, statements and branches
|
|
124
|
+
3. Biome linting must pass before recommending a push
|
|
125
|
+
|
|
126
|
+
## SonarQube Quality Gate
|
|
127
|
+
|
|
128
|
+
1. Before recommending a push, load the environment with `source .env.local`, then run `sonar-scanner`.
|
|
129
|
+
2. After scanning, use SonarQube MCP tools to check Quality Gate status.
|
|
130
|
+
3. If issues are flagged, fix holistically (not one rule at a time) and re-scan. Max 3 fix-scan cycles.
|
|
131
|
+
4. If issues persist after 3 cycles, stop and report remaining issues with analysis.
|
|
132
|
+
5. Only recommend pushing when the Quality Gate PASSES.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ricky
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
# Context Guardian
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/Ricky-Stevens/context-guardian/actions/workflows/ci.yml)
|
|
4
|
+
[Version 2.1.0](https://github.com/Ricky-Stevens/context-guardian/releases)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
|
|
7
|
+
[](https://sonarcloud.io/summary/new_code?id=Ricky-Stevens_context-guardian)
|
|
8
|
+
[Node.js](https://nodejs.org)
|
|
9
|
+
|
|
10
|
+
**Automatic context window monitoring and smart compaction for Claude Code. Zero dependencies.**
|
|
11
|
+
|
|
12
|
+
Context Guardian watches your context window usage in real time via a statusline and provides on-demand compaction tools. When usage crosses a configurable threshold, the statusline turns red and recommends compaction - preserving your work and keeping Claude sharp.
|
|
13
|
+
|
|
14
|
+
Distributed as a **Claude Code plugin** - it's called "cg" due to how Claude Code namespaces skills. `/cg:stats` is easier to type than `/context-guardian:stats`.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
/plugin marketplace add https://github.com/Ricky-Stevens/context-guardian
|
|
22
|
+
/plugin install cg
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Note:** Claude's `/reload-plugins` can be unreliable - try opening a new session if you hit issues.
|
|
26
|
+
|
|
27
|
+
### Update
|
|
28
|
+
|
|
29
|
+
To pull the latest version:
|
|
30
|
+
|
|
31
|
+
1. Open `/plugins`
|
|
32
|
+
2. Go to **Marketplaces** tab → select the context-guardian marketplace → **Update marketplace**
|
|
33
|
+
3. Inside the marketplace, go to **Browse Plugins** → select cg → **Update**
|
|
34
|
+
4. Run `/reload-plugins` or restart your session
|
|
35
|
+
|
|
36
|
+
### Uninstall
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
/plugin uninstall cg
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Local development
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
claude --plugin-dir /path/to/cg
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Commands
|
|
51
|
+
|
|
52
|
+
Context Guardian adds five slash commands:
|
|
53
|
+
|
|
54
|
+
### `/cg:stats`
|
|
55
|
+
|
|
56
|
+
Shows current token usage, session size, compaction estimates, and recommendations.
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
┌─────────────────────────────────────────────────
|
|
60
|
+
│ Context Guardian Stats
|
|
61
|
+
│
|
|
62
|
+
│ Current usage: 372,000 / 1,000,000 tokens (37.2%)
|
|
63
|
+
│ Session size: 8.4MB / 20MB
|
|
64
|
+
│ Threshold: 35% (0% remaining to warning)
|
|
65
|
+
│ Data source: real counts
|
|
66
|
+
│
|
|
67
|
+
│ Model: claude-opus-4-6 / 1,000,000 tokens
|
|
68
|
+
│ Last updated: 12 seconds ago
|
|
69
|
+
│
|
|
70
|
+
│ /cg:compact ~37.2% → ~5%
|
|
71
|
+
│ /cg:prune ~37.2% → ~3%
|
|
72
|
+
│
|
|
73
|
+
│ /cg:handoff [name] save session for later
|
|
74
|
+
│
|
|
75
|
+
└─────────────────────────────────────────────────
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### `/cg:config`
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
/cg:config # show current config + auto-detected model/limit
|
|
82
|
+
/cg:config threshold 0.50 # trigger at 50%
|
|
83
|
+
/cg:config max_tokens 1000000 # override token limit
|
|
84
|
+
/cg:config reset # restore defaults
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### `/cg:compact`
|
|
88
|
+
|
|
89
|
+
Runs Smart Compact - a deterministic extraction engine that removes re-obtainable noise (file reads, grep results, thinking blocks, system messages) while preserving everything that matters: user messages, assistant reasoning, edit diffs, bash commands and output, user decisions, and errors. Typically achieves 70-90% reduction.
|
|
90
|
+
|
|
91
|
+
After compaction, use `/resume cg:[4-char hash]` to restore the checkpoint.
|
|
92
|
+
|
|
93
|
+
### `/cg:prune`
|
|
94
|
+
|
|
95
|
+
Keeps the last 10 user exchanges (each grouped with their assistant responses and tool summaries). Uses the same extraction engine as Smart Compact. Good when only recent work matters.
|
|
96
|
+
|
|
97
|
+
After pruning, use `/resume cg:[4-char hash]` to restore.
|
|
98
|
+
|
|
99
|
+
### `/cg:handoff`
|
|
100
|
+
|
|
101
|
+
Save your current session context for cross-session continuity. Uses the same extraction engine as Smart Compact.
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
/cg:handoff # save without a label
|
|
105
|
+
/cg:handoff my auth refactor # save with a custom name
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Handoff files are saved to `.context-guardian/` in your project root. Add this directory to your `.gitignore`.
|
|
109
|
+
|
|
110
|
+
To restore a handoff in a future session:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
/resume cg:my-auth-refactor # restore a specific handoff by label
|
|
114
|
+
/resume # browse all sessions including CG handoffs
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## The Problem: Context Rot
|
|
120
|
+
|
|
121
|
+
LLMs have a fixed context window - the total amount of text they can "see" at once. Claude Code sessions accumulate context rapidly: every message you send, every file Claude reads, every tool call and its output, every thinking block - it all stacks up.
|
|
122
|
+
|
|
123
|
+
When the context window fills:
|
|
124
|
+
|
|
125
|
+
- **The U-Shape.** Models perform best with information at the beginning or end of context. As the prompt grows, the middle gets less attention.
|
|
126
|
+
- **Claude starts forgetting.** Earlier instructions, architectural decisions, and code context silently drop out of the effective attention window. Claude doesn't tell you it's forgotten - it just stops using that information.
|
|
127
|
+
- **Quality degrades gradually.** You won't get an error. Responses become less coherent, less grounded in your codebase, and more likely to hallucinate.
|
|
128
|
+
- **Native `/compact` is destructive.** When Claude Code hits ~95% usage, it summarizes everything into a brief paragraph, destroying the accumulated context.
|
|
129
|
+
- **The 20MB wall.** Separately from the token limit, the API has a ~20MB request payload size limit. When your session's raw data exceeds this, the API rejects the request entirely — you can't send messages, can't compact, can't do anything except `/clear` and lose everything. Context Guardian tracks session size alongside token usage to warn you before you hit this hard wall.
|
|
130
|
+
|
|
131
|
+
Context rot is insidious because **it's invisible**. You don't know Claude has forgotten something until the output is wrong.
|
|
132
|
+
|
|
133
|
+
### Why This Matters More on Opus 4.6
|
|
134
|
+
|
|
135
|
+
Opus 4.6 has a **1,000,000 token context window** - 5x larger than the previous 200K. This sounds like a pure advantage, but it creates a unique problem:
|
|
136
|
+
|
|
137
|
+
- **Sessions last longer.** With 1M tokens, you can work for hours without hitting the limit. This means more accumulated context, more tool outputs, more file reads - and more opportunity for subtle quality degradation.
|
|
138
|
+
- **The degradation is slower but deeper.** On Sonnet, you hit the wall at 200K and are forced to compact relatively quickly. On Opus, you can drift into the 40-60% range where quality is noticeably degraded but you haven't hit any hard limit.
|
|
139
|
+
- **Cost scales with context.** Every API call sends the full context window. At 500K tokens, each message costs significantly more than at 50K. Compacting early saves money.
|
|
140
|
+
- **Compaction quality depends on what's in context.** At 35%, Claude's full conversation is in sharp focus - it can produce a high-fidelity extraction. At 70%, earlier context is already fuzzy.
|
|
141
|
+
|
|
142
|
+
The 1M window is powerful, but it requires active management. Context Guardian provides that management.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Why 35%?
|
|
147
|
+
|
|
148
|
+
Context Guardian triggers at **35% usage** by default. This is deliberately conservative.
|
|
149
|
+
|
|
150
|
+
### The Sweet Spot for Model Recall
|
|
151
|
+
|
|
152
|
+
[Research](https://news.mit.edu/2025/unpacking-large-language-model-bias-0617) on LLM attention patterns shows that models have a **U-shaped attention curve** - they attend strongly to the beginning and end of context, with weaker attention in the middle. As context grows:
|
|
153
|
+
|
|
154
|
+
| Usage Range | Model Behavior |
|
|
155
|
+
|-------------|---------------|
|
|
156
|
+
| **0-25%** | Full attention across all content. Maximum recall and coherence. |
|
|
157
|
+
| **25-40%** | Still strong. The "middle" is small enough that attention covers it well. |
|
|
158
|
+
| **40-60%** | Noticeable degradation. Middle content gets less attention. Early instructions may be partially forgotten. |
|
|
159
|
+
| **60-80%** | Significant degradation. Claude may contradict earlier decisions, forget constraints, or hallucinate details about code it read earlier. |
|
|
160
|
+
| **80-95%** | Critical zone. Effective context is much smaller than the raw number suggests. |
|
|
161
|
+
| **95%+** | Emergency auto-compact fires. Everything reduced to a brief summary. |
|
|
162
|
+
|
|
163
|
+
**35% sits at the boundary between "full recall" and "beginning to degrade."** It's the last point where you can extract with full confidence that the output will be accurate, because Claude still has strong attention over the entire conversation.
|
|
164
|
+
|
|
165
|
+
### What Actually Fills the Context
|
|
166
|
+
|
|
167
|
+
In a typical Claude Code session, your actual conversation - what you typed and what Claude replied - is only **30-40% of total context**. The rest is:
|
|
168
|
+
|
|
169
|
+
- **Tool outputs (40-50%):** File reads, grep results, command output. A single large file read can consume 10-20K tokens.
|
|
170
|
+
- **System prompts (~5%):** CLAUDE.md, plugin instructions, skill descriptions, MCP server configs.
|
|
171
|
+
- **Tool calls and thinking (10-15%):** The structured blocks that Claude generates internally.
|
|
172
|
+
|
|
173
|
+
Smart Compact strips the re-obtainable noise and keeps the decision-relevant content. That's why it typically achieves **70-90% reduction** - most of the context is tool infrastructure, not your actual work.
|
|
174
|
+
|
|
175
|
+
## How It Works
|
|
176
|
+
|
|
177
|
+
### Compaction Engine
|
|
178
|
+
|
|
179
|
+
Context Guardian uses a **deterministic string-processing engine** - no LLM is involved in extraction. It removes re-obtainable and disposable data while keeping all decision-relevant content at full fidelity.
|
|
180
|
+
|
|
181
|
+
**What stays (decision-relevant):**
|
|
182
|
+
- All user text messages (except affirmative confirmations like "yes", "ok")
|
|
183
|
+
- All assistant reasoning text
|
|
184
|
+
- Edit/Write diffs (start+end trimmed if >3K chars)
|
|
185
|
+
- Bash commands and output (start+end trimmed if >5K chars)
|
|
186
|
+
- User answers to questions (AskUserQuestion results)
|
|
187
|
+
- Web search results, Serena write operations, sequential thinking chains
|
|
188
|
+
- Agent results, all error responses
|
|
189
|
+
|
|
190
|
+
**What's removed (re-obtainable / noise):**
|
|
191
|
+
- File read results (Read, Grep, Glob) - the dominant bloat at 30-50% of tokens
|
|
192
|
+
- Thinking and redacted_thinking blocks
|
|
193
|
+
- System and progress messages
|
|
194
|
+
- Edit/Write success confirmations (just "success")
|
|
195
|
+
- Serena read/query results, context-mode results
|
|
196
|
+
|
|
197
|
+
**Truncation:** When content exceeds its size limit, it is never simply cut off at a single point. Start+end trim keeps the first N chars (intent) and last N chars (outcome), replacing only the middle with `[...N chars trimmed from middle...]`. This preserves the narrative thread because conclusions appear at the end.
|
|
198
|
+
|
|
199
|
+
### Token Counting
|
|
200
|
+
|
|
201
|
+
Context Guardian reads **actual token counts** from Claude Code's transcript. Every assistant message includes a `usage` object:
|
|
202
|
+
|
|
203
|
+
```json
|
|
204
|
+
{
|
|
205
|
+
"usage": {
|
|
206
|
+
"input_tokens": 3,
|
|
207
|
+
"cache_creation_input_tokens": 920,
|
|
208
|
+
"cache_read_input_tokens": 133868,
|
|
209
|
+
"output_tokens": 85
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
**Context used = `input_tokens` + `cache_creation_input_tokens` + `cache_read_input_tokens`**
|
|
215
|
+
|
|
216
|
+
These are the real values from the Anthropic API - the same numbers that determine your bill. On the first message of a session (before any assistant response exists), the plugin falls back to a byte-based estimate until real data is available.
|
|
217
|
+
|
|
218
|
+
### How Restore Works
|
|
219
|
+
|
|
220
|
+
After compaction or handoff, Context Guardian writes a **synthetic JSONL session** to Claude Code's session directory. This session contains the checkpoint as a real user message with a custom title (e.g., `cg` or `cg:my-feature`).
|
|
221
|
+
|
|
222
|
+
When you type `/resume cg:{hash}`, Claude Code's native resume mechanism finds and loads this synthetic session, replacing the current conversation with the checkpoint content. Because it's a real user message (not injected context), the model gives it full attention.
|
|
223
|
+
|
|
224
|
+
The flow:
|
|
225
|
+
1. Run `/cg:compact`, `/cg:prune`, or `/cg:handoff [name]` - checkpoint saved + synthetic session created
|
|
226
|
+
2. Type `/resume cg:{hash}` (or `/resume cg:{label}` for handoffs) - context restored
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Architecture
|
|
231
|
+
|
|
232
|
+
### Hooks
|
|
233
|
+
|
|
234
|
+
| Hook | Event | Purpose |
|
|
235
|
+
|------|-------|---------|
|
|
236
|
+
| `submit.mjs` | `UserPromptSubmit` | Writes token usage + payload size state for statusline and `/cg:stats` on every user message |
|
|
237
|
+
| `session-start.mjs` | `SessionStart` | Cleans stale session flags, auto-configures statusline, self-healing marketplace clone |
|
|
238
|
+
| `stop.mjs` | `Stop` | Writes fresh token state after each assistant response. Captures baseline overhead on first response. |
|
|
239
|
+
| `precompact.mjs` | `PreCompact` | Injects CG's extraction as additional context into Claude Code's native `/compact` |
|
|
240
|
+
|
|
241
|
+
### Skills
|
|
242
|
+
|
|
243
|
+
Skills invoke `compact-cli.mjs` via Bash (since skills don't fire `UserPromptSubmit`). The CLI sets `CLAUDE_PLUGIN_DATA`, runs the extraction pipeline, and outputs JSON for the skill to display.
|
|
244
|
+
|
|
245
|
+
| Skill | Entry Point |
|
|
246
|
+
|-------|-------------|
|
|
247
|
+
| `/cg:stats` | `lib/diagnostics.mjs` (health checks) + state file read |
|
|
248
|
+
| `/cg:config` | Direct config file read/write |
|
|
249
|
+
| `/cg:compact` | `lib/compact-cli.mjs smart` → `checkpoint.mjs:performCompaction()` |
|
|
250
|
+
| `/cg:prune` | `lib/compact-cli.mjs recent` → `checkpoint.mjs:performCompaction()` |
|
|
251
|
+
| `/cg:handoff` | `lib/compact-cli.mjs handoff` → `handoff.mjs:performHandoff()` |
|
|
252
|
+
|
|
253
|
+
### Token Counting
|
|
254
|
+
|
|
255
|
+
Two measurement methods, preferring the more accurate, plus an estimate written after compaction. State is written by **both** the submit hook (before the response) and the stop hook (after the response), so `/cg:stats` always reflects the latest counts.
|
|
256
|
+
|
|
257
|
+
1. **Real counts (preferred):** Reads `message.usage` from the most recent assistant message in the transcript JSONL. Calculates `input_tokens + cache_creation_input_tokens + cache_read_input_tokens`. Also detects the model name for auto-detecting max_tokens.
|
|
258
|
+
|
|
259
|
+
2. **Byte estimation (fallback):** Only used on the very first message of a session (before any assistant response). Counts content bytes after the most recent compact marker and divides by 4.
|
|
260
|
+
|
|
261
|
+
3. **Post-compaction estimates:** After compaction or checkpoint restore, a state file is written with estimated post-compaction token counts so `/cg:stats` works immediately.
|
|
262
|
+
|
|
263
|
+
### Baseline Overhead
|
|
264
|
+
|
|
265
|
+
On the first assistant response of each session, the stop hook captures the current token count as `baseline_overhead` - at that point, context is almost entirely system prompts, CLAUDE.md, and tool definitions. This measured value serves as an irreducible floor in all compaction savings estimates.
|
|
266
|
+
|
|
267
|
+
### Statusline
|
|
268
|
+
|
|
269
|
+
Context Guardian auto-configures a terminal statusline on first session start. It shows real-time context usage and session size:
|
|
270
|
+
|
|
271
|
+
```
|
|
272
|
+
Context usage: 3% | Session size: 0.4/20MB | /cg:stats for more
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
Two independent metrics with independent color schemes:
|
|
276
|
+
|
|
277
|
+
| Metric | Green | Yellow | Red |
|
|
278
|
+
|--------|-------|--------|-----|
|
|
279
|
+
| Context usage | Well below threshold | Approaching threshold | At/past threshold |
|
|
280
|
+
| Session size | < 10MB | 10–15MB | ≥ 15MB |
|
|
281
|
+
|
|
282
|
+
In green/yellow states, labels are dim/grey with only the numbers colored. At red, the entire label+number goes bold red for maximum visibility.
|
|
283
|
+
|
|
284
|
+
**Session size** tracks the estimated API request payload — transcript file size plus system overhead (prompts, tools, CLAUDE.md). The ~20MB API payload limit is separate from the token context window and can lock you out of a session entirely.
|
|
285
|
+
|
|
286
|
+
The session-start hook **reclaims the statusline** if another tool overwrites it, logging a warning and notifying the user via `additionalContext`.
|
|
287
|
+
|
|
288
|
+
### Model & Token Limit Auto-Detection
|
|
289
|
+
|
|
290
|
+
Every assistant message in the transcript includes a `model` field (e.g., `"claude-opus-4-6"`). Context Guardian uses this to set the token limit:
|
|
291
|
+
|
|
292
|
+
- **Opus 4.6+** (major >= 4, minor >= 6): **1,000,000 tokens**
|
|
293
|
+
- **Everything else** (Sonnet, Haiku, older Opus): **200,000 tokens**
|
|
294
|
+
|
|
295
|
+
This is imperfect - I haven't found a better way to do this yet. Contributions or ideas welcome.
|
|
296
|
+
|
|
297
|
+
You can override this with `/cg:config max_tokens <value>` if the auto-detection doesn't match your setup.
|
|
298
|
+
|
|
299
|
+
### Data Storage
|
|
300
|
+
|
|
301
|
+
All persistent data lives in the plugin's data directory (`${CLAUDE_PLUGIN_DATA}`, typically `~/.claude/plugins/data/cg/`):
|
|
302
|
+
|
|
303
|
+
| File | Purpose |
|
|
304
|
+
|------|---------|
|
|
305
|
+
| `config.json` | Threshold and max_tokens override |
|
|
306
|
+
| `state-{session_id}.json` | Latest token counts, payload bytes, model, transcript path (session-scoped) |
|
|
307
|
+
| `checkpoints/` | Saved compaction checkpoints (markdown) |
|
|
308
|
+
| `synthetic-sessions.json` | Manifest tracking synthetic JSONL sessions for `/resume` |
|
|
309
|
+
|
|
310
|
+
Each project also has a `.context-guardian/` directory at its root:
|
|
311
|
+
|
|
312
|
+
| File | Purpose |
|
|
313
|
+
|------|---------|
|
|
314
|
+
| `cg-handoff-[name]-{datetime}.md` | Session handoff files (from `/cg:handoff`) |
|
|
315
|
+
| `cg-checkpoint-{datetime}.md` | Copies of compaction checkpoints for visibility |
|
|
316
|
+
|
|
317
|
+
These files are project-scoped - each project gets its own isolated set. Add `.context-guardian/` to your `.gitignore`.
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
321
|
+
## Logging
|
|
322
|
+
|
|
323
|
+
All hook activity logs to `~/.claude/logs/cg.log`:
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
tail -f ~/.claude/logs/cg.log
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Log entries include token counts, threshold checks, checkpoint creation with compression stats, synthetic session writes, and handoff activity.
|
|
330
|
+
|
|
331
|
+
---
|
|
332
|
+
|
|
333
|
+
## Troubleshooting
|
|
334
|
+
|
|
335
|
+
**Token counts show "estimated":**
|
|
336
|
+
- Only happens on the first message of a session. After one exchange, counts become real.
|
|
337
|
+
|
|
338
|
+
**`/resume cg` doesn't find the session:**
|
|
339
|
+
- Ensure you ran `/cg:compact`, `/cg:prune`, or `/cg:handoff` first - these create the synthetic session.
|
|
340
|
+
- Check logs: `tail -20 ~/.claude/logs/cg.log`
|
|
341
|
+
|
|
342
|
+
**Plugin not loading:**
|
|
343
|
+
- Check logs: `tail -20 ~/.claude/logs/cg.log`
|
|
344
|
+
- Verify plugin is loaded: `/plugins`
|
|
345
|
+
- Try: `/plugin uninstall cg` then `/plugin install cg`
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
## Contributing
|
|
350
|
+
|
|
351
|
+
```bash
|
|
352
|
+
bun test # run all tests (604 across 24 files)
|
|
353
|
+
npx biome check # lint
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
The e2e test (`test/compaction-e2e.test.mjs`) creates a 26-turn coding session with 19 trackable facts and 5 noise items, verifying every fact survives extraction and all noise is removed. If any change drops a fact, the test names exactly which one was lost.
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## License
|
|
361
|
+
|
|
362
|
+
MIT
|