@grapine.ai/contextprune 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +426 -1
- package/dist/cli/commands/analyze.d.ts +2 -0
- package/dist/cli/commands/analyze.js +161 -0
- package/dist/cli/commands/compress.d.ts +2 -0
- package/dist/cli/commands/compress.js +65 -0
- package/dist/cli/commands/watch.d.ts +2 -0
- package/dist/cli/commands/watch.js +432 -0
- package/dist/cli/dashboard/index.html +720 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +19 -0
- package/dist/cli/labels.d.ts +4 -0
- package/dist/cli/labels.js +35 -0
- package/dist/cli/parse-input.d.ts +33 -0
- package/dist/cli/parse-input.js +191 -0
- package/dist/src/brief/index.d.ts +2 -0
- package/dist/src/brief/index.js +101 -0
- package/dist/src/classifier/confidence.d.ts +4 -0
- package/dist/src/classifier/confidence.js +23 -0
- package/dist/src/classifier/index.d.ts +11 -0
- package/dist/src/classifier/index.js +217 -0
- package/dist/src/classifier/patterns.d.ts +7 -0
- package/dist/src/classifier/patterns.js +81 -0
- package/dist/src/compression/engine.d.ts +23 -0
- package/dist/src/compression/engine.js +363 -0
- package/dist/src/index.d.ts +41 -0
- package/dist/src/index.js +120 -0
- package/dist/src/pipeline/index.d.ts +5 -0
- package/dist/src/pipeline/index.js +167 -0
- package/dist/src/scorer/index.d.ts +4 -0
- package/dist/src/scorer/index.js +136 -0
- package/dist/src/scorer/session-extractor.d.ts +2 -0
- package/dist/src/scorer/session-extractor.js +57 -0
- package/dist/src/strategy/selector.d.ts +3 -0
- package/dist/src/strategy/selector.js +158 -0
- package/dist/src/tokenizer/index.d.ts +18 -0
- package/dist/src/tokenizer/index.js +195 -0
- package/dist/src/types.d.ts +161 -0
- package/dist/src/types.js +5 -0
- package/dist/src/utils/index.d.ts +4 -0
- package/dist/src/utils/index.js +48 -0
- package/dist/src/validation/coherence.d.ts +3 -0
- package/dist/src/validation/coherence.js +87 -0
- package/license.md +14 -0
- package/package.json +76 -41
- package/index.js +0 -1
package/README.md
CHANGED
|
@@ -1 +1,426 @@
|
|
|
1
|
-
#
|
|
1
|
+
# @grapine.ai/contextprune
|
|
2
|
+
|
|
3
|
+
**Garbage collection for LLM context windows.**
|
|
4
|
+
|
|
5
|
+
Sits between your application and the LLM API. Analyzes your `messages[]` array, removes dead weight — stale tool outputs, resolved errors, superseded reasoning — and returns a leaner version. Every API call costs less. The model stays focused on what actually matters.
|
|
6
|
+
|
|
7
|
+
**100% local. No data sent anywhere. No LLM calls during compression.**
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @grapine.ai/contextprune
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## The problem
|
|
16
|
+
|
|
17
|
+
Long LLM sessions fill up fast:
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
Turn 1 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 12% 4,100 tokens
|
|
21
|
+
Turn 5 ████████████░░░░░░░░░░░░░░░░░░ 38% 12,800 tokens
|
|
22
|
+
Turn 10 ████████████████████░░░░░░░░░░ 58% 19,400 tokens
|
|
23
|
+
Turn 15 ████████████████████████████░░ 78% 26,100 tokens ← quality degrades here
|
|
24
|
+
Turn 20 ██████████████████████████████ 91% 30,600 tokens ← coherence cliff
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Around 65–75% utilization, model behavior suddenly gets worse — the model loses track of earlier constraints, repeats itself, makes mistakes it wouldn't make with a clean context. Most developers hit this, get confused, and manually clear context — losing all the good state too.
|
|
28
|
+
|
|
29
|
+
**With contextprune:**
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
Turn 1 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 12% 4,100 tokens —
|
|
33
|
+
Turn 5 ████████████░░░░░░░░░░░░░░░░░░ 38% 12,800 tokens —
|
|
34
|
+
Turn 6 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 11% 3,700 tokens ← compressed, 71% saved
|
|
35
|
+
Turn 10 ██████████░░░░░░░░░░░░░░░░░░░░ 28% 9,500 tokens —
|
|
36
|
+
Turn 11 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 10% 3,200 tokens ← compressed, 66% saved
|
|
37
|
+
Turn 20 ████████████░░░░░░░░░░░░░░░░░░ 34% 11,600 tokens ← never exceeds 40%
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Quick start
|
|
43
|
+
|
|
44
|
+
```ts
|
|
45
|
+
import { ContextPrune } from '@grapine.ai/contextprune';
|
|
46
|
+
|
|
47
|
+
const cp = new ContextPrune({ model: 'claude-sonnet-4-5' });
|
|
48
|
+
|
|
49
|
+
const result = await cp.compress(messages);
|
|
50
|
+
// result.messages is a drop-in replacement for messages
|
|
51
|
+
// result.summary.tokensSaved — tokens recovered
|
|
52
|
+
// result.summary.savingsPercent — e.g. 0.47 = 47% saved
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
A one-line change in your existing code:
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
// Before
|
|
59
|
+
const response = await anthropic.messages.create({
|
|
60
|
+
model: 'claude-sonnet-4-5',
|
|
61
|
+
messages, // ← growing unbounded
|
|
62
|
+
max_tokens: 8096,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
// After
|
|
66
|
+
const { messages: lean } = await cp.compress(messages);
|
|
67
|
+
const response = await anthropic.messages.create({
|
|
68
|
+
model: 'claude-sonnet-4-5',
|
|
69
|
+
messages: lean, // ← compressed
|
|
70
|
+
max_tokens: 8096,
|
|
71
|
+
});
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Installation
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
npm install @grapine.ai/contextprune
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Requires Node 18+. No mandatory peer dependencies — tiktoken is used for token counting when available, otherwise falls back to a character estimate.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## CLI
|
|
87
|
+
|
|
88
|
+
No code required. Run directly with `npx` — no install needed.
|
|
89
|
+
|
|
90
|
+
### `analyze` — understand what's in your context
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
npx @grapine.ai/contextprune analyze ./session.json
|
|
94
|
+
npx @grapine.ai/contextprune analyze ./session.jsonl # Claude Code session transcripts too
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
─── ContextPrune Analysis ──────────────────────────────────────────────────
|
|
99
|
+
Model: claude-sonnet-4-5 | Capacity: 200,000 tokens
|
|
100
|
+
|
|
101
|
+
████████████████░░░░░░░░░░░░░░ 56% used · 112,266 / 200,000 tokens
|
|
102
|
+
|
|
103
|
+
[SUGGESTED] Context is 56% full. Compression available but not urgent.
|
|
104
|
+
Projected savings: 48,100 tokens (43%) → 64,166 tokens after
|
|
105
|
+
|
|
106
|
+
Classification Breakdown:
|
|
107
|
+
Outdated Tool Result 82 msgs 53,099 tokens ████████████░ 47%
|
|
108
|
+
Chat / Filler 54 msgs 24,446 tokens ████████░░░░░ 22%
|
|
109
|
+
Tool Result (active) 86 msgs 23,528 tokens ████████░░░░░ 21%
|
|
110
|
+
Final Answer 1 msgs 11,406 tokens ████░░░░░░░░░ 10%
|
|
111
|
+
|
|
112
|
+
Compression Strategies:
|
|
113
|
+
Keep 141 msgs 64,166 tokens
|
|
114
|
+
Remove 69 msgs 37,814 tokens ← will be dropped
|
|
115
|
+
Trim to Key Output 8 msgs 8,320 tokens ← key output preserved
|
|
116
|
+
Collapse to 1 Line 1 msgs 1,966 tokens ← collapsed to marker
|
|
117
|
+
|
|
118
|
+
Top Token Consumers:
|
|
119
|
+
#32 Final Answer 11,406 tokens Preserved no opportunity
|
|
120
|
+
#55 Outdated Tool Result 6,801 tokens Remove high opportunity
|
|
121
|
+
#48 Outdated Tool Result 4,992 tokens Remove high opportunity
|
|
122
|
+
#61 Tool Result (active) 4,210 tokens Trim medium opportunity
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# Also print a session brief — a compact handoff prompt for starting a new session
|
|
127
|
+
npx @grapine.ai/contextprune analyze ./session.jsonl --brief
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### `compress` — compress a messages file
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
npx @grapine.ai/contextprune compress ./session.json -o compressed.json
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
✔ Compressed 112,266 → 64,166 tokens (43% saved, 48,100 tokens recovered)
|
|
138
|
+
|
|
139
|
+
Decisions:
|
|
140
|
+
Removed 69 messages (Outdated Tool Result, Chat/Filler)
|
|
141
|
+
Trimmed 8 messages (Tool Result — key output preserved)
|
|
142
|
+
Collapsed 1 message (Reasoning chain → 1-line marker)
|
|
143
|
+
Kept 141 messages (constraints, active errors, final answers)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Output is a standard JSON messages array — drop it straight into an API call:
|
|
147
|
+
|
|
148
|
+
```ts
|
|
149
|
+
const messages = JSON.parse(fs.readFileSync('compressed.json', 'utf-8'));
|
|
150
|
+
await anthropic.messages.create({ model: 'claude-sonnet-4-5', messages, max_tokens: 8096 });
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### `watch` — live dashboard in your browser
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
npx @grapine.ai/contextprune watch
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Discovers all Claude Code sessions in `~/.claude/projects/` and opens an interactive picker:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
Select a Claude project to monitor:
|
|
163
|
+
|
|
164
|
+
› labs/contextprune #b6c62a11 just now ● active
|
|
165
|
+
labs/my-app #a1d3f920 2h ago
|
|
166
|
+
work/api-service #cc8801ab 1d ago
|
|
167
|
+
|
|
168
|
+
↑↓ to navigate · Enter to select · Ctrl+C to cancel
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Opens a browser tab and starts live monitoring. The dashboard updates every time the session file changes.
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
# Or point directly at a file
|
|
175
|
+
npx @grapine.ai/contextprune watch --follow ~/.claude/projects/my-project/session.jsonl
|
|
176
|
+
|
|
177
|
+
# Use a different port
|
|
178
|
+
npx @grapine.ai/contextprune watch --port 8080
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Dashboard
|
|
184
|
+
|
|
185
|
+
A live browser dashboard that monitors your Claude Code sessions in real time. No configuration — run `npx @grapine.ai/contextprune watch` and it opens automatically.
|
|
186
|
+
|
|
187
|
+
**Healthy Context Dashboard**
|
|
188
|
+
|
|
189
|
+

|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
**Context Compression Recommendation Dashboard**
|
|
194
|
+
|
|
195
|
+

|
|
196
|
+
|
|
197
|
+
**What the dashboard shows:**
|
|
198
|
+
|
|
199
|
+
**Context Window** — utilization bar with colour-coded status (green → yellow → red). Switches to Compression Suggested / Compress Now badges as context fills up.
|
|
200
|
+
|
|
201
|
+
**Session Cost** — cost per API call with input/output/cache breakdown, grouped by calendar day with proportional bars.
|
|
202
|
+
|
|
203
|
+
**Classification Breakdown** — how your context is distributed across message types (Outdated Tool Result, Active Tool Result, Chat/Filler, Final Answer, etc.) with token counts and percentages.
|
|
204
|
+
|
|
205
|
+
**Compression Strategies** — what contextprune would do right now: Keep / Remove / Trim / Collapse counts.
|
|
206
|
+
|
|
207
|
+
**Compression Projection** — before/after utilization bars showing exactly how much would be recovered if you compressed now. Hidden when context is healthy.
|
|
208
|
+
|
|
209
|
+
**Top Consumers** — the largest individual messages ranked by token count, with their classification and compression opportunity.
|
|
210
|
+
|
|
211
|
+
**Session Brief** — auto-generated handoff prompt that appears at 65%+ utilization. One click copies a compact context summary you can paste into a new session to continue without losing state.
|
|
212
|
+
|
|
213
|
+
**Desktop notifications** — opt-in alerts at 65% utilization, then every 5% increment until you compress.
|
|
214
|
+
|
|
215
|
+
**Push data from your own process** (no file watching needed):
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
npx @grapine.ai/contextprune watch &
|
|
219
|
+
|
|
220
|
+
curl -X POST http://localhost:4242/analyze \
|
|
221
|
+
-H 'Content-Type: application/json' \
|
|
222
|
+
-d '{ "messages": [...], "model": "gpt-4o" }'
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Works with any provider — Anthropic, OpenAI, OpenRouter, Groq, or any messages array you construct yourself.
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Three ways to use it
|
|
230
|
+
|
|
231
|
+
### 1. `compress(messages)` — explicit, you decide when
|
|
232
|
+
|
|
233
|
+
```ts
|
|
234
|
+
const result = await cp.compress(messages);
|
|
235
|
+
|
|
236
|
+
console.log(result.summary.tokensSaved); // 48100
|
|
237
|
+
console.log(result.summary.savingsPercent); // 0.43
|
|
238
|
+
console.log(result.messages.length); // fewer messages
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
Compresses unconditionally every time you call it. Use this when you explicitly decide compression is warranted — after a tool-heavy phase, every N turns, or as part of a LangGraph compress node.
|
|
242
|
+
|
|
243
|
+
### 2. `watch(client)` — automatic, zero changes to call sites
|
|
244
|
+
|
|
245
|
+
```ts
|
|
246
|
+
// Wrap once at startup
|
|
247
|
+
const watched = cp.watch(anthropic);
|
|
248
|
+
|
|
249
|
+
// Use exactly as before — compression fires automatically when context > 65%
|
|
250
|
+
const response = await watched.messages.create({
|
|
251
|
+
model: 'claude-sonnet-4-5',
|
|
252
|
+
messages,
|
|
253
|
+
max_tokens: 8096,
|
|
254
|
+
});
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Works with Anthropic, OpenAI, and any OpenAI-compatible provider:
|
|
258
|
+
|
|
259
|
+
```ts
|
|
260
|
+
// OpenRouter
|
|
261
|
+
const client = new OpenAI({ baseURL: 'https://openrouter.ai/api/v1', apiKey: '...' });
|
|
262
|
+
const watched = cp.watch(client);
|
|
263
|
+
await watched.chat.completions.create({ model: 'meta-llama/llama-3.3-70b-instruct', messages });
|
|
264
|
+
|
|
265
|
+
// Groq
|
|
266
|
+
const watched = cp.watch(new Groq());
|
|
267
|
+
await watched.chat.completions.create({ model: 'llama3-70b-8192', messages });
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### 3. `analyze(messages)` — read-only inspection
|
|
271
|
+
|
|
272
|
+
```ts
|
|
273
|
+
const analysis = await cp.analyze(messages);
|
|
274
|
+
|
|
275
|
+
analysis.recommendation.urgency // 'none' | 'suggested' | 'recommended' | 'critical'
|
|
276
|
+
analysis.recommendation.projectedSavings // tokens that would be saved
|
|
277
|
+
analysis.sessionState.tokenBudget.utilizationPercent // 0.56
|
|
278
|
+
analysis.sessionBrief // markdown handoff prompt for context continuation
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
Never compresses — use this to build dashboards, gate on urgency, or log opportunities.
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## LangGraph
|
|
286
|
+
|
|
287
|
+
In a LangGraph agent, `state["messages"]` accumulates every tool result and intermediate step across all graph iterations. By call 20, a typical coding agent has 30–50k tokens of stale tool outputs.
|
|
288
|
+
|
|
289
|
+
**Wrap the client — zero changes inside the graph:**
|
|
290
|
+
|
|
291
|
+
```ts
|
|
292
|
+
import { ContextPrune } from '@grapine.ai/contextprune';
|
|
293
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
294
|
+
|
|
295
|
+
const client = new ContextPrune({ model: 'claude-sonnet-4-5' }).watch(new Anthropic());
|
|
296
|
+
|
|
297
|
+
// Every node compresses automatically, only when context > 65%
|
|
298
|
+
function callModel(state: MessagesState) {
|
|
299
|
+
return client.messages.create({ // ← unchanged
|
|
300
|
+
model: 'claude-sonnet-4-5',
|
|
301
|
+
messages: state.messages,
|
|
302
|
+
max_tokens: 8096,
|
|
303
|
+
});
|
|
304
|
+
}
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
**Or add a dedicated compress node:**
|
|
308
|
+
|
|
309
|
+
```ts
|
|
310
|
+
const cp = new ContextPrune({ model: 'claude-sonnet-4-5' });
|
|
311
|
+
|
|
312
|
+
async function compressNode(state: MessagesState) {
|
|
313
|
+
const result = await cp.compress(state.messages);
|
|
314
|
+
return { messages: result.messages };
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
builder
|
|
318
|
+
.addNode('compress', compressNode)
|
|
319
|
+
.addEdge('tools', 'compress') // compress after every tool cycle
|
|
320
|
+
.addEdge('compress', 'agent');
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
---
|
|
324
|
+
|
|
325
|
+
## When it helps (and when it doesn't)
|
|
326
|
+
|
|
327
|
+
**The core prerequisite:** there must be a growing `messages[]` array that gets passed to an LLM repeatedly.
|
|
328
|
+
|
|
329
|
+
### ✓ It helps: single-agent accumulating loops
|
|
330
|
+
|
|
331
|
+
```ts
|
|
332
|
+
// ReAct / tool-calling loop — context grows with every iteration
|
|
333
|
+
const messages: LLMMessage[] = [{ role: 'system', content: systemPrompt }];
|
|
334
|
+
|
|
335
|
+
while (!done) {
|
|
336
|
+
const response = await llm.invoke(messages);
|
|
337
|
+
messages.push({ role: 'assistant', content: response.content });
|
|
338
|
+
const toolResult = await runTool(response);
|
|
339
|
+
messages.push({ role: 'user', content: toolResult });
|
|
340
|
+
|
|
341
|
+
// ← contextprune here: stale tool results removed before next call
|
|
342
|
+
const { messages: lean } = await cp.compress(messages);
|
|
343
|
+
messages.splice(0, messages.length, ...lean);
|
|
344
|
+
}
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
By call 30, a typical agent has accumulated file reads, bash outputs, error traces, and intermediate reasoning that will never be referenced again. Every call pays for all of it. contextprune removes it.
|
|
348
|
+
|
|
349
|
+
### ✗ It doesn't help: parallel stateless fan-out
|
|
350
|
+
|
|
351
|
+
```ts
|
|
352
|
+
// Each agent call is 2–3 messages built fresh, discarded after
|
|
353
|
+
const [strategy, calendar, copy] = await Promise.all([
|
|
354
|
+
orchestrator.invoke([{ role: 'user', content: strategyPrompt }]),
|
|
355
|
+
strategist.invoke([{ role: 'user', content: calendarPrompt }]),
|
|
356
|
+
copywriter.invoke([{ role: 'user', content: copyPrompt }]),
|
|
357
|
+
]);
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
Each call is constructed fresh and discarded. There is no accumulating history. Nothing to prune.
|
|
361
|
+
|
|
362
|
+
**The diagnostic question:**
|
|
363
|
+
|
|
364
|
+
> After N agent calls, is there a single `messages[]` array that is longer than it was at call 1?
|
|
365
|
+
|
|
366
|
+
If yes — contextprune helps. If no — each call starts fresh, and contextprune has no leverage point.
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
## Compression modes
|
|
371
|
+
|
|
372
|
+
| Mode | When compression runs | Default for |
|
|
373
|
+
|------|----------------------|-------------|
|
|
374
|
+
| `manual` | Always, unconditionally | `compress()` |
|
|
375
|
+
| `auto` | Only when utilization ≥ `warningThreshold` | `watch()` |
|
|
376
|
+
| `suggest-only` | Never — analysis only | `analyze()` |
|
|
377
|
+
|
|
378
|
+
```ts
|
|
379
|
+
const cp = new ContextPrune({
|
|
380
|
+
model: 'claude-sonnet-4-5',
|
|
381
|
+
options: {
|
|
382
|
+
warningThreshold: 0.65, // start compressing at 65% full (default)
|
|
383
|
+
criticalThreshold: 0.80, // compress aggressively at 80% (default)
|
|
384
|
+
compressionMode: 'auto', // only compress when needed
|
|
385
|
+
}
|
|
386
|
+
});
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
## What gets compressed
|
|
392
|
+
|
|
393
|
+
| Message type | Strategy | Why |
|
|
394
|
+
|---|---|---|
|
|
395
|
+
| Outdated Tool Result | Remove | Not referenced in subsequent turns |
|
|
396
|
+
| Fixed Error | Remove | Stack trace no longer needed |
|
|
397
|
+
| Chain of Thought | Collapse to 1 line | Conclusion already in context |
|
|
398
|
+
| Status Update | Collapse to 1 line | Acknowledged, no longer active |
|
|
399
|
+
| Tool Result (active) | Trim to key output | Keep answer, drop verbose body |
|
|
400
|
+
| Chat / Filler | Remove | Low relevance to current task |
|
|
401
|
+
|
|
402
|
+
**Always preserved:** system prompts, user corrections, active errors, session goals, final answers.
|
|
403
|
+
|
|
404
|
+
The classifier assigns one of 11 types to each message. Classification confidence gates compression aggressiveness — if the classifier is uncertain, the message is always preserved.
|
|
405
|
+
|
|
406
|
+
---
|
|
407
|
+
|
|
408
|
+
## Supported providers and models
|
|
409
|
+
|
|
410
|
+
Token budgets are pre-configured for:
|
|
411
|
+
|
|
412
|
+
| Provider | Models |
|
|
413
|
+
|---|---|
|
|
414
|
+
| Anthropic | Claude 4.x, Claude 3.x (all variants) |
|
|
415
|
+
| OpenAI | GPT-4o, GPT-4.1, GPT-4-turbo, GPT-3.5, o1, o3 series |
|
|
416
|
+
| Google | Gemini 2.5 Pro/Flash, Gemini 2.0, Gemini 1.5 |
|
|
417
|
+
| Meta | Llama 3.3 / 3.1 (70B, 8B) |
|
|
418
|
+
| Mistral | Mistral Large/Medium/Small, Mixtral, Codestral |
|
|
419
|
+
| DeepSeek | DeepSeek Chat, DeepSeek Reasoner |
|
|
420
|
+
| Cohere | Command R, Command R+ |
|
|
421
|
+
| OpenRouter | All `provider/model` prefixed names |
|
|
422
|
+
| Groq | Llama3, Mixtral, Gemma hosted models |
|
|
423
|
+
|
|
424
|
+
Any unrecognized model string falls back to a 128k token budget.
|
|
425
|
+
|
|
426
|
+
---
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// cli/commands/analyze.ts
|
|
3
|
+
// npx contextprune analyze <file>
|
|
4
|
+
// Reads a JSON messages file, runs full pipeline analysis, prints a table.
|
|
5
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
6
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
7
|
+
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.analyzeCommand = analyzeCommand;
|
|
10
|
+
const commander_1 = require("commander");
|
|
11
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
12
|
+
const ora_1 = __importDefault(require("ora"));
|
|
13
|
+
const index_1 = require("../../src/index");
|
|
14
|
+
const parse_input_1 = require("../parse-input");
|
|
15
|
+
const labels_1 = require("../labels");
|
|
16
|
+
/**
 * Build the `analyze` subcommand.
 *
 * Loads a messages file (JSON or Claude Code JSONL, via
 * `loadMessagesWithUsage`), runs read-only pipeline analysis through
 * `ContextPrune.analyze`, and renders one of three outputs:
 * raw JSON (`--json`), the session brief (`--brief`), or the formatted
 * table via `printAnalysis`.
 *
 * @returns {import('commander').Command} the configured `analyze` command
 */
function analyzeCommand() {
    const runAnalyze = async (file, opts) => {
        const ora = ora_1.default;
        const spinner = ora('Loading messages…').start();
        let messages;
        let actualInputTokens;
        try {
            ({ messages, actualInputTokens } = (0, parse_input_1.loadMessagesWithUsage)(file));
        }
        catch (err) {
            spinner.fail(`Failed to read file: ${err.message}`);
            process.exit(1);
        }
        spinner.text = 'Analyzing context…';
        let analysis;
        try {
            // actualInputTokens (when present in the transcript) lets the
            // analyzer report real API usage instead of a re-count estimate.
            const pruner = new index_1.ContextPrune({ model: opts.model, options: { actualInputTokens } });
            analysis = await pruner.analyze(messages);
        }
        catch (err) {
            spinner.fail(`Analysis failed: ${err.message}`);
            process.exit(1);
        }
        spinner.succeed('Analysis complete.');
        // --json takes precedence over --brief when both flags are passed.
        if (opts.json) {
            console.log(JSON.stringify(analysis, null, 2));
            return;
        }
        if (opts.brief) {
            console.log(analysis.sessionBrief);
            return;
        }
        printAnalysis(analysis);
    };
    return new commander_1.Command('analyze')
        .description('Analyze a messages JSON file and show context breakdown')
        .argument('<file>', 'Path to JSON file containing messages array')
        .option('-m, --model <model>', 'Model to use for token counting', 'claude-sonnet-4-5')
        .option('--json', 'Output raw JSON instead of formatted table')
        .option('--brief', 'Print the session brief (handoff prompt for starting a new session)')
        .action(runAnalyze);
}
|
|
56
|
+
/**
 * Pretty-print a full analysis result to stdout.
 *
 * Renders, in order: a header with model capacity / current usage /
 * utilization badge, the compression recommendation, a per-classification
 * breakdown sorted by token count, a per-strategy breakdown, the top five
 * token consumers, and — only for short sessions (≤ 30 messages) — a
 * per-message detail table.
 *
 * @param analysis - object produced by `ContextPrune.analyze`; must expose
 *   `annotatedMessages`, `breakdown`, `recommendation`, `sessionState`, and
 *   `topConsumers` (shape assumed from usage here — defined in src/types).
 */
function printAnalysis(analysis) {
    const { annotatedMessages, breakdown, recommendation, sessionState } = analysis;
    const budget = sessionState.tokenBudget;
    // ── Header ─────────────────────────────────────────────────────────────────
    console.log('\n' + chalk_1.default.bold.cyan('─── ContextPrune Analysis ──────────────────────────────'));
    console.log(chalk_1.default.dim(`Model capacity: ${budget.modelMaxTokens.toLocaleString()} tokens`) +
        ` | ` +
        chalk_1.default.dim(`Current usage: ${budget.currentUsage.toLocaleString()} tokens`) +
        ` | ` +
        utilizationBadge(budget.utilizationPercent));
    // ── Recommendation ─────────────────────────────────────────────────────────
    // Known urgency levels map to a colour and a display label; anything
    // unexpected falls back to white / the uppercased raw value below.
    const urgencyColors = {
        none: chalk_1.default.dim,
        suggested: chalk_1.default.blue,
        recommended: chalk_1.default.yellow,
        critical: chalk_1.default.red,
    };
    const urgencyLabels = {
        none: 'HEALTHY',
        suggested: 'SUGGESTED',
        recommended: 'RECOMMENDED',
        critical: 'CRITICAL',
    };
    const urgencyColor = urgencyColors[recommendation.urgency] ?? chalk_1.default.white;
    const urgencyLabel = urgencyLabels[recommendation.urgency] ?? recommendation.urgency.toUpperCase();
    console.log('\n' + urgencyColor(`[${urgencyLabel}] ${recommendation.message}`));
    if (recommendation.projectedSavings > 0) {
        console.log(chalk_1.default.green(` Projected savings: ${recommendation.projectedSavings.toLocaleString()} tokens`) +
            chalk_1.default.dim(` (${Math.round(recommendation.projectedSavingsPercent * 100)}%)`));
    }
    // ── Per-classification breakdown ───────────────────────────────────────────
    console.log('\n' + chalk_1.default.bold('Classification Breakdown:'));
    // Collect non-empty classifications as [name, count, tokens, bar] tuples,
    // then sort descending by token count (index 2) before printing.
    const rows = [];
    for (const [cls, data] of Object.entries(breakdown.byClassification)) {
        if (!data || data.count === 0)
            continue;
        const bar = tokenBar(data.percentOfTotal, 20);
        rows.push([cls, data.count, data.tokens, bar]);
    }
    rows.sort((a, b) => (b[2] ?? 0) - (a[2] ?? 0));
    for (const [cls, count, tokens, bar] of rows) {
        console.log(` ${chalk_1.default.cyan((0, labels_1.clsLabel)(cls).padEnd(28))}` +
            `${String(count).padStart(4)} msgs ` +
            `${String(tokens.toLocaleString()).padStart(8)} tokens ` +
            chalk_1.default.green(bar));
    }
    // ── Per-strategy breakdown ─────────────────────────────────────────────────
    console.log('\n' + chalk_1.default.bold('Compression Strategies:'));
    for (const [strat, data] of Object.entries(breakdown.byCompressionStrategy)) {
        if (!data || data.count === 0)
            continue;
        const saved = data.tokensBefore - data.tokensAfter;
        // PRESERVE rows are dimmed (no action); all other strategies highlighted.
        const color = strat === 'PRESERVE' ? chalk_1.default.dim : chalk_1.default.yellow;
        console.log(` ${color((0, labels_1.stratLabel)(strat).padEnd(20))}` +
            `${String(data.count).padStart(4)} msgs ` +
            (saved > 0 ? chalk_1.default.green(`−${saved.toLocaleString()} tokens`) : ''));
    }
    // ── Top consumers ──────────────────────────────────────────────────────────
    console.log('\n' + chalk_1.default.bold('Top Token Consumers:') + chalk_1.default.dim(' (largest single API calls)'));
    for (const consumer of analysis.topConsumers.slice(0, 5)) {
        const opp = opportunityColor(consumer.compressionOpportunity);
        console.log(` Call ${String(consumer.originalIndex).padStart(3)} ` +
            `${chalk_1.default.cyan((0, labels_1.clsLabel)(consumer.classification).padEnd(25))}` +
            `${String(consumer.tokenCount.toLocaleString()).padStart(7)} tokens ` +
            opp(`[${consumer.compressionOpportunity} opportunity]`));
    }
    // ── Per-message table (if short) ──────────────────────────────────────────
    // Skipped for long sessions so the report stays readable in a terminal.
    if (annotatedMessages.length <= 30) {
        console.log('\n' + chalk_1.default.bold('Message Detail:'));
        printMessageTable(annotatedMessages);
    }
    console.log('');
}
|
|
129
|
+
/**
 * Render a fixed-width per-message detail table, one row per annotated
 * message. Rows whose strategy is PRESERVE are dimmed so compressible
 * rows stand out.
 *
 * @param messages - annotated messages from an analysis result
 */
function printMessageTable(messages) {
    const dim = chalk_1.default.dim;
    console.log(dim(' Call Role Classification Tokens Strategy Score'));
    console.log(dim(' ' + '─'.repeat(80)));
    for (const msg of messages) {
        const paint = msg.compressionStrategy === 'PRESERVE' ? chalk_1.default.dim : chalk_1.default.white;
        const call = String(msg.originalIndex).padStart(4);
        const role = msg.original.role.padEnd(10);
        const cls = (0, labels_1.clsLabel)(msg.classification).padEnd(28);
        const tok = String(msg.tokenCount).padStart(6);
        const strat = (0, labels_1.stratLabel)(msg.compressionStrategy).padEnd(20);
        const score = msg.relevanceScore.toFixed(2);
        console.log(paint(` ${call} ${role} ${cls} ${tok} ${strat} ${score}`));
    }
}
|
|
142
|
+
/**
 * Colour-coded utilization badge: green below 65%, yellow from 65%,
 * red from 80% — matching the warning/critical thresholds used elsewhere.
 *
 * @param {number} pct - utilization as a fraction (0.56 = 56%)
 * @returns {string} chalk-styled badge text
 */
function utilizationBadge(pct) {
    const text = ` ${Math.round(pct * 100)}% utilized `;
    let paint;
    if (pct >= 0.80) {
        paint = chalk_1.default.bgRed.white;
    }
    else if (pct >= 0.65) {
        paint = chalk_1.default.bgYellow.black;
    }
    else {
        paint = chalk_1.default.bgGreen.black;
    }
    return paint(text);
}
|
|
150
|
+
/**
 * Render an ASCII progress bar followed by a percentage suffix,
 * e.g. `tokenBar(0.5, 10)` → `"█████░░░░░ 50%"`.
 *
 * @param {number} pct - fill fraction (0.47 = 47%); values outside [0, 1]
 *   saturate the bar but still show the true percentage in the suffix
 * @param {number} width - total bar width in characters
 * @returns {string} the bar plus ` NN%` suffix
 */
function tokenBar(pct, width) {
    // Clamp to [0, width]: without this, pct > 1 (utilization can exceed
    // 100%) or pct < 0 makes `width - filled` negative and
    // String.prototype.repeat throws a RangeError.
    const filled = Math.min(width, Math.max(0, Math.round(pct * width)));
    return '█'.repeat(filled) + '░'.repeat(width - filled) + ` ${Math.round(pct * 100)}%`;
}
|
|
154
|
+
/**
 * Map a compression-opportunity level to a chalk painter for display:
 * high → red, medium → yellow, low → blue, anything else → dim.
 *
 * @param {string} opp - opportunity level string
 * @returns chalk style function
 */
function opportunityColor(opp) {
    if (opp === 'high') {
        return chalk_1.default.red;
    }
    if (opp === 'medium') {
        return chalk_1.default.yellow;
    }
    if (opp === 'low') {
        return chalk_1.default.blue;
    }
    return chalk_1.default.dim;
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// cli/commands/compress.ts
|
|
3
|
+
// npx contextprune compress <file> [-o output.json]
|
|
4
|
+
// Reads a messages JSON file, compresses it, writes the compressed result.
|
|
5
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
6
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
7
|
+
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.compressCommand = compressCommand;
|
|
10
|
+
const commander_1 = require("commander");
|
|
11
|
+
const fs_1 = require("fs");
|
|
12
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
13
|
+
const ora_1 = __importDefault(require("ora"));
|
|
14
|
+
const index_1 = require("../../src/index");
|
|
15
|
+
const parse_input_1 = require("../parse-input");
|
|
16
|
+
/**
 * Build the `compress` subcommand.
 *
 * Loads a messages JSON file, runs `ContextPrune.compress` with the chosen
 * mode, prints a before/after token summary, then writes the compressed
 * messages array either to `--output` or to stdout, followed by a usage hint.
 * Exits with code 1 on load or compression failure.
 *
 * @returns {import('commander').Command} the configured `compress` command
 */
function compressCommand() {
    return new commander_1.Command('compress')
        .description('Compress a messages JSON file and output the result')
        .argument('<file>', 'Path to JSON file containing messages array')
        .option('-m, --model <model>', 'Model to use for token counting', 'claude-sonnet-4-5')
        .option('-o, --output <file>', 'Output file path (default: stdout)')
        .option('--mode <mode>', 'Compression mode: auto | manual | suggest-only', 'manual')
        .action(async (file, opts) => {
        const spinner = (0, ora_1.default)('Loading messages…').start();
        let messages;
        try {
            messages = (0, parse_input_1.loadMessages)(file);
        }
        catch (err) {
            spinner.fail(`Failed to read file: ${err.message}`);
            process.exit(1);
        }
        spinner.text = 'Compressing…';
        try {
            const ck = new index_1.ContextPrune({
                model: opts.model,
                options: {
                    // Default 'manual' compresses unconditionally; 'auto' only
                    // compresses past the warning threshold (see README modes).
                    compressionMode: opts.mode,
                },
            });
            const result = await ck.compress(messages);
            spinner.succeed('Compression complete.');
            const { summary } = result;
            console.log(chalk_1.default.green(` Tokens: ${summary.tokensBefore.toLocaleString()} → ${summary.tokensAfter.toLocaleString()}`) +
                chalk_1.default.dim(` (−${summary.tokensSaved.toLocaleString()}, ${Math.round(summary.savingsPercent * 100)}% saved)`) +
                ` Messages: ${messages.length} → ${result.messages.length}`);
            // Pretty-printed so the output file is human-diffable.
            const output = JSON.stringify(result.messages, null, 2);
            if (opts.output) {
                (0, fs_1.writeFileSync)(opts.output, output, 'utf-8');
                console.log(chalk_1.default.dim(` Written to ${opts.output}`));
            }
            else {
                console.log(output);
            }
            // Trailing hint: the snippet substitutes the chosen output path
            // (or a placeholder) into sample code the user can copy-paste.
            console.log(chalk_1.default.dim(`\n ℹ This output is a compressed messages array — it does not modify your live Claude Code session.\n`) +
                chalk_1.default.dim(` To use it, pass the result as the messages array in your next API call:\n`) +
                chalk_1.default.dim(` const result = JSON.parse(fs.readFileSync('${opts.output ?? 'output.json'}'));\n`) +
                chalk_1.default.dim(` await anthropic.messages.create({ model: '...', messages: result, max_tokens: 8096 });`));
        }
        catch (err) {
            spinner.fail(`Compression failed: ${err.message}`);
            process.exit(1);
        }
    });
}
|