@grapine.ai/contextprune 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +426 -1
  2. package/dist/cli/commands/analyze.d.ts +2 -0
  3. package/dist/cli/commands/analyze.js +161 -0
  4. package/dist/cli/commands/compress.d.ts +2 -0
  5. package/dist/cli/commands/compress.js +65 -0
  6. package/dist/cli/commands/watch.d.ts +2 -0
  7. package/dist/cli/commands/watch.js +432 -0
  8. package/dist/cli/dashboard/index.html +720 -0
  9. package/dist/cli/index.d.ts +2 -0
  10. package/dist/cli/index.js +19 -0
  11. package/dist/cli/labels.d.ts +4 -0
  12. package/dist/cli/labels.js +35 -0
  13. package/dist/cli/parse-input.d.ts +33 -0
  14. package/dist/cli/parse-input.js +191 -0
  15. package/dist/src/brief/index.d.ts +2 -0
  16. package/dist/src/brief/index.js +101 -0
  17. package/dist/src/classifier/confidence.d.ts +4 -0
  18. package/dist/src/classifier/confidence.js +23 -0
  19. package/dist/src/classifier/index.d.ts +11 -0
  20. package/dist/src/classifier/index.js +217 -0
  21. package/dist/src/classifier/patterns.d.ts +7 -0
  22. package/dist/src/classifier/patterns.js +81 -0
  23. package/dist/src/compression/engine.d.ts +23 -0
  24. package/dist/src/compression/engine.js +363 -0
  25. package/dist/src/index.d.ts +41 -0
  26. package/dist/src/index.js +120 -0
  27. package/dist/src/pipeline/index.d.ts +5 -0
  28. package/dist/src/pipeline/index.js +167 -0
  29. package/dist/src/scorer/index.d.ts +4 -0
  30. package/dist/src/scorer/index.js +136 -0
  31. package/dist/src/scorer/session-extractor.d.ts +2 -0
  32. package/dist/src/scorer/session-extractor.js +57 -0
  33. package/dist/src/strategy/selector.d.ts +3 -0
  34. package/dist/src/strategy/selector.js +158 -0
  35. package/dist/src/tokenizer/index.d.ts +18 -0
  36. package/dist/src/tokenizer/index.js +195 -0
  37. package/dist/src/types.d.ts +161 -0
  38. package/dist/src/types.js +5 -0
  39. package/dist/src/utils/index.d.ts +4 -0
  40. package/dist/src/utils/index.js +48 -0
  41. package/dist/src/validation/coherence.d.ts +3 -0
  42. package/dist/src/validation/coherence.js +87 -0
  43. package/license.md +14 -0
  44. package/package.json +76 -41
  45. package/index.js +0 -1
package/README.md CHANGED
@@ -1 +1,426 @@
1
- # Placeholder for @grapine.ai/contextprune
1
+ # @grapine.ai/contextprune
2
+
3
+ **Garbage collection for LLM context windows.**
4
+
5
+ Sits between your application and the LLM API. Analyzes your `messages[]` array, removes dead weight — stale tool outputs, resolved errors, superseded reasoning — and returns a leaner version. Every API call costs less. The model stays focused on what actually matters.
6
+
7
+ **100% local. No data sent anywhere. No LLM calls during compression.**
8
+
9
+ ```bash
10
+ npm install @grapine.ai/contextprune
11
+ ```
12
+
13
+ ---
14
+
15
+ ## The problem
16
+
17
+ Long LLM sessions fill up fast:
18
+
19
+ ```
20
+ Turn 1 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 12% 4,100 tokens
21
+ Turn 5 ████████████░░░░░░░░░░░░░░░░░░ 38% 12,800 tokens
22
+ Turn 10 ████████████████████░░░░░░░░░░ 58% 19,400 tokens
23
+ Turn 15 ████████████████████████████░░ 78% 26,100 tokens ← quality degrades here
24
+ Turn 20 ██████████████████████████████ 91% 30,600 tokens ← coherence cliff
25
+ ```
26
+
27
+ Around 65–75% utilization, model behavior suddenly gets worse — the model loses track of earlier constraints, repeats itself, makes mistakes it wouldn't make with a clean context. Most developers hit this, get confused, and manually clear context — losing all the good state too.
28
+
29
+ **With contextprune:**
30
+
31
+ ```
32
+ Turn 1 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 12% 4,100 tokens —
33
+ Turn 5 ████████████░░░░░░░░░░░░░░░░░░ 38% 12,800 tokens —
34
+ Turn 6 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 11% 3,700 tokens ← compressed, 71% saved
35
+ Turn 10 ██████████░░░░░░░░░░░░░░░░░░░░ 28% 9,500 tokens —
36
+ Turn 11 ████░░░░░░░░░░░░░░░░░░░░░░░░░░ 10% 3,200 tokens ← compressed, 66% saved
37
+ Turn 20 ████████████░░░░░░░░░░░░░░░░░░ 34% 11,600 tokens ← never exceeds 40%
38
+ ```
39
+
40
+ ---
41
+
42
+ ## Quick start
43
+
44
+ ```ts
45
+ import { ContextPrune } from '@grapine.ai/contextprune';
46
+
47
+ const cp = new ContextPrune({ model: 'claude-sonnet-4-5' });
48
+
49
+ const result = await cp.compress(messages);
50
+ // result.messages is a drop-in replacement for messages
51
+ // result.summary.tokensSaved — tokens recovered
52
+ // result.summary.savingsPercent — e.g. 0.47 = 47% saved
53
+ ```
54
+
55
+ A one-line change in your existing code:
56
+
57
+ ```ts
58
+ // Before
59
+ const response = await anthropic.messages.create({
60
+ model: 'claude-sonnet-4-5',
61
+ messages, // ← growing unbounded
62
+ max_tokens: 8096,
63
+ });
64
+
65
+ // After
66
+ const { messages: lean } = await cp.compress(messages);
67
+ const response = await anthropic.messages.create({
68
+ model: 'claude-sonnet-4-5',
69
+ messages: lean, // ← compressed
70
+ max_tokens: 8096,
71
+ });
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Installation
77
+
78
+ ```bash
79
+ npm install @grapine.ai/contextprune
80
+ ```
81
+
82
+ Requires Node 18+. No mandatory peer dependencies — tiktoken is used for token counting when available, otherwise falls back to a character estimate.
83
+
84
+ ---
85
+
86
+ ## CLI
87
+
88
+ No code required. Run directly with `npx` — no install needed.
89
+
90
+ ### `analyze` — understand what's in your context
91
+
92
+ ```bash
93
+ npx @grapine.ai/contextprune analyze ./session.json
94
+ npx @grapine.ai/contextprune analyze ./session.jsonl # Claude Code session transcripts too
95
+ ```
96
+
97
+ ```
98
+ ─── ContextPrune Analysis ──────────────────────────────────────────────────
99
+ Model: claude-sonnet-4-5 | Capacity: 200,000 tokens
100
+
101
+ ████████████████░░░░░░░░░░░░░░ 56% used · 112,266 / 200,000 tokens
102
+
103
+ [SUGGESTED] Context is 56% full. Compression available but not urgent.
104
+ Projected savings: 48,100 tokens (43%) → 64,166 tokens after
105
+
106
+ Classification Breakdown:
107
+ Outdated Tool Result 82 msgs 53,099 tokens ████████████░ 47%
108
+ Chat / Filler 54 msgs 24,446 tokens ████████░░░░░ 22%
109
+ Tool Result (active) 86 msgs 23,528 tokens ████████░░░░░ 21%
110
+ Final Answer 1 msgs 11,406 tokens ████░░░░░░░░░ 10%
111
+
112
+ Compression Strategies:
113
+ Keep 141 msgs 64,166 tokens
114
+ Remove 69 msgs 37,814 tokens ← will be dropped
115
+ Trim to Key Output 8 msgs 8,320 tokens ← key output preserved
116
+ Collapse to 1 Line 1 msgs 1,966 tokens ← collapsed to marker
117
+
118
+ Top Token Consumers:
119
+ #32 Final Answer 11,406 tokens Preserved no opportunity
120
+ #55 Outdated Tool Result 6,801 tokens Remove high opportunity
121
+ #48 Outdated Tool Result 4,992 tokens Remove high opportunity
122
+ #61 Tool Result (active) 4,210 tokens Trim medium opportunity
123
+ ```
124
+
125
+ ```bash
126
+ # Also print a session brief — a compact handoff prompt for starting a new session
127
+ npx @grapine.ai/contextprune analyze ./session.jsonl --brief
128
+ ```
129
+
130
+ ### `compress` — compress a messages file
131
+
132
+ ```bash
133
+ npx @grapine.ai/contextprune compress ./session.json -o compressed.json
134
+ ```
135
+
136
+ ```
137
+ ✔ Compressed 112,266 → 64,166 tokens (43% saved, 48,100 tokens recovered)
138
+
139
+ Decisions:
140
+ Removed 69 messages (Outdated Tool Result, Chat/Filler)
141
+ Trimmed 8 messages (Tool Result — key output preserved)
142
+ Collapsed 1 message (Reasoning chain → 1-line marker)
143
+ Kept 141 messages (constraints, active errors, final answers)
144
+ ```
145
+
146
+ Output is a standard JSON messages array — drop it straight into an API call:
147
+
148
+ ```ts
149
+ const messages = JSON.parse(fs.readFileSync('compressed.json', 'utf-8'));
150
+ await anthropic.messages.create({ model: 'claude-sonnet-4-5', messages, max_tokens: 8096 });
151
+ ```
152
+
153
+ ### `watch` — live dashboard in your browser
154
+
155
+ ```bash
156
+ npx @grapine.ai/contextprune watch
157
+ ```
158
+
159
+ Discovers all Claude Code sessions in `~/.claude/projects/` and opens an interactive picker:
160
+
161
+ ```
162
+ Select a Claude project to monitor:
163
+
164
+ › labs/contextprune #b6c62a11 just now ● active
165
+ labs/my-app #a1d3f920 2h ago
166
+ work/api-service #cc8801ab 1d ago
167
+
168
+ ↑↓ to navigate · Enter to select · Ctrl+C to cancel
169
+ ```
170
+
171
+ Opens a browser tab and starts live monitoring. The dashboard updates every time the session file changes.
172
+
173
+ ```bash
174
+ # Or point directly at a file
175
+ npx @grapine.ai/contextprune watch --follow ~/.claude/projects/my-project/session.jsonl
176
+
177
+ # Use a different port
178
+ npx @grapine.ai/contextprune watch --port 8080
179
+ ```
180
+
181
+ ---
182
+
183
+ ## Dashboard
184
+
185
+ A live browser dashboard that monitors your Claude Code sessions in real time. No configuration — run `npx @grapine.ai/contextprune watch` and it opens automatically.
186
+
187
+ **Healthy Context Dashboard**
188
+
189
+ ![Healthy Context Dashboard](screenshots/cp_dashboard_healthy.jpg)
190
+
191
+
192
+
193
+ **Context Compression Recommendation Dashboard**
194
+
195
+ ![Context Compression Recommendation Dashboard](screenshots/cp_dashboard_compression.jpg)
196
+
197
+ **What the dashboard shows:**
198
+
199
+ **Context Window** — utilization bar with colour-coded status (green → yellow → red). Switches to Compression Suggested / Compress Now badges as context fills up.
200
+
201
+ **Session Cost** — cost per API call with input/output/cache breakdown, grouped by calendar day with proportional bars.
202
+
203
+ **Classification Breakdown** — how your context is distributed across message types (Outdated Tool Result, Active Tool Result, Chat/Filler, Final Answer, etc.) with token counts and percentages.
204
+
205
+ **Compression Strategies** — what contextprune would do right now: Keep / Remove / Trim / Collapse counts.
206
+
207
+ **Compression Projection** — before/after utilization bars showing exactly how much would be recovered if you compressed now. Hidden when context is healthy.
208
+
209
+ **Top Consumers** — the largest individual messages ranked by token count, with their classification and compression opportunity.
210
+
211
+ **Session Brief** — auto-generated handoff prompt that appears at 65%+ utilization. One click copies a compact context summary you can paste into a new session to continue without losing state.
212
+
213
+ **Desktop notifications** — opt-in alerts at 65% utilization, then every 5% increment until you compress.
214
+
215
+ **Push data from your own process** (no file watching needed):
216
+
217
+ ```bash
218
+ npx @grapine.ai/contextprune watch &
219
+
220
+ curl -X POST http://localhost:4242/analyze \
221
+ -H 'Content-Type: application/json' \
222
+ -d '{ "messages": [...], "model": "gpt-4o" }'
223
+ ```
224
+
225
+ Works with any provider — Anthropic, OpenAI, OpenRouter, Groq, or any messages array you construct yourself.
226
+
227
+ ---
228
+
229
+ ## Three ways to use it
230
+
231
+ ### 1. `compress(messages)` — explicit, you decide when
232
+
233
+ ```ts
234
+ const result = await cp.compress(messages);
235
+
236
+ console.log(result.summary.tokensSaved); // 48100
237
+ console.log(result.summary.savingsPercent); // 0.43
238
+ console.log(result.messages.length); // fewer messages
239
+ ```
240
+
241
+ Compresses unconditionally every time you call it. Use this when you explicitly decide compression is warranted — after a tool-heavy phase, every N turns, or as part of a LangGraph compress node.
242
+
243
+ ### 2. `watch(client)` — automatic, zero changes to call sites
244
+
245
+ ```ts
246
+ // Wrap once at startup
247
+ const watched = cp.watch(anthropic);
248
+
249
+ // Use exactly as before — compression fires automatically when context > 65%
250
+ const response = await watched.messages.create({
251
+ model: 'claude-sonnet-4-5',
252
+ messages,
253
+ max_tokens: 8096,
254
+ });
255
+ ```
256
+
257
+ Works with Anthropic, OpenAI, and any OpenAI-compatible provider:
258
+
259
+ ```ts
260
+ // OpenRouter
261
+ const client = new OpenAI({ baseURL: 'https://openrouter.ai/api/v1', apiKey: '...' });
262
+ const watched = cp.watch(client);
263
+ await watched.chat.completions.create({ model: 'meta-llama/llama-3.3-70b-instruct', messages });
264
+
265
+ // Groq
266
+ const watched = cp.watch(new Groq());
267
+ await watched.chat.completions.create({ model: 'llama3-70b-8192', messages });
268
+ ```
269
+
270
+ ### 3. `analyze(messages)` — read-only inspection
271
+
272
+ ```ts
273
+ const analysis = await cp.analyze(messages);
274
+
275
+ analysis.recommendation.urgency // 'none' | 'suggested' | 'recommended' | 'critical'
276
+ analysis.recommendation.projectedSavings // tokens that would be saved
277
+ analysis.sessionState.tokenBudget.utilizationPercent // 0.56
278
+ analysis.sessionBrief // markdown handoff prompt for context continuation
279
+ ```
280
+
281
+ Never compresses — use this to build dashboards, gate on urgency, or log opportunities.
282
+
283
+ ---
284
+
285
+ ## LangGraph
286
+
287
+ In a LangGraph agent, `state["messages"]` accumulates every tool result and intermediate step across all graph iterations. By call 20, a typical coding agent has 30–50k tokens of stale tool outputs.
288
+
289
+ **Wrap the client — zero changes inside the graph:**
290
+
291
+ ```ts
292
+ import { ContextPrune } from '@grapine.ai/contextprune';
293
+ import Anthropic from '@anthropic-ai/sdk';
294
+
295
+ const client = new ContextPrune({ model: 'claude-sonnet-4-5' }).watch(new Anthropic());
296
+
297
+ // Every node compresses automatically, only when context > 65%
298
+ function callModel(state: MessagesState) {
299
+ return client.messages.create({ // ← unchanged
300
+ model: 'claude-sonnet-4-5',
301
+ messages: state.messages,
302
+ max_tokens: 8096,
303
+ });
304
+ }
305
+ ```
306
+
307
+ **Or add a dedicated compress node:**
308
+
309
+ ```ts
310
+ const cp = new ContextPrune({ model: 'claude-sonnet-4-5' });
311
+
312
+ async function compressNode(state: MessagesState) {
313
+ const result = await cp.compress(state.messages);
314
+ return { messages: result.messages };
315
+ }
316
+
317
+ builder
318
+ .addNode('compress', compressNode)
319
+ .addEdge('tools', 'compress') // compress after every tool cycle
320
+ .addEdge('compress', 'agent');
321
+ ```
322
+
323
+ ---
324
+
325
+ ## When it helps (and when it doesn't)
326
+
327
+ **The core prerequisite:** there must be a growing `messages[]` array that gets passed to an LLM repeatedly.
328
+
329
+ ### ✓ It helps: single-agent accumulating loops
330
+
331
+ ```ts
332
+ // ReAct / tool-calling loop — context grows with every iteration
333
+ const messages: LLMMessage[] = [{ role: 'system', content: systemPrompt }];
334
+
335
+ while (!done) {
336
+ const response = await llm.invoke(messages);
337
+ messages.push({ role: 'assistant', content: response.content });
338
+ const toolResult = await runTool(response);
339
+ messages.push({ role: 'user', content: toolResult });
340
+
341
+ // ← contextprune here: stale tool results removed before next call
342
+ const { messages: lean } = await cp.compress(messages);
343
+ messages.splice(0, messages.length, ...lean);
344
+ }
345
+ ```
346
+
347
+ By call 30, a typical agent has accumulated file reads, bash outputs, error traces, and intermediate reasoning that will never be referenced again. Every call pays for all of it. contextprune removes it.
348
+
349
+ ### ✗ It doesn't help: parallel stateless fan-out
350
+
351
+ ```ts
352
+ // Each agent call is 2–3 messages built fresh, discarded after
353
+ const [strategy, calendar, copy] = await Promise.all([
354
+ orchestrator.invoke([{ role: 'user', content: strategyPrompt }]),
355
+ strategist.invoke([{ role: 'user', content: calendarPrompt }]),
356
+ copywriter.invoke([{ role: 'user', content: copyPrompt }]),
357
+ ]);
358
+ ```
359
+
360
+ Each call is constructed fresh and discarded. There is no accumulating history. Nothing to prune.
361
+
362
+ **The diagnostic question:**
363
+
364
+ > After N agent calls, is there a single `messages[]` array that is longer than it was at call 1?
365
+
366
+ If yes — contextprune helps. If no — each call starts fresh, and contextprune has no leverage point.
367
+
368
+ ---
369
+
370
+ ## Compression modes
371
+
372
+ | Mode | When compression runs | Default for |
373
+ |------|----------------------|-------------|
374
+ | `manual` | Always, unconditionally | `compress()` |
375
+ | `auto` | Only when utilization ≥ `warningThreshold` | `watch()` |
376
+ | `suggest-only` | Never — analysis only | `analyze()` |
377
+
378
+ ```ts
379
+ const cp = new ContextPrune({
380
+ model: 'claude-sonnet-4-5',
381
+ options: {
382
+ warningThreshold: 0.65, // start compressing at 65% full (default)
383
+ criticalThreshold: 0.80, // compress aggressively at 80% (default)
384
+ compressionMode: 'auto', // only compress when needed
385
+ }
386
+ });
387
+ ```
388
+
389
+ ---
390
+
391
+ ## What gets compressed
392
+
393
+ | Message type | Strategy | Why |
394
+ |---|---|---|
395
+ | Outdated Tool Result | Remove | Not referenced in subsequent turns |
396
+ | Fixed Error | Remove | Stack trace no longer needed |
397
+ | Chain of Thought | Collapse to 1 line | Conclusion already in context |
398
+ | Status Update | Collapse to 1 line | Acknowledged, no longer active |
399
+ | Tool Result (active) | Trim to key output | Keep answer, drop verbose body |
400
+ | Chat / Filler | Remove | Low relevance to current task |
401
+
402
+ **Always preserved:** system prompts, user corrections, active errors, session goals, final answers.
403
+
404
+ The classifier assigns one of 11 types to each message. Classification confidence gates compression aggressiveness — if the classifier is uncertain, the message is always preserved.
405
+
406
+ ---
407
+
408
+ ## Supported providers and models
409
+
410
+ Token budgets are pre-configured for:
411
+
412
+ | Provider | Models |
413
+ |---|---|
414
+ | Anthropic | Claude 4.x, Claude 3.x (all variants) |
415
+ | OpenAI | GPT-4o, GPT-4.1, GPT-4-turbo, GPT-3.5, o1, o3 series |
416
+ | Google | Gemini 2.5 Pro/Flash, Gemini 2.0, Gemini 1.5 |
417
+ | Meta | Llama 3.3 / 3.1 (70B, 8B) |
418
+ | Mistral | Mistral Large/Medium/Small, Mixtral, Codestral |
419
+ | DeepSeek | DeepSeek Chat, DeepSeek Reasoner |
420
+ | Cohere | Command R, Command R+ |
421
+ | OpenRouter | All `provider/model` prefixed names |
422
+ | Groq | Llama3, Mixtral, Gemma hosted models |
423
+
424
+ Any unrecognized model string falls back to a 128k token budget.
425
+
426
+ ---
@@ -0,0 +1,2 @@
1
+ import { Command } from 'commander';
2
+ export declare function analyzeCommand(): Command;
@@ -0,0 +1,161 @@
1
+ "use strict";
2
+ // cli/commands/analyze.ts
3
+ // npx contextprune analyze <file>
4
+ // Reads a JSON messages file, runs full pipeline analysis, prints a table.
5
+ var __importDefault = (this && this.__importDefault) || function (mod) {
6
+ return (mod && mod.__esModule) ? mod : { "default": mod };
7
+ };
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.analyzeCommand = analyzeCommand;
10
+ const commander_1 = require("commander");
11
+ const chalk_1 = __importDefault(require("chalk"));
12
+ const ora_1 = __importDefault(require("ora"));
13
+ const index_1 = require("../../src/index");
14
+ const parse_input_1 = require("../parse-input");
15
+ const labels_1 = require("../labels");
16
+ function analyzeCommand() {
17
+ return new commander_1.Command('analyze')
18
+ .description('Analyze a messages JSON file and show context breakdown')
19
+ .argument('<file>', 'Path to JSON file containing messages array')
20
+ .option('-m, --model <model>', 'Model to use for token counting', 'claude-sonnet-4-5')
21
+ .option('--json', 'Output raw JSON instead of formatted table')
22
+ .option('--brief', 'Print the session brief (handoff prompt for starting a new session)')
23
+ .action(async (file, opts) => {
24
+ const spinner = (0, ora_1.default)('Loading messages…').start();
25
+ let messages;
26
+ let actualInputTokens;
27
+ try {
28
+ ({ messages, actualInputTokens } = (0, parse_input_1.loadMessagesWithUsage)(file));
29
+ }
30
+ catch (err) {
31
+ spinner.fail(`Failed to read file: ${err.message}`);
32
+ process.exit(1);
33
+ }
34
+ spinner.text = 'Analyzing context…';
35
+ let analysis;
36
+ try {
37
+ const ck = new index_1.ContextPrune({ model: opts.model, options: { actualInputTokens } });
38
+ analysis = await ck.analyze(messages);
39
+ }
40
+ catch (err) {
41
+ spinner.fail(`Analysis failed: ${err.message}`);
42
+ process.exit(1);
43
+ }
44
+ spinner.succeed('Analysis complete.');
45
+ if (opts.json) {
46
+ console.log(JSON.stringify(analysis, null, 2));
47
+ return;
48
+ }
49
+ if (opts.brief) {
50
+ console.log(analysis.sessionBrief);
51
+ return;
52
+ }
53
+ printAnalysis(analysis);
54
+ });
55
+ }
56
+ function printAnalysis(analysis) {
57
+ const { annotatedMessages, breakdown, recommendation, sessionState } = analysis;
58
+ const budget = sessionState.tokenBudget;
59
+ // ── Header ─────────────────────────────────────────────────────────────────
60
+ console.log('\n' + chalk_1.default.bold.cyan('─── ContextPrune Analysis ──────────────────────────────'));
61
+ console.log(chalk_1.default.dim(`Model capacity: ${budget.modelMaxTokens.toLocaleString()} tokens`) +
62
+ ` | ` +
63
+ chalk_1.default.dim(`Current usage: ${budget.currentUsage.toLocaleString()} tokens`) +
64
+ ` | ` +
65
+ utilizationBadge(budget.utilizationPercent));
66
+ // ── Recommendation ─────────────────────────────────────────────────────────
67
+ const urgencyColors = {
68
+ none: chalk_1.default.dim,
69
+ suggested: chalk_1.default.blue,
70
+ recommended: chalk_1.default.yellow,
71
+ critical: chalk_1.default.red,
72
+ };
73
+ const urgencyLabels = {
74
+ none: 'HEALTHY',
75
+ suggested: 'SUGGESTED',
76
+ recommended: 'RECOMMENDED',
77
+ critical: 'CRITICAL',
78
+ };
79
+ const urgencyColor = urgencyColors[recommendation.urgency] ?? chalk_1.default.white;
80
+ const urgencyLabel = urgencyLabels[recommendation.urgency] ?? recommendation.urgency.toUpperCase();
81
+ console.log('\n' + urgencyColor(`[${urgencyLabel}] ${recommendation.message}`));
82
+ if (recommendation.projectedSavings > 0) {
83
+ console.log(chalk_1.default.green(` Projected savings: ${recommendation.projectedSavings.toLocaleString()} tokens`) +
84
+ chalk_1.default.dim(` (${Math.round(recommendation.projectedSavingsPercent * 100)}%)`));
85
+ }
86
+ // ── Per-classification breakdown ───────────────────────────────────────────
87
+ console.log('\n' + chalk_1.default.bold('Classification Breakdown:'));
88
+ const rows = [];
89
+ for (const [cls, data] of Object.entries(breakdown.byClassification)) {
90
+ if (!data || data.count === 0)
91
+ continue;
92
+ const bar = tokenBar(data.percentOfTotal, 20);
93
+ rows.push([cls, data.count, data.tokens, bar]);
94
+ }
95
+ rows.sort((a, b) => (b[2] ?? 0) - (a[2] ?? 0));
96
+ for (const [cls, count, tokens, bar] of rows) {
97
+ console.log(` ${chalk_1.default.cyan((0, labels_1.clsLabel)(cls).padEnd(28))}` +
98
+ `${String(count).padStart(4)} msgs ` +
99
+ `${String(tokens.toLocaleString()).padStart(8)} tokens ` +
100
+ chalk_1.default.green(bar));
101
+ }
102
+ // ── Per-strategy breakdown ─────────────────────────────────────────────────
103
+ console.log('\n' + chalk_1.default.bold('Compression Strategies:'));
104
+ for (const [strat, data] of Object.entries(breakdown.byCompressionStrategy)) {
105
+ if (!data || data.count === 0)
106
+ continue;
107
+ const saved = data.tokensBefore - data.tokensAfter;
108
+ const color = strat === 'PRESERVE' ? chalk_1.default.dim : chalk_1.default.yellow;
109
+ console.log(` ${color((0, labels_1.stratLabel)(strat).padEnd(20))}` +
110
+ `${String(data.count).padStart(4)} msgs ` +
111
+ (saved > 0 ? chalk_1.default.green(`−${saved.toLocaleString()} tokens`) : ''));
112
+ }
113
+ // ── Top consumers ──────────────────────────────────────────────────────────
114
+ console.log('\n' + chalk_1.default.bold('Top Token Consumers:') + chalk_1.default.dim(' (largest single API calls)'));
115
+ for (const consumer of analysis.topConsumers.slice(0, 5)) {
116
+ const opp = opportunityColor(consumer.compressionOpportunity);
117
+ console.log(` Call ${String(consumer.originalIndex).padStart(3)} ` +
118
+ `${chalk_1.default.cyan((0, labels_1.clsLabel)(consumer.classification).padEnd(25))}` +
119
+ `${String(consumer.tokenCount.toLocaleString()).padStart(7)} tokens ` +
120
+ opp(`[${consumer.compressionOpportunity} opportunity]`));
121
+ }
122
+ // ── Per-message table (if short) ──────────────────────────────────────────
123
+ if (annotatedMessages.length <= 30) {
124
+ console.log('\n' + chalk_1.default.bold('Message Detail:'));
125
+ printMessageTable(annotatedMessages);
126
+ }
127
+ console.log('');
128
+ }
129
+ function printMessageTable(messages) {
130
+ console.log(chalk_1.default.dim(' Call Role Classification Tokens Strategy Score'));
131
+ console.log(chalk_1.default.dim(' ' + '─'.repeat(80)));
132
+ for (const m of messages) {
133
+ const role = m.original.role.padEnd(10);
134
+ const cls = (0, labels_1.clsLabel)(m.classification).padEnd(28);
135
+ const tok = String(m.tokenCount).padStart(6);
136
+ const strat = (0, labels_1.stratLabel)(m.compressionStrategy).padEnd(20);
137
+ const score = m.relevanceScore.toFixed(2);
138
+ const rowColor = m.compressionStrategy === 'PRESERVE' ? chalk_1.default.dim : chalk_1.default.white;
139
+ console.log(rowColor(` ${String(m.originalIndex).padStart(4)} ${role} ${cls} ${tok} ${strat} ${score}`));
140
+ }
141
+ }
142
+ function utilizationBadge(pct) {
143
+ const label = `${Math.round(pct * 100)}% utilized`;
144
+ if (pct >= 0.80)
145
+ return chalk_1.default.bgRed.white(` ${label} `);
146
+ if (pct >= 0.65)
147
+ return chalk_1.default.bgYellow.black(` ${label} `);
148
+ return chalk_1.default.bgGreen.black(` ${label} `);
149
+ }
150
+ function tokenBar(pct, width) {
151
+ const filled = Math.round(pct * width);
152
+ return '█'.repeat(filled) + '░'.repeat(width - filled) + ` ${Math.round(pct * 100)}%`;
153
+ }
154
+ function opportunityColor(opp) {
155
+ switch (opp) {
156
+ case 'high': return chalk_1.default.red;
157
+ case 'medium': return chalk_1.default.yellow;
158
+ case 'low': return chalk_1.default.blue;
159
+ default: return chalk_1.default.dim;
160
+ }
161
+ }
@@ -0,0 +1,2 @@
1
+ import { Command } from 'commander';
2
+ export declare function compressCommand(): Command;
@@ -0,0 +1,65 @@
1
+ "use strict";
2
+ // cli/commands/compress.ts
3
+ // npx contextprune compress <file> [-o output.json]
4
+ // Reads a messages JSON file, compresses it, writes the compressed result.
5
+ var __importDefault = (this && this.__importDefault) || function (mod) {
6
+ return (mod && mod.__esModule) ? mod : { "default": mod };
7
+ };
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.compressCommand = compressCommand;
10
+ const commander_1 = require("commander");
11
+ const fs_1 = require("fs");
12
+ const chalk_1 = __importDefault(require("chalk"));
13
+ const ora_1 = __importDefault(require("ora"));
14
+ const index_1 = require("../../src/index");
15
+ const parse_input_1 = require("../parse-input");
16
+ function compressCommand() {
17
+ return new commander_1.Command('compress')
18
+ .description('Compress a messages JSON file and output the result')
19
+ .argument('<file>', 'Path to JSON file containing messages array')
20
+ .option('-m, --model <model>', 'Model to use for token counting', 'claude-sonnet-4-5')
21
+ .option('-o, --output <file>', 'Output file path (default: stdout)')
22
+ .option('--mode <mode>', 'Compression mode: auto | manual | suggest-only', 'manual')
23
+ .action(async (file, opts) => {
24
+ const spinner = (0, ora_1.default)('Loading messages…').start();
25
+ let messages;
26
+ try {
27
+ messages = (0, parse_input_1.loadMessages)(file);
28
+ }
29
+ catch (err) {
30
+ spinner.fail(`Failed to read file: ${err.message}`);
31
+ process.exit(1);
32
+ }
33
+ spinner.text = 'Compressing…';
34
+ try {
35
+ const ck = new index_1.ContextPrune({
36
+ model: opts.model,
37
+ options: {
38
+ compressionMode: opts.mode,
39
+ },
40
+ });
41
+ const result = await ck.compress(messages);
42
+ spinner.succeed('Compression complete.');
43
+ const { summary } = result;
44
+ console.log(chalk_1.default.green(` Tokens: ${summary.tokensBefore.toLocaleString()} → ${summary.tokensAfter.toLocaleString()}`) +
45
+ chalk_1.default.dim(` (−${summary.tokensSaved.toLocaleString()}, ${Math.round(summary.savingsPercent * 100)}% saved)`) +
46
+ ` Messages: ${messages.length} → ${result.messages.length}`);
47
+ const output = JSON.stringify(result.messages, null, 2);
48
+ if (opts.output) {
49
+ (0, fs_1.writeFileSync)(opts.output, output, 'utf-8');
50
+ console.log(chalk_1.default.dim(` Written to ${opts.output}`));
51
+ }
52
+ else {
53
+ console.log(output);
54
+ }
55
+ console.log(chalk_1.default.dim(`\n ℹ This output is a compressed messages array — it does not modify your live Claude Code session.\n`) +
56
+ chalk_1.default.dim(` To use it, pass the result as the messages array in your next API call:\n`) +
57
+ chalk_1.default.dim(` const result = JSON.parse(fs.readFileSync('${opts.output ?? 'output.json'}'));\n`) +
58
+ chalk_1.default.dim(` await anthropic.messages.create({ model: '...', messages: result, max_tokens: 8096 });`));
59
+ }
60
+ catch (err) {
61
+ spinner.fail(`Compression failed: ${err.message}`);
62
+ process.exit(1);
63
+ }
64
+ });
65
+ }
@@ -0,0 +1,2 @@
1
+ import { Command } from 'commander';
2
+ export declare function watchCommand(): Command;