squeezr-ai 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +476 -0
  2. package/bin/squeezr.js +117 -0
  3. package/dist/__tests__/cache.test.d.ts +1 -0
  4. package/dist/__tests__/cache.test.js +73 -0
  5. package/dist/__tests__/compressor.test.d.ts +1 -0
  6. package/dist/__tests__/compressor.test.js +311 -0
  7. package/dist/__tests__/config.test.d.ts +1 -0
  8. package/dist/__tests__/config.test.js +132 -0
  9. package/dist/__tests__/deterministic.test.d.ts +1 -0
  10. package/dist/__tests__/deterministic.test.js +769 -0
  11. package/dist/__tests__/expand.test.d.ts +1 -0
  12. package/dist/__tests__/expand.test.js +192 -0
  13. package/dist/__tests__/sessionCache.test.d.ts +1 -0
  14. package/dist/__tests__/sessionCache.test.js +72 -0
  15. package/dist/cache.d.ts +18 -0
  16. package/dist/cache.js +65 -0
  17. package/dist/compressor.d.ts +49 -0
  18. package/dist/compressor.js +482 -0
  19. package/dist/config.d.ts +27 -0
  20. package/dist/config.js +113 -0
  21. package/dist/deterministic.d.ts +39 -0
  22. package/dist/deterministic.js +1097 -0
  23. package/dist/discover.d.ts +10 -0
  24. package/dist/discover.js +133 -0
  25. package/dist/expand.d.ts +47 -0
  26. package/dist/expand.js +119 -0
  27. package/dist/gain.d.ts +2 -0
  28. package/dist/gain.js +48 -0
  29. package/dist/index.d.ts +1 -0
  30. package/dist/index.js +19 -0
  31. package/dist/server.d.ts +4 -0
  32. package/dist/server.js +253 -0
  33. package/dist/sessionCache.d.ts +30 -0
  34. package/dist/sessionCache.js +17 -0
  35. package/dist/stats.d.ts +29 -0
  36. package/dist/stats.js +90 -0
  37. package/dist/systemPrompt.d.ts +1 -0
  38. package/dist/systemPrompt.js +84 -0
  39. package/dist/version.d.ts +1 -0
  40. package/dist/version.js +1 -0
  41. package/package.json +58 -0
  42. package/squeezr.toml +38 -0
package/README.md ADDED
@@ -0,0 +1,476 @@
1
+ # Squeezr
2
+
3
+ [![npm version](https://badge.fury.io/js/squeezr-ai.svg)](https://www.npmjs.com/package/squeezr-ai)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+ [![Node.js 18+](https://img.shields.io/badge/node-18%2B-brightgreen)](https://nodejs.org)
6
+ [![Tests](https://img.shields.io/badge/tests-190%20passing-brightgreen)](https://github.com/sergioramosv/Squeezr)
7
+
8
+ **Squeezr is a local proxy that sits between your AI coding CLI and its API. It automatically compresses your context window on every request — saving thousands of tokens per session with zero changes to your workflow.**
9
+
10
+ Works with Claude Code, Codex, Aider, OpenCode, Gemini CLI, and any Ollama-powered local LLM.
11
+
12
+ ---
13
+
14
+ ## The problem
15
+
16
+ Every time you send a message in an AI coding CLI, the entire conversation history is re-sent to the API. That includes every file you read, every `git diff`, every test output, every bash command — even from 30 messages ago when it's no longer relevant. The system prompt alone can weigh 13KB and gets sent on every single request.
17
+
18
+ The result: context fills up fast, costs spike, and sessions hit the limit sooner than they should.
19
+
20
+ ---
21
+
22
+ ## How Squeezr fixes it
23
+
24
+ Squeezr intercepts every API request before it reaches the provider and runs multiple compression layers:
25
+
26
+ ```
27
+ Your CLI (Claude Code / Codex / Aider / Gemini CLI / Ollama)
28
+ |
29
+ v
30
+ localhost:8080 (Squeezr proxy)
31
+ |
32
+ |-- [1] System prompt compression
33
+ | Compressed once on first request, cached forever.
34
+ | ~13KB Claude Code system prompt → ~600 tokens. Never resent in full again.
35
+ |
36
+ |-- [2] Deterministic preprocessing — noise removal
37
+ | Runs on every tool result before anything else:
38
+ | strip ANSI codes, strip progress bars, strip timestamps,
39
+ | deduplicate repeated stack traces, deduplicate repeated lines,
40
+ | minify inline JSON, collapse whitespace.
41
+ |
42
+ |-- [3] Deterministic preprocessing — tool-specific patterns (~30 patterns)
43
+ | Applied automatically to every matching output:
44
+ | git: diff (1-line context + Changed: fn summary on large diffs)
45
+ | log (capped, adaptive), status, branch (capped at 20)
46
+ | cargo: test (failures only), build/check/clippy (errors only)
47
+ | JS/TS: vitest/jest (failures + summary only)
48
+ | playwright (✘ blocks only)
49
+ | tsc (errors grouped by file)
50
+ | eslint/biome (grouped, no rule URLs)
51
+ | prettier --check (only files needing format)
52
+ | pnpm/npm install (summary line only)
53
+ | pnpm/npm list (direct deps only)
54
+ | pnpm/npm outdated (capped at 30)
55
+ | next build (route table + errors)
56
+ | npx noise stripped
57
+ | Python: pytest (FAILED lines + tracebacks only)
58
+ | Go: go test (--- FAIL blocks only)
59
+ | Terraform: resource change summary + Plan line
60
+ | Docker: ps (compact), images (no dangling), logs (last 50 lines)
61
+ | kubectl: get (compact alignment)
62
+ | Prisma: strip ASCII box-drawing art
63
+ | gh CLI: pr view, pr checks, run list, issue list (all capped)
64
+ | Network: curl (strip verbose headers), wget (strip progress)
65
+ | Exclusive patterns:
66
+ | Read tool → lockfiles replaced with summary count
67
+ | large code files (.ts/.js/.py/.go/.rs > 500 lines)
68
+ | → imports + top-level signatures only, bodies omitted
69
+ | files > 200 lines → head + tail with omission note
70
+ | Grep tool → matches grouped by file, capped per file and total
71
+ | Glob tool → > 30 files collapsed to directory summary
72
+ | Any output → auto-extracts error lines when > 50% of content is noise
73
+ | Stack traces → repeated crash frames collapsed across log output
74
+ |
75
+ |-- [4] Cross-turn Read deduplication
76
+ | When the model reads the same file multiple times in a session,
77
+ | earlier occurrences are replaced with a reference token.
78
+ | Most recent copy always kept at full fidelity.
79
+ |
80
+ |-- [5] Adaptive AI compression
81
+ | Old bash output, file reads, grep results compressed by a cheap model.
82
+ | Threshold adjusts automatically based on context pressure:
83
+ | < 50% full → compress blocks > 1,500 chars
84
+ | 50-75% full → compress blocks > 800 chars
85
+ | 75-90% full → compress blocks > 400 chars
86
+ | > 90% full → compress everything > 150 chars
87
+ | At > 90% pressure, deterministic patterns also tighten:
88
+ | git diff → 0 context lines per hunk (vs 1)
89
+ | git log → cap 10 commits (vs 30)
90
+ | grep → 4 matches/file (vs 8)
91
+ |
92
+ |-- [6] Session cache + KV cache warming
93
+ | Session cache: blocks identical to a previous request skip the pipeline.
94
+ | KV warming: unchanged blocks keep deterministic IDs so Anthropic's
95
+ | prefix cache stays warm — 90% discount on already-seen tokens.
96
+ |
97
+ |-- [7] expand() — lossless retrieval
98
+ | Every compressed block is stored by ID. If the model needs the full
99
+ | original, it calls squeezr_expand(id). Squeezr intercepts the tool call,
100
+ | injects the original, and makes a continuation request — transparently.
101
+ |
102
+ v
103
+ Your provider's API (Anthropic / OpenAI / Google / Ollama)
104
+ ```
105
+
106
+ Recent content is always preserved untouched — by default the last 3 tool results are never compressed. Your CLI always has full context for what it's currently working on.
107
+
108
+ ---
109
+
110
+ ## Supported CLIs and providers
111
+
112
+ Squeezr auto-detects which provider each request targets from the auth headers. No configuration needed beyond pointing your CLI at the proxy.
113
+
114
+ | CLI | Set this env var | Compresses with | Extra keys needed |
115
+ |---|---|---|---|
116
+ | **Claude Code** | `ANTHROPIC_BASE_URL=http://localhost:8080` | Claude Haiku | None |
117
+ | **Codex CLI** | `OPENAI_BASE_URL=http://localhost:8080` | GPT-4o-mini | None |
118
+ | **Aider** (OpenAI backend) | `OPENAI_BASE_URL=http://localhost:8080` | GPT-4o-mini | None |
119
+ | **Aider** (Anthropic backend) | `ANTHROPIC_BASE_URL=http://localhost:8080` | Claude Haiku | None |
120
+ | **OpenCode** | `OPENAI_BASE_URL=http://localhost:8080` | GPT-4o-mini | None |
121
+ | **Gemini CLI** | `GEMINI_API_BASE_URL=http://localhost:8080` | Gemini Flash 8B | None |
122
+ | **Ollama** (any CLI) | `OPENAI_BASE_URL=http://localhost:8080` | Local model (configurable) | None |
123
+
124
+ Squeezr extracts the API key from the request itself and reuses it for compression. Zero extra setup.
125
+
126
+ ---
127
+
128
+ ## Quick start
129
+
130
+ ```bash
131
+ npm install -g squeezr-ai
132
+ squeezr start
133
+ ```
134
+
135
+ Then point your CLI at the proxy:
136
+
137
+ ```bash
138
+ # Claude Code
139
+ export ANTHROPIC_BASE_URL=http://localhost:8080 # macOS / Linux
140
+ $env:ANTHROPIC_BASE_URL="http://localhost:8080" # Windows PowerShell
141
+
142
+ # Codex / Aider / OpenCode
143
+ export OPENAI_BASE_URL=http://localhost:8080
144
+
145
+ # Gemini CLI
146
+ export GEMINI_API_BASE_URL=http://localhost:8080
147
+
148
+ # Ollama
149
+ export OPENAI_BASE_URL=http://localhost:8080
150
+ ```
151
+
152
+ Or use the shell installer to set up the env var permanently and register Squeezr as a login service:
153
+
154
+ ```bash
155
+ # macOS / Linux
156
+ bash install.sh
157
+
158
+ # Windows (PowerShell, run as admin for Task Scheduler)
159
+ .\install.ps1
160
+ ```
161
+
162
+ ---
163
+
164
+ ## Configuration
165
+
166
+ ### Global config — `squeezr.toml`
167
+
168
+ Located in the Squeezr install directory. Environment variables override any TOML value.
169
+
170
+ ```toml
171
+ [proxy]
172
+ port = 8080
173
+
174
+ [compression]
175
+ threshold = 800 # min chars to compress a tool result
176
+ keep_recent = 3 # recent tool results to leave untouched
177
+ disabled = false
178
+ compress_system_prompt = true # compress the CLI's system prompt (cached)
179
+ compress_conversation = false # also compress old user/assistant messages (aggressive)
180
+
181
+ # Explicit control over which tools are compressed:
182
+ # skip_tools = ["Read"] # never compress these tools
183
+ # only_tools = ["Bash"] # only compress these tools (overrides skip_tools)
184
+
185
+ [cache]
186
+ enabled = true
187
+ max_entries = 1000 # LRU cap for cached compressions
188
+
189
+ [adaptive]
190
+ enabled = true
191
+ low_threshold = 1500 # used when context < 50% full
192
+ mid_threshold = 800 # 50-75%
193
+ high_threshold = 400 # 75-90%
194
+ critical_threshold = 150 # > 90% — compress everything
195
+
196
+ [local]
197
+ enabled = true
198
+ upstream_url = "http://localhost:11434" # your Ollama URL
199
+ # Model used to compress tool results — must be pulled in Ollama.
200
+ # Good options:
201
+ # qwen2.5-coder:1.5b (best for code, ~1GB RAM) ← default
202
+ # qwen2.5:1.5b (good general, ~1GB RAM)
203
+ # llama3.2:1b (good English, ~800MB RAM)
204
+ # qwen2.5:3b (better quality, ~2GB RAM)
205
+ compression_model = "qwen2.5-coder:1.5b"
206
+ dummy_keys = ["ollama", "lm-studio", "sk-no-key-required", "local", "none", ""]
207
+ ```
208
+
209
+ ### Per-project config — `.squeezr.toml`
210
+
211
+ Drop a `.squeezr.toml` in any project root. It deep-merges over the global config, so you only need to specify what differs:
212
+
213
+ ```toml
214
+ # .squeezr.toml — project-level overrides
215
+ [compression]
216
+ threshold = 400
217
+ skip_tools = ["Read"] # don't compress file reads in this project
218
+ ```
219
+
220
+ Squeezr logs `[squeezr] Using project config: /path/to/.squeezr.toml` when a local config is detected.
221
+
222
+ ### Environment variable reference
223
+
224
+ | Variable | Default | Description |
225
+ |---|---|---|
226
+ | `SQUEEZR_PORT` | `8080` | Local port |
227
+ | `SQUEEZR_THRESHOLD` | `800` | Base compression threshold (chars) |
228
+ | `SQUEEZR_KEEP_RECENT` | `3` | Recent tool results to skip |
229
+ | `SQUEEZR_DISABLED` | — | Set to `1` to disable (passthrough only) |
230
+ | `SQUEEZR_DRY_RUN` | — | Set to `1` to preview savings without compressing |
231
+ | `SQUEEZR_LOCAL_UPSTREAM` | `http://localhost:11434` | Ollama URL |
232
+ | `SQUEEZR_LOCAL_MODEL` | `qwen2.5-coder:1.5b` | Ollama compression model |
233
+
234
+ ---
235
+
236
+ ## Explicit control — skip and only
237
+
238
+ You can control exactly which tool results Squeezr compresses, both globally and per-command.
239
+
240
+ ### Config-level (global or per-project)
241
+
242
+ ```toml
243
+ [compression]
244
+ # Never compress Read or Grep results:
245
+ skip_tools = ["Read", "Grep"]
246
+
247
+ # Only compress Bash results — ignore everything else:
248
+ only_tools = ["Bash"] # overrides skip_tools when set
249
+ ```
250
+
251
+ ### Inline per-command — `# squeezr:skip`
252
+
253
+ Add `# squeezr:skip` anywhere in a Bash command to prevent that specific result from being compressed, regardless of config:
254
+
255
+ ```bash
256
+ # This result will never be compressed, even if it's 10,000 chars:
257
+ git diff HEAD~3 # squeezr:skip
258
+
259
+ # Normal commands are compressed as usual:
260
+ cargo test
261
+ ```
262
+
263
+ ---
264
+
265
+ ## Dry-run mode
266
+
267
+ Preview what Squeezr would compress without modifying any requests:
268
+
269
+ ```bash
270
+ SQUEEZR_DRY_RUN=1 squeezr start
271
+ ```
272
+
273
+ Console output shows exactly what would be compressed:
274
+
275
+ ```
276
+ [squeezr dry-run] Would compress 4 block(s) | potential -12,430 chars | pressure=67% threshold=800
277
+ [squeezr dry-run/ollama] Would compress 2 block(s) | potential -5,210 chars | model=qwen2.5-coder:1.5b
278
+ ```
279
+
280
+ ---
281
+
282
+ ## Ollama — local compression
283
+
284
+ Pull the compression model once, then Squeezr handles the rest:
285
+
286
+ ```bash
287
+ ollama pull qwen2.5-coder:1.5b # or any model you prefer
288
+ ```
289
+
290
+ Any CLI that sends requests with a dummy auth key (`ollama`, `lm-studio`, empty string, etc.) is automatically detected as local and routed to your Ollama instance.
291
+
292
+ To use a different model:
293
+
294
+ ```toml
295
+ [local]
296
+ compression_model = "llama3.2:1b"
297
+ ```
298
+
299
+ ---
300
+
301
+ ## Live stats
302
+
303
+ Each compressed request logs to console:
304
+
305
+ ```
306
+ [squeezr] 2 block(s) compressed | -4,821 chars (~1,377 tokens) (87% saved)
307
+ [squeezr] Context pressure: 68% → threshold=800 chars
308
+ [squeezr/haiku] System prompt compressed: -71% (13,204 → 3,849 chars) [cached]
309
+ [squeezr/ollama] 1 block(s) compressed | -3,102 chars (~886 tokens) (79% saved)
310
+ [squeezr] Session cache: 3 block(s) reused (KV cache preserved)
311
+ [squeezr] Cross-turn dedup: 2 Read result(s) collapsed
312
+ ```
313
+
314
+ ### `squeezr gain` — full stats dashboard
315
+
316
+ ```bash
317
+ squeezr gain
318
+ ```
319
+
320
+ ```
321
+ ┌─────────────────────────────────────────┐
322
+ │ Squeezr — Token Savings │
323
+ ├─────────────────────────────────────────┤
324
+ │ Requests 38 │
325
+ │ Saved chars 142,830 │
326
+ │ Saved tokens 40,808 │
327
+ │ Savings 73.4% │
328
+ ├─────────────────────────────────────────┤
329
+ │ By Tool │
330
+ │ Bash (41x): -81% │
331
+ │ Read (28x): -74% │
332
+ │ Grep (14x): -69% │
333
+ └─────────────────────────────────────────┘
334
+ ```
335
+
336
+ Stats persist to `~/.squeezr/stats.json` across restarts.
337
+
338
+ ```bash
339
+ squeezr gain --reset # clear all saved stats
340
+ ```
341
+
342
+ Full JSON at: `http://localhost:8080/squeezr/stats`
343
+
344
+ ### `squeezr discover` — pattern coverage report
345
+
346
+ After a session, run:
347
+
348
+ ```bash
349
+ squeezr discover
350
+ ```
351
+
352
+ Shows which deterministic patterns fired, how many outputs hit the AI fallback, and the Read/Grep/Glob breakdown. Useful for spotting coverage gaps or misconfigured skip lists.
353
+
354
+ ---
355
+
356
+ ## How session-level optimisations work
357
+
358
+ ### Session cache + differential compression
359
+
360
+ Every request re-sends the full conversation history. Without deduplication, a 50-tool-result session would run 50 Haiku calls on request #51 — even though 49 of them haven't changed.
361
+
362
+ Squeezr tracks a hash of each compressed block in memory for the session lifetime. Blocks identical to the previous request skip the entire pipeline (preprocessing + AI call).
363
+
364
+ ```
365
+ Without session cache: request 51 → up to 50 Haiku calls
366
+ With session cache: request 51 → 1 Haiku call (only the new block)
367
+ ```
368
+
369
+ In a 100-request session with 40 tool results: ~4,000 Haiku calls → ~200.
370
+
371
+ ### KV cache warming
372
+
373
+ Claude charges 90% less for tokens already in its prefix cache. The cache only activates when the message prefix is byte-for-byte identical between requests. Standard compression breaks this — each call might produce different bytes, invalidating the cache.
374
+
375
+ Squeezr fixes this by assigning compressed blocks a deterministic MD5-based ID. Identical content always produces the same `[squeezr:id -ratio%]` string. Unchanged blocks produce identical bytes across requests, keeping the prefix stable.
376
+
377
+ ```
378
+ Without KV warming: request N+1 → new compressed bytes → cache miss on all subsequent tokens
379
+ With KV warming: request N+1 → same IDs for unchanged blocks → cache hit on entire history
380
+ → pay 10% of normal price for everything already seen
381
+ ```
382
+
383
+ These two optimisations compound: session cache reduces Haiku calls, KV warming reduces charges on the main model.
384
+
385
+ ### Cross-turn Read deduplication
386
+
387
+ When the model reads the same file multiple times (common in long refactoring sessions), every earlier occurrence is replaced with a reference token:
388
+
389
+ ```
390
+ [same file content as a later read — squeezr_expand(id) to retrieve]
391
+ ```
392
+
393
+ The most recent copy is always kept at full fidelity. The model can call `squeezr_expand(id)` to retrieve any earlier version on demand.
394
+
395
+ ### Adaptive pressure
396
+
397
+ As context fills up, Squeezr gets more aggressive — both in what it compresses and in how strictly the deterministic patterns behave:
398
+
399
+ | Context used | Threshold | git diff context | git log cap | grep cap/file |
400
+ |---|---|---|---|---|
401
+ | < 50% | 1,500 chars | 1 line | 30 commits | 8 matches |
402
+ | 50-75% | 800 chars | 1 line | 20 commits | 6 matches |
403
+ | 75-90% | 400 chars | 1 line | 20 commits | 6 matches |
404
+ | > 90% | 150 chars | **0 lines** | **10 commits** | **4 matches** |
405
+
406
+ ---
407
+
408
+ ## The economics
409
+
410
+ Compression is done by the cheapest model in each ecosystem:
411
+
412
+ | Provider | Compression model | Cost vs main model |
413
+ |---|---|---|
414
+ | Anthropic | Claude Haiku | ~25x cheaper than Sonnet |
415
+ | OpenAI | GPT-4o-mini | ~15x cheaper than GPT-4o |
416
+ | Google | Gemini Flash 8B | ~10x cheaper than Gemini Pro |
417
+ | Ollama | Your configured local model | Free |
418
+
419
+ **Example:** Haiku compresses a 3,000-token tool result to 150 tokens. Cost: ~$0.0001. Saving on every subsequent Sonnet request: ~$0.009. Net savings per compression: ~98%.
420
+
421
+ Typical 2-hour session (50+ tool calls): ~200K tokens without compression → ~80K with Squeezr (-60%). The session cache and KV warming compound this further in long sessions.
422
+
423
+ ---
424
+
425
+ ## Why not just use /compact?
426
+
427
+ `/compact` is a nuclear option: it replaces your entire context with a single lossy summary. You lose granularity and can't go back. Squeezr is surgical — it compresses old, irrelevant content while keeping recent work at full fidelity, with lossless retrieval via `squeezr_expand` for anything that needs to be recovered.
428
+
429
+ ---
430
+
431
+ ## Auto-start
432
+
433
+ The installer configures Squeezr to start automatically on login:
434
+
435
+ | OS | Method |
436
+ |---|---|
437
+ | macOS | launchd (`~/Library/LaunchAgents/com.squeezr.plist`) |
438
+ | Linux | systemd user service (`~/.config/systemd/user/squeezr.service`) |
439
+ | Windows | Task Scheduler (runs at login, restarts on failure) |
440
+
441
+ ---
442
+
443
+ ## Requirements
444
+
445
+ - Node.js 18+
446
+ - Your AI CLI already set up and working — nothing else needed
447
+
448
+ Squeezr works with **any auth method** your CLI uses:
449
+
450
+ | Auth type | Example | Works? |
451
+ |---|---|---|
452
+ | API key | `ANTHROPIC_API_KEY=sk-ant-...` | ✅ Full pipeline |
453
+ | OAuth / subscription | Claude Code via claude.ai plan | ✅ Full pipeline — OAuth token reused for Haiku |
454
+ | Local / no key | Ollama, LM Studio | ✅ Full pipeline — local model for compression |
455
+
456
+ No extra credentials needed. Squeezr extracts and reuses whatever auth is already in your requests.
457
+
458
+ ---
459
+
460
+ ## Endpoints
461
+
462
+ | Endpoint | Description |
463
+ |---|---|
464
+ | `POST /v1/messages` | Anthropic — Claude Code |
465
+ | `POST /v1/chat/completions` | OpenAI / Ollama — Codex, Aider, OpenCode, local CLIs |
466
+ | `POST /v1beta/models/{model}:generateContent` | Google — Gemini CLI |
467
+ | `GET /squeezr/stats` | JSON session stats + cache hit rate + pattern coverage |
468
+ | `GET /squeezr/health` | Health check + version |
469
+ | `GET /squeezr/expand/:id` | Retrieve original content for a compressed block |
470
+ | `* /{path}` | All other endpoints forwarded unmodified to detected upstream |
471
+
472
+ ---
473
+
474
+ ## Changelog
475
+
476
+ See [CHANGELOG.md](CHANGELOG.md).
package/bin/squeezr.js ADDED
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env node
2
+ 'use strict'
3
+
4
+ const { spawn } = require('child_process')
5
+ const http = require('http')
6
+ const path = require('path')
7
+ const fs = require('fs')
8
+
9
+ const ROOT = path.join(__dirname, '..')
10
+ const args = process.argv.slice(2)
11
+ const command = args[0]
12
+
13
+ const HELP = `
14
+ Squeezr v1.10.0 — AI context compressor for Claude Code, Codex, Aider, Gemini CLI and Ollama
15
+
16
+ Usage:
17
+ squeezr Start the proxy (default)
18
+ squeezr start Start the proxy
19
+ squeezr gain Show token savings stats
20
+ squeezr gain --reset Reset saved stats
21
+ squeezr discover Show pattern coverage report (proxy must be running)
22
+ squeezr status Check if proxy is running
23
+ squeezr config Print config file path and current settings
24
+ squeezr help Show this help
25
+
26
+ Set your CLI to use Squeezr:
27
+ Claude Code: ANTHROPIC_BASE_URL=http://localhost:8080
28
+ Codex / Aider: OPENAI_BASE_URL=http://localhost:8080
29
+ Gemini CLI: GEMINI_API_BASE_URL=http://localhost:8080
30
+ Ollama: OPENAI_BASE_URL=http://localhost:8080
31
+ `
32
+
33
+ function runNode(script, extraArgs = []) {
34
+ const distPath = path.join(ROOT, 'dist', script)
35
+ if (!fs.existsSync(distPath)) {
36
+ console.error(`Error: ${distPath} not found. Run 'npm run build' first.`)
37
+ process.exit(1)
38
+ }
39
+ const child = spawn(process.execPath, [distPath, ...extraArgs], {
40
+ stdio: 'inherit',
41
+ cwd: ROOT,
42
+ })
43
+ child.on('exit', code => process.exit(code ?? 0))
44
+ }
45
+
46
+ async function checkStatus() {
47
+ const port = process.env.SQUEEZR_PORT || 8080
48
+ return new Promise(resolve => {
49
+ const req = http.get(`http://localhost:${port}/squeezr/health`, res => {
50
+ let data = ''
51
+ res.on('data', chunk => { data += chunk })
52
+ res.on('end', () => {
53
+ try {
54
+ const json = JSON.parse(data)
55
+ console.log(`Squeezr is running (v${json.version} on port ${port})`)
56
+ } catch {
57
+ console.log(`Squeezr is running on port ${port}`)
58
+ }
59
+ resolve(true)
60
+ })
61
+ })
62
+ req.on('error', () => {
63
+ console.log(`Squeezr is NOT running on port ${port}`)
64
+ console.log('Start it with: squeezr start')
65
+ resolve(false)
66
+ })
67
+ req.setTimeout(2000, () => {
68
+ req.destroy()
69
+ resolve(false)
70
+ })
71
+ })
72
+ }
73
+
74
+ function showConfig() {
75
+ const tomlPath = path.join(ROOT, 'squeezr.toml')
76
+ console.log(`Config file: ${tomlPath}`)
77
+ if (fs.existsSync(tomlPath)) {
78
+ console.log('\nCurrent config:')
79
+ console.log(fs.readFileSync(tomlPath, 'utf-8'))
80
+ } else {
81
+ console.log('No squeezr.toml found. Using defaults.')
82
+ }
83
+ }
84
+
85
+ switch (command) {
86
+ case undefined:
87
+ case 'start':
88
+ runNode('index.js')
89
+ break
90
+
91
+ case 'gain':
92
+ runNode('gain.js', args.slice(1))
93
+ break
94
+
95
+ case 'discover':
96
+ runNode('discover.js', args.slice(1))
97
+ break
98
+
99
+ case 'status':
100
+ checkStatus()
101
+ break
102
+
103
+ case 'config':
104
+ showConfig()
105
+ break
106
+
107
+ case '--help':
108
+ case '-h':
109
+ case 'help':
110
+ console.log(HELP)
111
+ break
112
+
113
+ default:
114
+ console.error(`Unknown command: ${command}`)
115
+ console.log(HELP)
116
+ process.exit(1)
117
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,73 @@
1
+ import { describe, it, expect, beforeEach } from 'vitest';
2
+ import { CompressionCache } from '../cache.js';
3
+ describe('CompressionCache', () => {
4
+ let cache;
5
+ beforeEach(() => {
6
+ // maxEntries=5 for fast LRU testing; file I/O fails silently in test env
7
+ cache = new CompressionCache(5);
8
+ });
9
+ it('returns undefined for a cache miss', () => {
10
+ expect(cache.get('never stored this')).toBeUndefined();
11
+ });
12
+ it('returns the compressed value after set', () => {
13
+ cache.set('original text', 'compressed');
14
+ expect(cache.get('original text')).toBe('compressed');
15
+ });
16
+ it('is keyed by text content, not reference', () => {
17
+ cache.set('hello world', 'hi');
18
+ expect(cache.get('hello' + ' ' + 'world')).toBe('hi');
19
+ });
20
+ it('tracks hit and miss counts', () => {
21
+ cache.set('foo', 'bar');
22
+ cache.get('foo'); // hit
23
+ cache.get('foo'); // hit
24
+ cache.get('miss'); // miss
25
+ const s = cache.stats();
26
+ expect(s.hits).toBe(2);
27
+ expect(s.misses).toBe(1);
28
+ });
29
+ it('calculates hit rate correctly', () => {
30
+ cache.set('a', 'x');
31
+ cache.get('a'); // hit
32
+ cache.get('b'); // miss
33
+ const s = cache.stats();
34
+ expect(s.hit_rate_pct).toBe(50);
35
+ });
36
+ it('hit rate is 0 when no requests', () => {
37
+ expect(cache.stats().hit_rate_pct).toBe(0);
38
+ });
39
+ it('evicts oldest entry when maxEntries is reached', () => {
40
+ cache.set('a', '1');
41
+ cache.set('b', '2');
42
+ cache.set('c', '3');
43
+ cache.set('d', '4');
44
+ cache.set('e', '5');
45
+ // All 5 entries stored
46
+ expect(cache.stats().size).toBe(5);
47
+ // Add one more — oldest ('a') should be evicted
48
+ cache.set('f', '6');
49
+ expect(cache.stats().size).toBe(5);
50
+ expect(cache.get('a')).toBeUndefined();
51
+ expect(cache.get('f')).toBe('6');
52
+ });
53
+ it('reports correct size (relative to initial)', () => {
54
+ // Use a large maxEntries so LRU eviction doesn't interfere
55
+ const bigCache = new CompressionCache(1000);
56
+ const initialSize = bigCache.stats().size;
57
+ bigCache.set('unique-key-x-' + Date.now(), 'y');
58
+ expect(bigCache.stats().size).toBe(initialSize + 1);
59
+ bigCache.set('unique-key-z-' + Date.now(), 'w');
60
+ expect(bigCache.stats().size).toBe(initialSize + 2);
61
+ });
62
+ it('overwrites existing entry', () => {
63
+ cache.set('key', 'first');
64
+ cache.set('key', 'second');
65
+ expect(cache.get('key')).toBe('second');
66
+ });
67
+ it('different texts produce different cache entries', () => {
68
+ cache.set('text1', 'compressed1');
69
+ cache.set('text2', 'compressed2');
70
+ expect(cache.get('text1')).toBe('compressed1');
71
+ expect(cache.get('text2')).toBe('compressed2');
72
+ });
73
+ });
@@ -0,0 +1 @@
1
+ export {};