squeezr-ai 1.21.8 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/bin/squeezr.js +42 -0
- package/dist/bypass.d.ts +9 -0
- package/dist/bypass.js +18 -0
- package/dist/circuitBreaker.d.ts +47 -0
- package/dist/circuitBreaker.js +91 -0
- package/dist/compressor.d.ts +3 -0
- package/dist/compressor.js +44 -11
- package/dist/dashboard.d.ts +1 -1
- package/dist/dashboard.js +161 -89
- package/dist/mcp.js +62 -1
- package/dist/server.js +162 -10
- package/dist/stats.d.ts +47 -2
- package/dist/stats.js +76 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -125,6 +125,7 @@ Built-in MCP server (`squeezr-mcp`) that gives any MCP-capable AI CLI real-time
|
|
|
125
125
|
| `squeezr_check_updates` | Check npm for newer Squeezr version |
|
|
126
126
|
| `squeezr_update` | Update to latest version via `npm install -g squeezr-ai@latest` |
|
|
127
127
|
| `squeezr_set_project` | Manually set/clear the current project name (overrides auto-detection) |
|
|
128
|
+
| `squeezr_bypass` | Toggle bypass mode — disable compression instantly without restart (runtime-only) |
|
|
128
129
|
|
|
129
130
|
Every MCP tool response automatically checks for updates and appends a notification banner when a new version is available.
|
|
130
131
|
|
|
@@ -245,12 +246,25 @@ squeezr gain --session # live session savings from the running proxy
|
|
|
245
246
|
squeezr gain --details # all-time stats with per-tool breakdown
|
|
246
247
|
squeezr gain --reset # reset all-time counters
|
|
247
248
|
squeezr discover # detect which AI CLIs are installed
|
|
249
|
+
squeezr bypass # toggle bypass mode (skip compression, keep logging)
|
|
250
|
+
squeezr bypass --on # enable bypass (disable compression)
|
|
251
|
+
squeezr bypass --off # disable bypass (resume compression)
|
|
248
252
|
squeezr mcp install # register MCP server in Claude Code, Cursor, Windsurf, Cline
|
|
249
253
|
squeezr mcp uninstall # remove MCP server registration
|
|
250
254
|
squeezr uninstall # remove Squeezr completely (env vars, CA, auto-start, logs)
|
|
251
255
|
squeezr version # print version
|
|
252
256
|
```
|
|
253
257
|
|
|
258
|
+
## Resilience
|
|
259
|
+
|
|
260
|
+
Squeezr sits in the critical path between your AI CLI and the upstream API. It's designed to never break your workflow:
|
|
261
|
+
|
|
262
|
+
- **Circuit breaker** — If the AI compression backend (Haiku, GPT-4o-mini, etc.) fails 3 times in a row, Squeezr automatically skips AI compression for 60 seconds, then probes recovery. Deterministic compression continues working. Visible in dashboard, `squeezr status`, and MCP.
|
|
263
|
+
- **5-second AI timeout** — Each AI compression call has a hard 5s timeout. If the backend is slow, the call is abandoned and that content passes through without AI compression.
|
|
264
|
+
- **Bypass mode** — `squeezr bypass` instantly disables all compression without restarting. Requests still pass through and are logged. Toggle via CLI, MCP, dashboard, or REST API.
|
|
265
|
+
- **Expand rate tracking** — Monitors how often the model calls `squeezr_expand` to recover compressed content. A high expand rate signals that compression is too aggressive.
|
|
266
|
+
- **Latency tracking** — p50/p95/p99 compression latency visible in dashboard and MCP stats.
|
|
267
|
+
|
|
254
268
|
## Compression backends
|
|
255
269
|
|
|
256
270
|
Squeezr uses cheap/free models for AI compression (the deterministic layer is pure regex, no API calls):
|
package/bin/squeezr.js
CHANGED
|
@@ -203,6 +203,9 @@ Usage:
|
|
|
203
203
|
squeezr mcp uninstall Remove Squeezr MCP registration
|
|
204
204
|
squeezr ports Change HTTP and MITM proxy ports
|
|
205
205
|
squeezr tunnel Expose proxy via Cloudflare Tunnel for Cursor IDE
|
|
206
|
+
squeezr bypass Toggle bypass mode (skip compression, keep logging)
|
|
207
|
+
squeezr bypass --on Enable bypass (disable compression)
|
|
208
|
+
squeezr bypass --off Disable bypass (resume compression)
|
|
206
209
|
squeezr update Kill old processes, install latest from npm, restart
|
|
207
210
|
squeezr uninstall Remove Squeezr completely (env vars, CA, auto-start, logs)
|
|
208
211
|
squeezr version Print version
|
|
@@ -383,6 +386,18 @@ async function checkStatus() {
|
|
|
383
386
|
console.log(` HTTP proxy (Claude/Aider/Gemini): http://localhost:${port}`)
|
|
384
387
|
console.log(` MITM proxy (Codex): http://localhost:${mitmPort}`)
|
|
385
388
|
console.log(` Dashboard: http://localhost:${port}/squeezr/dashboard`)
|
|
389
|
+
if (json.mode) console.log(` Mode: ${json.mode}`)
|
|
390
|
+
if (json.uptime_seconds != null) {
|
|
391
|
+
const s = json.uptime_seconds
|
|
392
|
+
const fmt = s < 60 ? `${s}s` : s < 3600 ? `${Math.floor(s/60)}m ${s%60}s` : `${Math.floor(s/3600)}h ${Math.floor((s%3600)/60)}m`
|
|
393
|
+
console.log(` Uptime: ${fmt}`)
|
|
394
|
+
}
|
|
395
|
+
if (json.bypassed) console.log(` ⚠ Bypass mode is ON (compression disabled)`)
|
|
396
|
+
if (json.circuit_breaker) {
|
|
397
|
+
const cb = json.circuit_breaker
|
|
398
|
+
const icons = { closed: '🟢 OK', open: '🔴 OPEN', 'half-open': '🟡 PROBING' }
|
|
399
|
+
console.log(` Circuit: ${icons[cb.state] || cb.state}${cb.total_trips ? ` (${cb.total_trips} trip${cb.total_trips > 1 ? 's' : ''})` : ''}`)
|
|
400
|
+
}
|
|
386
401
|
} catch {
|
|
387
402
|
console.log(`Squeezr is running on port ${port}`)
|
|
388
403
|
}
|
|
@@ -1532,6 +1547,33 @@ switch (command) {
|
|
|
1532
1547
|
await startTunnel()
|
|
1533
1548
|
break
|
|
1534
1549
|
|
|
1550
|
+
case 'bypass':
|
|
1551
|
+
await (async () => {
|
|
1552
|
+
const port = getPort()
|
|
1553
|
+
const body = args[1] === '--on' ? JSON.stringify({ enabled: true })
|
|
1554
|
+
: args[1] === '--off' ? JSON.stringify({ enabled: false })
|
|
1555
|
+
: '{}'
|
|
1556
|
+
try {
|
|
1557
|
+
const res = await fetch(`http://localhost:${port}/squeezr/bypass`, {
|
|
1558
|
+
method: 'POST',
|
|
1559
|
+
headers: { 'content-type': 'application/json' },
|
|
1560
|
+
body,
|
|
1561
|
+
})
|
|
1562
|
+
const json = await res.json()
|
|
1563
|
+
if (json.bypassed) {
|
|
1564
|
+
console.log('⏸️ Bypass mode ON — compression disabled')
|
|
1565
|
+
console.log(' Requests pass through uncompressed but are still logged.')
|
|
1566
|
+
console.log(' Turn off: squeezr bypass --off')
|
|
1567
|
+
} else {
|
|
1568
|
+
console.log('▶️ Bypass mode OFF — compression active')
|
|
1569
|
+
}
|
|
1570
|
+
} catch {
|
|
1571
|
+
console.log('Squeezr is NOT running')
|
|
1572
|
+
console.log('Start it with: squeezr start')
|
|
1573
|
+
}
|
|
1574
|
+
})()
|
|
1575
|
+
break
|
|
1576
|
+
|
|
1535
1577
|
case 'uninstall':
|
|
1536
1578
|
await uninstall()
|
|
1537
1579
|
break
|
package/dist/bypass.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bypass module — runtime-only compression toggle.
|
|
3
|
+
*
|
|
4
|
+
* When bypass is ON, requests pass through uncompressed but are still logged.
|
|
5
|
+
* Resets on process restart. Does not touch config files.
|
|
6
|
+
*/
|
|
7
|
+
export declare function isBypassed(): boolean;
|
|
8
|
+
export declare function setBypassed(val: boolean): void;
|
|
9
|
+
export declare function toggleBypassed(): boolean;
|
package/dist/bypass.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bypass module — runtime-only compression toggle.
|
|
3
|
+
*
|
|
4
|
+
* When bypass is ON, requests pass through uncompressed but are still logged.
|
|
5
|
+
* Resets on process restart. Does not touch config files.
|
|
6
|
+
*/
|
|
7
|
+
// Module-private flag; lives only in memory, so it starts out false on every
// process start.
let bypassActive = false;

/**
 * Report whether bypass mode is currently switched on.
 *
 * @returns {boolean} the current value of the in-memory bypass flag
 */
export function isBypassed() {
    return bypassActive;
}

/**
 * Set bypass mode to an explicit value and log the new mode to stdout.
 *
 * @param {boolean} val - true to turn bypass on, false to turn it off
 * @returns {void}
 */
export function setBypassed(val) {
    bypassActive = val;
    const label = val ? 'ON — compression disabled' : 'OFF — compression active';
    console.log(`[squeezr] Bypass mode ${label}`);
}

/**
 * Flip bypass mode to the opposite of its current value.
 *
 * @returns {boolean} the bypass flag after flipping
 */
export function toggleBypassed() {
    const next = !bypassActive;
    setBypassed(next);
    return bypassActive;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit breaker for AI compression calls.
|
|
3
|
+
*
|
|
4
|
+
* Prevents hammering a down backend (Haiku, GPT-4o-mini, etc.)
|
|
5
|
+
* with repeated failing requests. After N consecutive failures,
|
|
6
|
+
* the circuit opens and all AI compression is skipped for a cooldown
|
|
7
|
+
* period, then a single probe is allowed to test recovery.
|
|
8
|
+
*
|
|
9
|
+
* States:
|
|
10
|
+
* closed → normal operation, AI compression enabled
|
|
11
|
+
* open → backend down, all AI calls skipped (passthrough)
|
|
12
|
+
* half-open → cooldown elapsed, allow one probe call
|
|
13
|
+
*/
|
|
14
|
+
export type CircuitState = 'closed' | 'open' | 'half-open';
|
|
15
|
+
export interface CircuitBreakerConfig {
|
|
16
|
+
failureThreshold: number;
|
|
17
|
+
resetTimeoutMs: number;
|
|
18
|
+
callTimeoutMs: number;
|
|
19
|
+
}
|
|
20
|
+
export interface CircuitSnapshot {
|
|
21
|
+
state: CircuitState;
|
|
22
|
+
consecutive_failures: number;
|
|
23
|
+
last_failure_time: number | null;
|
|
24
|
+
last_success_time: number | null;
|
|
25
|
+
total_trips: number;
|
|
26
|
+
config: CircuitBreakerConfig;
|
|
27
|
+
}
|
|
28
|
+
export declare class CircuitBreaker {
|
|
29
|
+
private state;
|
|
30
|
+
private consecutiveFailures;
|
|
31
|
+
private lastFailureTime;
|
|
32
|
+
private lastSuccessTime;
|
|
33
|
+
private totalTrips;
|
|
34
|
+
private config;
|
|
35
|
+
constructor(config?: Partial<CircuitBreakerConfig>);
|
|
36
|
+
/** Returns current state, transitioning open→half-open if cooldown elapsed. */
|
|
37
|
+
getState(): CircuitState;
|
|
38
|
+
/** Whether the next AI call should be attempted. */
|
|
39
|
+
shouldAllow(): boolean;
|
|
40
|
+
recordSuccess(): void;
|
|
41
|
+
recordFailure(): void;
|
|
42
|
+
/** Wraps an async AI call with timeout and circuit logic. */
|
|
43
|
+
call<T>(fn: () => Promise<T>): Promise<T>;
|
|
44
|
+
snapshot(): CircuitSnapshot;
|
|
45
|
+
}
|
|
46
|
+
/** Singleton circuit breaker for all AI compression backends. */
|
|
47
|
+
export declare const circuitBreaker: CircuitBreaker;
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit breaker for AI compression calls.
|
|
3
|
+
*
|
|
4
|
+
* Prevents hammering a down backend (Haiku, GPT-4o-mini, etc.)
|
|
5
|
+
* with repeated failing requests. After N consecutive failures,
|
|
6
|
+
* the circuit opens and all AI compression is skipped for a cooldown
|
|
7
|
+
* period, then a single probe is allowed to test recovery.
|
|
8
|
+
*
|
|
9
|
+
* States:
|
|
10
|
+
* closed → normal operation, AI compression enabled
|
|
11
|
+
* open → backend down, all AI calls skipped (passthrough)
|
|
12
|
+
* half-open → cooldown elapsed, allow one probe call
|
|
13
|
+
*/
|
|
14
|
+
/** Defaults: trip after 3 consecutive failures, 60s cooldown, 5s per-call timeout. */
const DEFAULT_CONFIG = {
    failureThreshold: 3,
    resetTimeoutMs: 60_000,
    callTimeoutMs: 5_000,
};

/**
 * Three-state circuit breaker (closed / open / half-open) that guards an
 * async call with a per-call timeout.
 *
 * - closed:    calls are attempted; consecutive failures are counted.
 * - open:      calls are rejected immediately without invoking the callback.
 * - half-open: entered lazily once `resetTimeoutMs` has passed since the last
 *              recorded failure; calls are attempted again and the next
 *              success closes the circuit, the next failure re-opens it.
 *
 * NOTE(review): while half-open, every caller is let through, not just one
 * probe; concurrent callers may all reach the backend before the first result
 * is recorded.
 */
export class CircuitBreaker {
    state = 'closed';
    consecutiveFailures = 0;
    lastFailureTime = null;
    lastSuccessTime = null;
    totalTrips = 0;
    config;

    /**
     * @param {Partial<{failureThreshold: number, resetTimeoutMs: number, callTimeoutMs: number}>} [config]
     *        Overrides merged on top of DEFAULT_CONFIG.
     */
    constructor(config) {
        this.config = { ...DEFAULT_CONFIG, ...config };
    }

    /**
     * Returns current state, transitioning open→half-open if cooldown elapsed.
     *
     * @returns {'closed' | 'open' | 'half-open'}
     */
    getState() {
        if (this.state === 'open' &&
            this.lastFailureTime !== null &&
            Date.now() - this.lastFailureTime >= this.config.resetTimeoutMs) {
            this.state = 'half-open';
            console.log('[squeezr] Circuit breaker → HALF-OPEN (probing)');
        }
        return this.state;
    }

    /**
     * Whether the next AI call should be attempted.
     *
     * @returns {boolean} false only while the circuit is open
     */
    shouldAllow() {
        return this.getState() !== 'open';
    }

    /** Record a successful call: reset the failure streak and close the circuit. */
    recordSuccess() {
        if (this.state === 'half-open') {
            console.log('[squeezr] Circuit breaker → CLOSED (backend recovered)');
        }
        this.consecutiveFailures = 0;
        this.state = 'closed';
        this.lastSuccessTime = Date.now();
    }

    /**
     * Record a failed call and open the circuit once the failure streak
     * reaches `failureThreshold`. A trip is counted only on the transition
     * into 'open', not for failures recorded while already open.
     */
    recordFailure() {
        this.consecutiveFailures++;
        this.lastFailureTime = Date.now();
        if (this.consecutiveFailures >= this.config.failureThreshold && this.state !== 'open') {
            this.state = 'open';
            this.totalTrips++;
            console.log(`[squeezr] Circuit breaker → OPEN (${this.consecutiveFailures} consecutive failures, ` +
                `cooldown ${this.config.resetTimeoutMs / 1000}s)`);
        }
    }

    /**
     * Wraps an async AI call with timeout and circuit logic.
     *
     * @template T
     * @param {() => Promise<T>} fn - the call to guard
     * @returns {Promise<T>} the value `fn` resolves with
     * @throws {Error} immediately (without calling `fn` or recording a
     *         failure) when the circuit is open; 'AI compression timeout'
     *         when `fn` does not settle within `callTimeoutMs`; otherwise
     *         whatever `fn` throws or rejects with.
     */
    async call(fn) {
        if (!this.shouldAllow()) {
            throw new Error('Circuit breaker is open — AI compression skipped');
        }
        let timer;
        try {
            const result = await Promise.race([
                fn(),
                new Promise((_, reject) => {
                    timer = setTimeout(() => reject(new Error('AI compression timeout')), this.config.callTimeoutMs);
                }),
            ]);
            this.recordSuccess();
            return result;
        }
        catch (err) {
            this.recordFailure();
            throw err;
        }
        finally {
            // Without this, every call that settles before the deadline leaves
            // a live timer behind that keeps the event loop busy for
            // callTimeoutMs. clearTimeout(undefined) is a no-op, which covers
            // the case where fn() threw synchronously before the timer existed.
            clearTimeout(timer);
        }
    }

    /**
     * Point-in-time view of the breaker for status output.
     *
     * @returns {{state: string, consecutive_failures: number, last_failure_time: number | null,
     *            last_success_time: number | null, total_trips: number, config: object}}
     */
    snapshot() {
        return {
            state: this.getState(),
            consecutive_failures: this.consecutiveFailures,
            last_failure_time: this.lastFailureTime,
            last_success_time: this.lastSuccessTime,
            total_trips: this.totalTrips,
            // Copy so consumers of the snapshot cannot mutate live thresholds.
            config: { ...this.config },
        };
    }
}

/** Singleton circuit breaker for all AI compression backends. */
export const circuitBreaker = new CircuitBreaker();
|
package/dist/compressor.d.ts
CHANGED
|
@@ -15,6 +15,8 @@ export interface Savings {
|
|
|
15
15
|
dedupSavedChars?: number;
|
|
16
16
|
aiSavedChars?: number;
|
|
17
17
|
overheadChars?: number;
|
|
18
|
+
detMs?: number;
|
|
19
|
+
aiMs?: number;
|
|
18
20
|
}
|
|
19
21
|
export declare function getCache(config: Config): CompressionCache;
|
|
20
22
|
interface AnthropicMessage {
|
|
@@ -50,4 +52,5 @@ interface GeminiContent {
|
|
|
50
52
|
}>;
|
|
51
53
|
}
|
|
52
54
|
export declare function compressGeminiContents(contents: GeminiContent[], apiKey: string, config: Config): Promise<[GeminiContent[], Savings]>;
|
|
55
|
+
export declare function emptySavings(dryRun?: boolean, detSavedChars?: number, dedupSavedChars?: number, detMs?: number): Savings;
|
|
53
56
|
export {};
|
package/dist/compressor.js
CHANGED
|
@@ -5,6 +5,7 @@ import { preprocess, preprocessForTool, hitPattern } from './deterministic.js';
|
|
|
5
5
|
import { storeOriginal } from './expand.js';
|
|
6
6
|
import { hashText, getBlock, setBlock } from './sessionCache.js';
|
|
7
7
|
import { effectiveThreshold, effectiveKeepRecent, aiEnabled } from './config.js';
|
|
8
|
+
import { circuitBreaker } from './circuitBreaker.js';
|
|
8
9
|
const COMPRESS_PROMPT = 'You are compressing a coding tool output to save tokens. ' +
|
|
9
10
|
'Extract ONLY what is essential: errors, file paths, function names, ' +
|
|
10
11
|
'test failures, key values, warnings. ' +
|
|
@@ -78,11 +79,14 @@ async function runCompression(items, compressFn, config) {
|
|
|
78
79
|
if (cached)
|
|
79
80
|
return { ...item, original: item.text, result: cached };
|
|
80
81
|
}
|
|
81
|
-
const compressed = await compressFn(preprocessed);
|
|
82
|
+
const compressed = await circuitBreaker.call(() => compressFn(preprocessed));
|
|
82
83
|
if (config.cacheEnabled)
|
|
83
84
|
cache.set(preprocessed, compressed);
|
|
84
85
|
return { ...item, original: item.text, result: compressed };
|
|
85
86
|
}));
|
|
87
|
+
const failures = results.filter(r => r.status === 'rejected').length;
|
|
88
|
+
if (failures > 0)
|
|
89
|
+
console.log(`[squeezr] ${failures} AI compression(s) failed (circuit: ${circuitBreaker.getState()})`);
|
|
86
90
|
return results
|
|
87
91
|
.filter((r) => r.status === 'fulfilled')
|
|
88
92
|
.map((r) => r.value);
|
|
@@ -186,6 +190,7 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
|
|
|
186
190
|
}
|
|
187
191
|
// ── Step 1: Deterministic preprocessing on ALL tool results (turn 1+) ───────
|
|
188
192
|
// Replaces RTK: applied to recent blocks too, no manual `rtk` prefix needed.
|
|
193
|
+
const detT0 = Date.now();
|
|
189
194
|
let detSaved = 0;
|
|
190
195
|
for (const { index, subIndex, text, tool } of allResults) {
|
|
191
196
|
if (dedupedSet.has(`${index}:${subIndex}`))
|
|
@@ -197,6 +202,7 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
|
|
|
197
202
|
detSaved += text.length - det.length;
|
|
198
203
|
}
|
|
199
204
|
}
|
|
205
|
+
const detMs = Date.now() - detT0;
|
|
200
206
|
if (detSaved > 0) {
|
|
201
207
|
const tokens = Math.round(detSaved / 3.5);
|
|
202
208
|
console.log(`[squeezr/det] Deterministic: -${detSaved.toLocaleString()} chars (~${tokens} tokens) across ${allResults.length} block(s)`);
|
|
@@ -205,11 +211,16 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
|
|
|
205
211
|
const candidates = allResults.slice(0, Math.max(0, allResults.length - effectiveKeepRecent(config)));
|
|
206
212
|
const toProcess = candidates.filter(c => c.text.length >= threshold && !dedupedSet.has(`${c.index}:${c.subIndex}`));
|
|
207
213
|
if (toProcess.length === 0)
|
|
208
|
-
return [msgs, emptySavings(false, detSaved, readDedupSaved)];
|
|
214
|
+
return [msgs, emptySavings(false, detSaved, readDedupSaved, detMs)];
|
|
215
|
+
// Circuit breaker: skip AI compression entirely if backend is down
|
|
216
|
+
if (!circuitBreaker.shouldAllow()) {
|
|
217
|
+
console.log(`[squeezr] Circuit breaker open — skipping AI compression for ${toProcess.length} block(s)`);
|
|
218
|
+
return [msgs, emptySavings(false, detSaved, readDedupSaved, detMs)];
|
|
219
|
+
}
|
|
209
220
|
if (config.dryRun) {
|
|
210
221
|
const potential = toProcess.reduce((sum, c) => sum + c.text.length, 0);
|
|
211
222
|
console.log(`[squeezr dry-run] Would AI-compress ${toProcess.length} block(s) | potential -${potential.toLocaleString()} chars | pressure=${Math.round(pressure * 100)}%`);
|
|
212
|
-
return [msgs, emptySavings(true, detSaved, readDedupSaved)];
|
|
223
|
+
return [msgs, emptySavings(true, detSaved, readDedupSaved, detMs)];
|
|
213
224
|
}
|
|
214
225
|
// Differential: split session cache hits from uncached
|
|
215
226
|
const sessionHits = [];
|
|
@@ -226,9 +237,11 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
|
|
|
226
237
|
toCompress.push(c);
|
|
227
238
|
}
|
|
228
239
|
}
|
|
240
|
+
const aiT0 = Date.now();
|
|
229
241
|
const freshlyCompressed = toCompress.length > 0
|
|
230
242
|
? await runCompression(toCompress, t => compressWithHaiku(t, apiKey), config)
|
|
231
243
|
: [];
|
|
244
|
+
const aiMs = Date.now() - aiT0;
|
|
232
245
|
let totalOriginal = 0;
|
|
233
246
|
let totalCompressed = 0;
|
|
234
247
|
let totalOverhead = 0;
|
|
@@ -266,6 +279,8 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
|
|
|
266
279
|
dedupSavedChars: readDedupSaved,
|
|
267
280
|
aiSavedChars: totalAiSaved,
|
|
268
281
|
overheadChars: totalOverhead,
|
|
282
|
+
detMs,
|
|
283
|
+
aiMs,
|
|
269
284
|
}];
|
|
270
285
|
}
|
|
271
286
|
function extractOpenAIToolResults(messages) {
|
|
@@ -331,6 +346,7 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
|
|
|
331
346
|
}
|
|
332
347
|
}
|
|
333
348
|
// Step 1: Deterministic preprocessing on ALL tool results
|
|
349
|
+
const oaiDetT0 = Date.now();
|
|
334
350
|
let detSaved = 0;
|
|
335
351
|
for (const { index, text, tool } of allResults) {
|
|
336
352
|
if (dedupedIndices.has(index))
|
|
@@ -341,6 +357,7 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
|
|
|
341
357
|
detSaved += text.length - det.length;
|
|
342
358
|
}
|
|
343
359
|
}
|
|
360
|
+
const oaiDetMs = Date.now() - oaiDetT0;
|
|
344
361
|
if (detSaved > 0) {
|
|
345
362
|
const tag = isLocal ? 'ollama' : 'codex';
|
|
346
363
|
console.log(`[squeezr/det/${tag}] Deterministic: -${detSaved.toLocaleString()} chars across ${allResults.length} block(s)`);
|
|
@@ -349,11 +366,16 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
|
|
|
349
366
|
const candidates = allResults.slice(0, Math.max(0, allResults.length - effectiveKeepRecent(config)));
|
|
350
367
|
const toProcess = candidates.filter(c => c.text.length >= threshold && !dedupedIndices.has(c.index));
|
|
351
368
|
if (toProcess.length === 0)
|
|
352
|
-
return [msgs, emptySavings(false, detSaved, readDedupSaved)];
|
|
369
|
+
return [msgs, emptySavings(false, detSaved, readDedupSaved, oaiDetMs)];
|
|
370
|
+
// Circuit breaker: skip AI compression entirely if backend is down
|
|
371
|
+
if (!circuitBreaker.shouldAllow()) {
|
|
372
|
+
console.log(`[squeezr] Circuit breaker open — skipping AI compression for ${toProcess.length} block(s)`);
|
|
373
|
+
return [msgs, emptySavings(false, detSaved, readDedupSaved, oaiDetMs)];
|
|
374
|
+
}
|
|
353
375
|
if (config.dryRun) {
|
|
354
376
|
const tag = isLocal ? 'ollama' : 'codex';
|
|
355
377
|
console.log(`[squeezr dry-run/${tag}] Would AI-compress ${toProcess.length} block(s) | potential -${toProcess.reduce((s, c) => s + c.text.length, 0).toLocaleString()} chars`);
|
|
356
|
-
return [msgs, emptySavings(true, detSaved, readDedupSaved)];
|
|
378
|
+
return [msgs, emptySavings(true, detSaved, readDedupSaved, oaiDetMs)];
|
|
357
379
|
}
|
|
358
380
|
const sessionHits = [];
|
|
359
381
|
const toCompress = [];
|
|
@@ -372,9 +394,11 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
|
|
|
372
394
|
const compressFn = isLocal
|
|
373
395
|
? t => compressWithOllama(t, config.localUpstreamUrl, config.localCompressionModel)
|
|
374
396
|
: t => compressWithGptMini(t, apiKey);
|
|
397
|
+
const oaiAiT0 = Date.now();
|
|
375
398
|
const freshlyCompressed = toCompress.length > 0
|
|
376
399
|
? await runCompression(toCompress, compressFn, config)
|
|
377
400
|
: [];
|
|
401
|
+
const oaiAiMs = Date.now() - oaiAiT0;
|
|
378
402
|
let totalOriginal = 0, totalCompressed = 0, totalOverhead = 0, totalAiSaved = 0;
|
|
379
403
|
const byTool = [];
|
|
380
404
|
for (const { index, tool, block } of sessionHits) {
|
|
@@ -399,7 +423,7 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
|
|
|
399
423
|
}
|
|
400
424
|
if (sessionHits.length > 0)
|
|
401
425
|
console.log(`[squeezr] Session cache: ${sessionHits.length} block(s) reused`);
|
|
402
|
-
return [msgs, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: readDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead }];
|
|
426
|
+
return [msgs, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: readDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead, detMs: oaiDetMs, aiMs: oaiAiMs }];
|
|
403
427
|
}
|
|
404
428
|
export async function compressGeminiContents(contents, apiKey, config) {
|
|
405
429
|
if (config.disabled)
|
|
@@ -456,6 +480,7 @@ export async function compressGeminiContents(contents, apiKey, config) {
|
|
|
456
480
|
}
|
|
457
481
|
}
|
|
458
482
|
// Step 1: Deterministic preprocessing on ALL tool results
|
|
483
|
+
const gemDetT0 = Date.now();
|
|
459
484
|
let detSaved = 0;
|
|
460
485
|
for (const { index, subIndex, text, tool } of allResults) {
|
|
461
486
|
if (geminiDedupedSet.has(`${index}:${subIndex}`))
|
|
@@ -466,16 +491,22 @@ export async function compressGeminiContents(contents, apiKey, config) {
|
|
|
466
491
|
detSaved += text.length - det.length;
|
|
467
492
|
}
|
|
468
493
|
}
|
|
494
|
+
const gemDetMs = Date.now() - gemDetT0;
|
|
469
495
|
if (detSaved > 0)
|
|
470
496
|
console.log(`[squeezr/det/gemini] Deterministic: -${detSaved.toLocaleString()} chars across ${allResults.length} block(s)`);
|
|
471
497
|
// Step 2: AI compression for old blocks above threshold
|
|
472
498
|
const candidates = allResults.slice(0, Math.max(0, allResults.length - effectiveKeepRecent(config)))
|
|
473
499
|
.filter(c => c.text.length >= threshold && !geminiDedupedSet.has(`${c.index}:${c.subIndex}`));
|
|
474
500
|
if (candidates.length === 0)
|
|
475
|
-
return [cts, emptySavings(false, detSaved, geminiReadDedupSaved)];
|
|
501
|
+
return [cts, emptySavings(false, detSaved, geminiReadDedupSaved, gemDetMs)];
|
|
502
|
+
// Circuit breaker: skip AI compression entirely if backend is down
|
|
503
|
+
if (!circuitBreaker.shouldAllow()) {
|
|
504
|
+
console.log(`[squeezr] Circuit breaker open — skipping AI compression for ${candidates.length} block(s)`);
|
|
505
|
+
return [cts, emptySavings(false, detSaved, geminiReadDedupSaved, gemDetMs)];
|
|
506
|
+
}
|
|
476
507
|
if (config.dryRun) {
|
|
477
508
|
console.log(`[squeezr dry-run/gemini] Would AI-compress ${candidates.length} block(s) | potential -${candidates.reduce((s, c) => s + c.text.length, 0).toLocaleString()} chars`);
|
|
478
|
-
return [cts, emptySavings(true, detSaved, geminiReadDedupSaved)];
|
|
509
|
+
return [cts, emptySavings(true, detSaved, geminiReadDedupSaved, gemDetMs)];
|
|
479
510
|
}
|
|
480
511
|
const sessionHits = [];
|
|
481
512
|
const toCompress = [];
|
|
@@ -486,9 +517,11 @@ export async function compressGeminiContents(contents, apiKey, config) {
|
|
|
486
517
|
else if (aiEnabled())
|
|
487
518
|
toCompress.push(c);
|
|
488
519
|
}
|
|
520
|
+
const gemAiT0 = Date.now();
|
|
489
521
|
const freshlyCompressed = toCompress.length > 0
|
|
490
522
|
? await runCompression(toCompress, t => compressWithGeminiFlash(t, apiKey), config)
|
|
491
523
|
: [];
|
|
524
|
+
const gemAiMs = Date.now() - gemAiT0;
|
|
492
525
|
let totalOriginal = 0, totalCompressed = 0, totalOverhead = 0, totalAiSaved = 0;
|
|
493
526
|
const byTool = [];
|
|
494
527
|
for (const { index, subIndex, tool, block } of sessionHits) {
|
|
@@ -509,8 +542,8 @@ export async function compressGeminiContents(contents, apiKey, config) {
|
|
|
509
542
|
}
|
|
510
543
|
if (sessionHits.length > 0)
|
|
511
544
|
console.log(`[squeezr/gemini] Session cache: ${sessionHits.length} block(s) reused`);
|
|
512
|
-
return [cts, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: geminiReadDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead }];
|
|
545
|
+
return [cts, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: geminiReadDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead, detMs: gemDetMs, aiMs: gemAiMs }];
|
|
513
546
|
}
|
|
514
|
-
function emptySavings(dryRun = false, detSavedChars = 0, dedupSavedChars = 0) {
|
|
515
|
-
return { compressed: 0, savedChars: 0, originalChars: 0, byTool: [], dryRun, sessionCacheHits: 0, detSavedChars, dedupSavedChars, aiSavedChars: 0, overheadChars: 0 };
|
|
547
|
+
/**
 * Build a Savings record whose AI-compression counters are all zero.
 *
 * Only the dry-run flag, the deterministic and dedup savings, and the
 * deterministic timing are taken from the caller; every other field is a
 * fixed zero/empty value.
 *
 * @param {boolean} [dryRun=false] - value for the `dryRun` field
 * @param {number} [detSavedChars=0] - value for the `detSavedChars` field
 * @param {number} [dedupSavedChars=0] - value for the `dedupSavedChars` field
 * @param {number} [detMs=0] - value for the `detMs` field
 * @returns {object} a fresh Savings object (new `byTool` array on each call)
 */
export function emptySavings(dryRun = false, detSavedChars = 0, dedupSavedChars = 0, detMs = 0) {
    const savings = {
        compressed: 0,
        savedChars: 0,
        originalChars: 0,
        byTool: [],
        dryRun,
        sessionCacheHits: 0,
        detSavedChars,
        dedupSavedChars,
        aiSavedChars: 0,
        overheadChars: 0,
        detMs,
        aiMs: 0,
    };
    return savings;
}
|