squeezr-ai 1.21.7 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -125,6 +125,7 @@ Built-in MCP server (`squeezr-mcp`) that gives any MCP-capable AI CLI real-time
125
125
  | `squeezr_check_updates` | Check npm for newer Squeezr version |
126
126
  | `squeezr_update` | Update to latest version via `npm install -g squeezr-ai@latest` |
127
127
  | `squeezr_set_project` | Manually set/clear the current project name (overrides auto-detection) |
128
+ | `squeezr_bypass` | Toggle bypass mode — disable compression instantly without restart (runtime-only) |
128
129
 
129
130
  Every MCP tool response automatically checks for updates and appends a notification banner when a new version is available.
130
131
 
@@ -245,12 +246,25 @@ squeezr gain --session # live session savings from the running proxy
245
246
  squeezr gain --details # all-time stats with per-tool breakdown
246
247
  squeezr gain --reset # reset all-time counters
247
248
  squeezr discover # detect which AI CLIs are installed
249
+ squeezr bypass # toggle bypass mode (skip compression, keep logging)
250
+ squeezr bypass --on # enable bypass (disable compression)
251
+ squeezr bypass --off # disable bypass (resume compression)
248
252
  squeezr mcp install # register MCP server in Claude Code, Cursor, Windsurf, Cline
249
253
  squeezr mcp uninstall # remove MCP server registration
250
254
  squeezr uninstall # remove Squeezr completely (env vars, CA, auto-start, logs)
251
255
  squeezr version # print version
252
256
  ```
253
257
 
258
+ ## Resilience
259
+
260
+ Squeezr sits in the critical path between your AI CLI and the upstream API. It's designed to never break your workflow:
261
+
262
+ - **Circuit breaker** — If the AI compression backend (Haiku, GPT-4o-mini, etc.) fails 3 times in a row, Squeezr automatically skips AI compression for 60 seconds, then probes recovery. Deterministic compression continues working. Visible in dashboard, `squeezr status`, and MCP.
263
+ - **5-second AI timeout** — Each AI compression call has a hard 5s timeout. If the backend is slow, the original content passes through unmodified.
264
+ - **Bypass mode** — `squeezr bypass` instantly disables all compression without restarting. Requests still pass through and are logged. Toggle via CLI, MCP, dashboard, or REST API.
265
+ - **Expand rate tracking** — Monitors how often the model calls `squeezr_expand` to recover compressed content. A high expand rate signals that the compression is too aggressive.
266
+ - **Latency tracking** — p50/p95/p99 compression latency visible in dashboard and MCP stats.
267
+
254
268
  ## Compression backends
255
269
 
256
270
  Squeezr uses cheap/free models for AI compression (the deterministic layer is pure regex, no API calls):
package/bin/squeezr.js CHANGED
@@ -203,6 +203,9 @@ Usage:
203
203
  squeezr mcp uninstall Remove Squeezr MCP registration
204
204
  squeezr ports Change HTTP and MITM proxy ports
205
205
  squeezr tunnel Expose proxy via Cloudflare Tunnel for Cursor IDE
206
+ squeezr bypass Toggle bypass mode (skip compression, keep logging)
207
+ squeezr bypass --on Enable bypass (disable compression)
208
+ squeezr bypass --off Disable bypass (resume compression)
206
209
  squeezr update Kill old processes, install latest from npm, restart
207
210
  squeezr uninstall Remove Squeezr completely (env vars, CA, auto-start, logs)
208
211
  squeezr version Print version
@@ -383,6 +386,18 @@ async function checkStatus() {
383
386
  console.log(` HTTP proxy (Claude/Aider/Gemini): http://localhost:${port}`)
384
387
  console.log(` MITM proxy (Codex): http://localhost:${mitmPort}`)
385
388
  console.log(` Dashboard: http://localhost:${port}/squeezr/dashboard`)
389
+ if (json.mode) console.log(` Mode: ${json.mode}`)
390
+ if (json.uptime_seconds != null) {
391
+ const s = json.uptime_seconds
392
+ const fmt = s < 60 ? `${s}s` : s < 3600 ? `${Math.floor(s/60)}m ${s%60}s` : `${Math.floor(s/3600)}h ${Math.floor((s%3600)/60)}m`
393
+ console.log(` Uptime: ${fmt}`)
394
+ }
395
+ if (json.bypassed) console.log(` ⚠ Bypass mode is ON (compression disabled)`)
396
+ if (json.circuit_breaker) {
397
+ const cb = json.circuit_breaker
398
+ const icons = { closed: '🟢 OK', open: '🔴 OPEN', 'half-open': '🟡 PROBING' }
399
+ console.log(` Circuit: ${icons[cb.state] || cb.state}${cb.total_trips ? ` (${cb.total_trips} trip${cb.total_trips > 1 ? 's' : ''})` : ''}`)
400
+ }
386
401
  } catch {
387
402
  console.log(`Squeezr is running on port ${port}`)
388
403
  }
@@ -1532,6 +1547,33 @@ switch (command) {
1532
1547
  await startTunnel()
1533
1548
  break
1534
1549
 
1550
+ case 'bypass':
1551
+ await (async () => {
1552
+ const port = getPort()
1553
+ const body = args[1] === '--on' ? JSON.stringify({ enabled: true })
1554
+ : args[1] === '--off' ? JSON.stringify({ enabled: false })
1555
+ : '{}'
1556
+ try {
1557
+ const res = await fetch(`http://localhost:${port}/squeezr/bypass`, {
1558
+ method: 'POST',
1559
+ headers: { 'content-type': 'application/json' },
1560
+ body,
1561
+ })
1562
+ const json = await res.json()
1563
+ if (json.bypassed) {
1564
+ console.log('⏸️ Bypass mode ON — compression disabled')
1565
+ console.log(' Requests pass through uncompressed but are still logged.')
1566
+ console.log(' Turn off: squeezr bypass --off')
1567
+ } else {
1568
+ console.log('▶️ Bypass mode OFF — compression active')
1569
+ }
1570
+ } catch {
1571
+ console.log('Squeezr is NOT running')
1572
+ console.log('Start it with: squeezr start')
1573
+ }
1574
+ })()
1575
+ break
1576
+
1535
1577
  case 'uninstall':
1536
1578
  await uninstall()
1537
1579
  break
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Bypass module — runtime-only compression toggle.
3
+ *
4
+ * When bypass is ON, requests pass through uncompressed but are still logged.
5
+ * Resets on process restart. Does not touch config files.
6
+ */
7
+ export declare function isBypassed(): boolean;
8
+ export declare function setBypassed(val: boolean): void;
9
+ export declare function toggleBypassed(): boolean;
package/dist/bypass.js ADDED
@@ -0,0 +1,18 @@
1
/**
 * Bypass module — runtime-only compression toggle.
 *
 * When bypass is ON, requests pass through uncompressed but are still logged.
 * Resets on process restart. Does not touch config files.
 */
let bypassed = false;

/**
 * Reports whether bypass mode is currently on.
 * @returns {boolean} true when compression is bypassed.
 */
export function isBypassed() {
    return bypassed;
}

/**
 * Turns bypass mode on or off and logs the new state.
 * @param {boolean} val - desired bypass state; any non-boolean is coerced by
 *   truthiness so that isBypassed() always returns a real boolean.
 * @returns {void}
 */
export function setBypassed(val) {
    bypassed = Boolean(val);
    console.log(`[squeezr] Bypass mode ${bypassed ? 'ON — compression disabled' : 'OFF — compression active'}`);
}

/**
 * Flips bypass mode.
 * @returns {boolean} the bypass state after the flip.
 */
export function toggleBypassed() {
    setBypassed(!bypassed);
    return bypassed;
}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Circuit breaker for AI compression calls.
3
+ *
4
+ * Prevents hammering a down backend (Haiku, GPT-4o-mini, etc.)
5
+ * with repeated failing requests. After N consecutive failures,
6
+ * the circuit opens and all AI compression is skipped for a cooldown
7
+ * period, then a single probe is allowed to test recovery.
8
+ *
9
+ * States:
10
+ * closed → normal operation, AI compression enabled
11
+ * open → backend down, all AI calls skipped (passthrough)
12
+ * half-open → cooldown elapsed, allow one probe call
13
+ */
14
+ export type CircuitState = 'closed' | 'open' | 'half-open';
15
+ export interface CircuitBreakerConfig {
16
+ failureThreshold: number;
17
+ resetTimeoutMs: number;
18
+ callTimeoutMs: number;
19
+ }
20
+ export interface CircuitSnapshot {
21
+ state: CircuitState;
22
+ consecutive_failures: number;
23
+ last_failure_time: number | null;
24
+ last_success_time: number | null;
25
+ total_trips: number;
26
+ config: CircuitBreakerConfig;
27
+ }
28
+ export declare class CircuitBreaker {
29
+ private state;
30
+ private consecutiveFailures;
31
+ private lastFailureTime;
32
+ private lastSuccessTime;
33
+ private totalTrips;
34
+ private config;
35
+ constructor(config?: Partial<CircuitBreakerConfig>);
36
+ /** Returns current state, transitioning open→half-open if cooldown elapsed. */
37
+ getState(): CircuitState;
38
+ /** Whether the next AI call should be attempted. */
39
+ shouldAllow(): boolean;
40
+ recordSuccess(): void;
41
+ recordFailure(): void;
42
+ /** Wraps an async AI call with timeout and circuit logic. */
43
+ call<T>(fn: () => Promise<T>): Promise<T>;
44
+ snapshot(): CircuitSnapshot;
45
+ }
46
+ /** Singleton circuit breaker for all AI compression backends. */
47
+ export declare const circuitBreaker: CircuitBreaker;
@@ -0,0 +1,91 @@
1
/**
 * Circuit breaker for AI compression calls.
 *
 * Prevents hammering a down backend (Haiku, GPT-4o-mini, etc.)
 * with repeated failing requests. After N consecutive failures,
 * the circuit opens and all AI compression is skipped for a cooldown
 * period, then a single probe is allowed to test recovery.
 *
 * States:
 *   closed    → normal operation, AI compression enabled
 *   open      → backend down, all AI calls skipped (passthrough)
 *   half-open → cooldown elapsed, allow one probe call at a time
 */
const DEFAULT_CONFIG = {
    failureThreshold: 3,
    resetTimeoutMs: 60_000,
    callTimeoutMs: 5_000,
};
export class CircuitBreaker {
    state = 'closed';
    consecutiveFailures = 0;
    lastFailureTime = null;
    lastSuccessTime = null;
    totalTrips = 0;
    /** True while a half-open probe started through call() is still pending. */
    probeInFlight = false;
    config;
    /**
     * @param {object} [config] - partial overrides for failureThreshold,
     *   resetTimeoutMs and callTimeoutMs; missing keys use DEFAULT_CONFIG.
     */
    constructor(config) {
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    /** Returns current state, transitioning open→half-open if cooldown elapsed. */
    getState() {
        if (this.state === 'open' &&
            this.lastFailureTime !== null &&
            Date.now() - this.lastFailureTime >= this.config.resetTimeoutMs) {
            this.state = 'half-open';
            console.log('[squeezr] Circuit breaker → HALF-OPEN (probing)');
        }
        return this.state;
    }
    /**
     * Whether the next AI call should be attempted.
     * Open never allows; half-open allows only while no probe is pending.
     */
    shouldAllow() {
        const current = this.getState();
        if (current === 'open') {
            return false;
        }
        return !(current === 'half-open' && this.probeInFlight);
    }
    /** Resets the failure streak, closes the circuit and stamps the success time. */
    recordSuccess() {
        if (this.state === 'half-open') {
            console.log('[squeezr] Circuit breaker → CLOSED (backend recovered)');
        }
        this.consecutiveFailures = 0;
        this.state = 'closed';
        this.lastSuccessTime = Date.now();
    }
    /**
     * Counts a failure and opens the circuit once the streak reaches the
     * threshold. A failed half-open probe re-opens it because the streak is
     * not reset while the circuit is open.
     */
    recordFailure() {
        this.consecutiveFailures++;
        this.lastFailureTime = Date.now();
        if (this.consecutiveFailures >= this.config.failureThreshold && this.state !== 'open') {
            this.state = 'open';
            this.totalTrips++;
            console.log(`[squeezr] Circuit breaker → OPEN (${this.consecutiveFailures} consecutive failures, ` +
                `cooldown ${this.config.resetTimeoutMs / 1000}s)`);
        }
    }
    /**
     * Wraps an async AI call with timeout and circuit logic.
     *
     * @param {() => Promise<*>} fn - the AI call to attempt.
     * @returns {Promise<*>} whatever fn resolves to.
     * @throws {Error} 'Circuit breaker is open — AI compression skipped' when the
     *   call is not admitted (not counted as a failure); 'AI compression timeout'
     *   when fn exceeds callTimeoutMs; otherwise whatever fn throws.
     */
    async call(fn) {
        if (!this.shouldAllow()) {
            throw new Error('Circuit breaker is open — AI compression skipped');
        }
        // shouldAllow() has just refreshed this.state, so it is safe to read directly.
        const isProbe = this.state === 'half-open';
        if (isProbe) {
            this.probeInFlight = true;
        }
        let timer;
        try {
            const work = fn();
            const timeout = new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error('AI compression timeout')), this.config.callTimeoutMs);
            });
            const result = await Promise.race([work, timeout]);
            this.recordSuccess();
            return result;
        }
        catch (err) {
            this.recordFailure();
            throw err;
        }
        finally {
            // Without this, every settled call leaves a live timer behind that
            // keeps the event loop busy until callTimeoutMs elapses.
            clearTimeout(timer);
            if (isProbe) {
                this.probeInFlight = false;
            }
        }
    }
    /**
     * Point-in-time view of the breaker for status/dashboard reporting.
     * The config is copied so callers cannot mutate the breaker's settings.
     */
    snapshot() {
        return {
            state: this.getState(),
            consecutive_failures: this.consecutiveFailures,
            last_failure_time: this.lastFailureTime,
            last_success_time: this.lastSuccessTime,
            total_trips: this.totalTrips,
            config: { ...this.config },
        };
    }
}
/** Singleton circuit breaker for all AI compression backends. */
export const circuitBreaker = new CircuitBreaker();
@@ -15,6 +15,8 @@ export interface Savings {
15
15
  dedupSavedChars?: number;
16
16
  aiSavedChars?: number;
17
17
  overheadChars?: number;
18
+ detMs?: number;
19
+ aiMs?: number;
18
20
  }
19
21
  export declare function getCache(config: Config): CompressionCache;
20
22
  interface AnthropicMessage {
@@ -50,4 +52,5 @@ interface GeminiContent {
50
52
  }>;
51
53
  }
52
54
  export declare function compressGeminiContents(contents: GeminiContent[], apiKey: string, config: Config): Promise<[GeminiContent[], Savings]>;
55
+ export declare function emptySavings(dryRun?: boolean, detSavedChars?: number, dedupSavedChars?: number, detMs?: number): Savings;
53
56
  export {};
@@ -5,6 +5,7 @@ import { preprocess, preprocessForTool, hitPattern } from './deterministic.js';
5
5
  import { storeOriginal } from './expand.js';
6
6
  import { hashText, getBlock, setBlock } from './sessionCache.js';
7
7
  import { effectiveThreshold, effectiveKeepRecent, aiEnabled } from './config.js';
8
+ import { circuitBreaker } from './circuitBreaker.js';
8
9
  const COMPRESS_PROMPT = 'You are compressing a coding tool output to save tokens. ' +
9
10
  'Extract ONLY what is essential: errors, file paths, function names, ' +
10
11
  'test failures, key values, warnings. ' +
@@ -78,11 +79,14 @@ async function runCompression(items, compressFn, config) {
78
79
  if (cached)
79
80
  return { ...item, original: item.text, result: cached };
80
81
  }
81
- const compressed = await compressFn(preprocessed);
82
+ const compressed = await circuitBreaker.call(() => compressFn(preprocessed));
82
83
  if (config.cacheEnabled)
83
84
  cache.set(preprocessed, compressed);
84
85
  return { ...item, original: item.text, result: compressed };
85
86
  }));
87
+ const failures = results.filter(r => r.status === 'rejected').length;
88
+ if (failures > 0)
89
+ console.log(`[squeezr] ${failures} AI compression(s) failed (circuit: ${circuitBreaker.getState()})`);
86
90
  return results
87
91
  .filter((r) => r.status === 'fulfilled')
88
92
  .map((r) => r.value);
@@ -186,6 +190,7 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
186
190
  }
187
191
  // ── Step 1: Deterministic preprocessing on ALL tool results (turn 1+) ───────
188
192
  // Replaces RTK: applied to recent blocks too, no manual `rtk` prefix needed.
193
+ const detT0 = Date.now();
189
194
  let detSaved = 0;
190
195
  for (const { index, subIndex, text, tool } of allResults) {
191
196
  if (dedupedSet.has(`${index}:${subIndex}`))
@@ -197,6 +202,7 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
197
202
  detSaved += text.length - det.length;
198
203
  }
199
204
  }
205
+ const detMs = Date.now() - detT0;
200
206
  if (detSaved > 0) {
201
207
  const tokens = Math.round(detSaved / 3.5);
202
208
  console.log(`[squeezr/det] Deterministic: -${detSaved.toLocaleString()} chars (~${tokens} tokens) across ${allResults.length} block(s)`);
@@ -205,11 +211,16 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
205
211
  const candidates = allResults.slice(0, Math.max(0, allResults.length - effectiveKeepRecent(config)));
206
212
  const toProcess = candidates.filter(c => c.text.length >= threshold && !dedupedSet.has(`${c.index}:${c.subIndex}`));
207
213
  if (toProcess.length === 0)
208
- return [msgs, emptySavings(false, detSaved, readDedupSaved)];
214
+ return [msgs, emptySavings(false, detSaved, readDedupSaved, detMs)];
215
+ // Circuit breaker: skip AI compression entirely if backend is down
216
+ if (!circuitBreaker.shouldAllow()) {
217
+ console.log(`[squeezr] Circuit breaker open — skipping AI compression for ${toProcess.length} block(s)`);
218
+ return [msgs, emptySavings(false, detSaved, readDedupSaved, detMs)];
219
+ }
209
220
  if (config.dryRun) {
210
221
  const potential = toProcess.reduce((sum, c) => sum + c.text.length, 0);
211
222
  console.log(`[squeezr dry-run] Would AI-compress ${toProcess.length} block(s) | potential -${potential.toLocaleString()} chars | pressure=${Math.round(pressure * 100)}%`);
212
- return [msgs, emptySavings(true, detSaved, readDedupSaved)];
223
+ return [msgs, emptySavings(true, detSaved, readDedupSaved, detMs)];
213
224
  }
214
225
  // Differential: split session cache hits from uncached
215
226
  const sessionHits = [];
@@ -226,9 +237,11 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
226
237
  toCompress.push(c);
227
238
  }
228
239
  }
240
+ const aiT0 = Date.now();
229
241
  const freshlyCompressed = toCompress.length > 0
230
242
  ? await runCompression(toCompress, t => compressWithHaiku(t, apiKey), config)
231
243
  : [];
244
+ const aiMs = Date.now() - aiT0;
232
245
  let totalOriginal = 0;
233
246
  let totalCompressed = 0;
234
247
  let totalOverhead = 0;
@@ -266,6 +279,8 @@ export async function compressAnthropicMessages(messages, apiKey, config, system
266
279
  dedupSavedChars: readDedupSaved,
267
280
  aiSavedChars: totalAiSaved,
268
281
  overheadChars: totalOverhead,
282
+ detMs,
283
+ aiMs,
269
284
  }];
270
285
  }
271
286
  function extractOpenAIToolResults(messages) {
@@ -331,6 +346,7 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
331
346
  }
332
347
  }
333
348
  // Step 1: Deterministic preprocessing on ALL tool results
349
+ const oaiDetT0 = Date.now();
334
350
  let detSaved = 0;
335
351
  for (const { index, text, tool } of allResults) {
336
352
  if (dedupedIndices.has(index))
@@ -341,6 +357,7 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
341
357
  detSaved += text.length - det.length;
342
358
  }
343
359
  }
360
+ const oaiDetMs = Date.now() - oaiDetT0;
344
361
  if (detSaved > 0) {
345
362
  const tag = isLocal ? 'ollama' : 'codex';
346
363
  console.log(`[squeezr/det/${tag}] Deterministic: -${detSaved.toLocaleString()} chars across ${allResults.length} block(s)`);
@@ -349,11 +366,16 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
349
366
  const candidates = allResults.slice(0, Math.max(0, allResults.length - effectiveKeepRecent(config)));
350
367
  const toProcess = candidates.filter(c => c.text.length >= threshold && !dedupedIndices.has(c.index));
351
368
  if (toProcess.length === 0)
352
- return [msgs, emptySavings(false, detSaved, readDedupSaved)];
369
+ return [msgs, emptySavings(false, detSaved, readDedupSaved, oaiDetMs)];
370
+ // Circuit breaker: skip AI compression entirely if backend is down
371
+ if (!circuitBreaker.shouldAllow()) {
372
+ console.log(`[squeezr] Circuit breaker open — skipping AI compression for ${toProcess.length} block(s)`);
373
+ return [msgs, emptySavings(false, detSaved, readDedupSaved, oaiDetMs)];
374
+ }
353
375
  if (config.dryRun) {
354
376
  const tag = isLocal ? 'ollama' : 'codex';
355
377
  console.log(`[squeezr dry-run/${tag}] Would AI-compress ${toProcess.length} block(s) | potential -${toProcess.reduce((s, c) => s + c.text.length, 0).toLocaleString()} chars`);
356
- return [msgs, emptySavings(true, detSaved, readDedupSaved)];
378
+ return [msgs, emptySavings(true, detSaved, readDedupSaved, oaiDetMs)];
357
379
  }
358
380
  const sessionHits = [];
359
381
  const toCompress = [];
@@ -372,9 +394,11 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
372
394
  const compressFn = isLocal
373
395
  ? t => compressWithOllama(t, config.localUpstreamUrl, config.localCompressionModel)
374
396
  : t => compressWithGptMini(t, apiKey);
397
+ const oaiAiT0 = Date.now();
375
398
  const freshlyCompressed = toCompress.length > 0
376
399
  ? await runCompression(toCompress, compressFn, config)
377
400
  : [];
401
+ const oaiAiMs = Date.now() - oaiAiT0;
378
402
  let totalOriginal = 0, totalCompressed = 0, totalOverhead = 0, totalAiSaved = 0;
379
403
  const byTool = [];
380
404
  for (const { index, tool, block } of sessionHits) {
@@ -399,7 +423,7 @@ export async function compressOpenAIMessages(messages, apiKey, config, isLocal =
399
423
  }
400
424
  if (sessionHits.length > 0)
401
425
  console.log(`[squeezr] Session cache: ${sessionHits.length} block(s) reused`);
402
- return [msgs, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: readDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead }];
426
+ return [msgs, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: readDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead, detMs: oaiDetMs, aiMs: oaiAiMs }];
403
427
  }
404
428
  export async function compressGeminiContents(contents, apiKey, config) {
405
429
  if (config.disabled)
@@ -456,6 +480,7 @@ export async function compressGeminiContents(contents, apiKey, config) {
456
480
  }
457
481
  }
458
482
  // Step 1: Deterministic preprocessing on ALL tool results
483
+ const gemDetT0 = Date.now();
459
484
  let detSaved = 0;
460
485
  for (const { index, subIndex, text, tool } of allResults) {
461
486
  if (geminiDedupedSet.has(`${index}:${subIndex}`))
@@ -466,16 +491,22 @@ export async function compressGeminiContents(contents, apiKey, config) {
466
491
  detSaved += text.length - det.length;
467
492
  }
468
493
  }
494
+ const gemDetMs = Date.now() - gemDetT0;
469
495
  if (detSaved > 0)
470
496
  console.log(`[squeezr/det/gemini] Deterministic: -${detSaved.toLocaleString()} chars across ${allResults.length} block(s)`);
471
497
  // Step 2: AI compression for old blocks above threshold
472
498
  const candidates = allResults.slice(0, Math.max(0, allResults.length - effectiveKeepRecent(config)))
473
499
  .filter(c => c.text.length >= threshold && !geminiDedupedSet.has(`${c.index}:${c.subIndex}`));
474
500
  if (candidates.length === 0)
475
- return [cts, emptySavings(false, detSaved, geminiReadDedupSaved)];
501
+ return [cts, emptySavings(false, detSaved, geminiReadDedupSaved, gemDetMs)];
502
+ // Circuit breaker: skip AI compression entirely if backend is down
503
+ if (!circuitBreaker.shouldAllow()) {
504
+ console.log(`[squeezr] Circuit breaker open — skipping AI compression for ${candidates.length} block(s)`);
505
+ return [cts, emptySavings(false, detSaved, geminiReadDedupSaved, gemDetMs)];
506
+ }
476
507
  if (config.dryRun) {
477
508
  console.log(`[squeezr dry-run/gemini] Would AI-compress ${candidates.length} block(s) | potential -${candidates.reduce((s, c) => s + c.text.length, 0).toLocaleString()} chars`);
478
- return [cts, emptySavings(true, detSaved, geminiReadDedupSaved)];
509
+ return [cts, emptySavings(true, detSaved, geminiReadDedupSaved, gemDetMs)];
479
510
  }
480
511
  const sessionHits = [];
481
512
  const toCompress = [];
@@ -486,9 +517,11 @@ export async function compressGeminiContents(contents, apiKey, config) {
486
517
  else if (aiEnabled())
487
518
  toCompress.push(c);
488
519
  }
520
+ const gemAiT0 = Date.now();
489
521
  const freshlyCompressed = toCompress.length > 0
490
522
  ? await runCompression(toCompress, t => compressWithGeminiFlash(t, apiKey), config)
491
523
  : [];
524
+ const gemAiMs = Date.now() - gemAiT0;
492
525
  let totalOriginal = 0, totalCompressed = 0, totalOverhead = 0, totalAiSaved = 0;
493
526
  const byTool = [];
494
527
  for (const { index, subIndex, tool, block } of sessionHits) {
@@ -509,8 +542,8 @@ export async function compressGeminiContents(contents, apiKey, config) {
509
542
  }
510
543
  if (sessionHits.length > 0)
511
544
  console.log(`[squeezr/gemini] Session cache: ${sessionHits.length} block(s) reused`);
512
- return [cts, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: geminiReadDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead }];
545
+ return [cts, { compressed: freshlyCompressed.length, savedChars: totalOriginal - totalCompressed, originalChars: totalOriginal, byTool, dryRun: false, sessionCacheHits: sessionHits.length, detSavedChars: detSaved, dedupSavedChars: geminiReadDedupSaved, aiSavedChars: totalAiSaved, overheadChars: totalOverhead, detMs: gemDetMs, aiMs: gemAiMs }];
513
546
  }
514
- function emptySavings(dryRun = false, detSavedChars = 0, dedupSavedChars = 0) {
515
- return { compressed: 0, savedChars: 0, originalChars: 0, byTool: [], dryRun, sessionCacheHits: 0, detSavedChars, dedupSavedChars, aiSavedChars: 0, overheadChars: 0 };
547
/**
 * Builds the Savings record for a request on which no AI compression ran.
 *
 * All AI-related counters are zero; only the values measured before the AI
 * step are carried through from the arguments.
 *
 * @param {boolean} [dryRun=false] - value stored in the record's dryRun field.
 * @param {number} [detSavedChars=0] - value stored as detSavedChars.
 * @param {number} [dedupSavedChars=0] - value stored as dedupSavedChars.
 * @param {number} [detMs=0] - value stored as detMs.
 * @returns {object} a fresh Savings object with an empty byTool array and aiMs 0.
 */
export function emptySavings(dryRun = false, detSavedChars = 0, dedupSavedChars = 0, detMs = 0) {
    const noAiWork = {
        compressed: 0,
        savedChars: 0,
        originalChars: 0,
        byTool: [],
        dryRun,
        sessionCacheHits: 0,
        detSavedChars,
        dedupSavedChars,
        aiSavedChars: 0,
        overheadChars: 0,
        detMs,
        aiMs: 0,
    };
    return noAiWork;
}