claude-code-cache-fix 3.9.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/proxy/server.mjs CHANGED
@@ -3,9 +3,50 @@ import { pathToFileURL, URL } from "node:url";
3
3
  import config from "./config.mjs";
4
4
  import { forwardRequest } from "./upstream.mjs";
5
5
  import { streamResponse, createTelemetryRecord } from "./stream.mjs";
6
- import { loadExtensions, snapshotRegistry, runOnRequest, runOnResponseStart, runOnResponse } from "./pipeline.mjs";
6
+ import { loadExtensions, snapshotRegistry, runOnRequest, runOnResponseStart, runOnResponse, getFailedExtensions } from "./pipeline.mjs";
7
7
  import { startWatcher } from "./watcher.mjs";
8
8
 
9
+ // Debug logging — writes to ~/.claude/cache-fix-debug.log (override path with
10
+ // CACHE_FIX_DEBUG_LOG). Self-gated on CACHE_FIX_DEBUG=1; a no-op otherwise.
11
+ // Env is read on every call so tests (and operators flipping the flag at
12
+ // runtime) see live behavior — same pattern as image-strip's #98 gate.
13
+ import { appendFileSync, mkdirSync } from "node:fs";
14
+ import { homedir } from "node:os";
15
+ import { dirname, join } from "node:path";
16
+ import util from "node:util";
17
+
18
/**
 * Resolve where debug output is appended.
 *
 * A non-empty CACHE_FIX_DEBUG_LOG wins; otherwise the log lives at
 * ~/.claude/cache-fix-debug.log. The environment is consulted on every call
 * so a change made while the process is running takes effect immediately.
 *
 * @returns {string} Path of the debug log file.
 */
function debugLogPath() {
  const override = process.env.CACHE_FIX_DEBUG_LOG;
  if (override) {
    return override;
  }
  return join(homedir(), ".claude", "cache-fix-debug.log");
}
22
+
23
+ // Never spread raw headers to the log: Authorization / x-api-key / cookies
24
+ // must never persist to disk. Same discipline as bootstrap-defense.mjs's
25
+ // audit-record contract — extract named scalars only.
26
// Lower-cased names of headers whose values must never reach the debug log.
const SENSITIVE_HEADERS = new Set([
  "authorization",
  "x-api-key",
  "cookie",
  "set-cookie",
  "proxy-authorization",
]);

/**
 * Return a shallow copy of `headers` that is safe to write to the debug log.
 *
 * Values of headers listed in SENSITIVE_HEADERS (matched case-insensitively)
 * are replaced with the literal "[REDACTED]"; every other entry is copied
 * as-is under its original key.
 *
 * The copy is built with Object.fromEntries rather than `out[k] = v` because
 * header names come from the network: plain assignment of a key named
 * "__proto__" goes through the prototype setter, which either drops the entry
 * or (for array/object values) rewires the copy's prototype. fromEntries
 * always defines an own data property.
 *
 * @param {object|null|undefined} headers - Header map; nullish yields `{}`.
 * @returns {object} New object with sensitive values masked.
 */
function redactHeaders(headers) {
  return Object.fromEntries(
    Object.entries(headers ?? {}).map(([name, value]) => [
      name,
      SENSITIVE_HEADERS.has(name.toLowerCase()) ? "[REDACTED]" : value,
    ]),
  );
}
41
+
42
/**
 * Append one timestamped line to the debug log.
 *
 * Does nothing unless CACHE_FIX_DEBUG is exactly "1"; the flag is read on
 * every call. Arguments are rendered with util.format. Logging is
 * best-effort: failures to create the directory or append the line are
 * deliberately ignored so diagnostics can never break request handling.
 *
 * @param {...unknown} args - Values to format into the log line.
 * @returns {void}
 */
function debugLog(...args) {
  const enabled = process.env.CACHE_FIX_DEBUG === "1";
  if (!enabled) {
    return;
  }

  const target = debugLogPath();
  try {
    mkdirSync(dirname(target), { recursive: true });
  } catch {
    // Best-effort: the append below is attempted regardless.
  }

  const stamp = new Date().toISOString();
  const entry = `[${stamp}] ${util.format(...args)}\n`;
  try {
    appendFileSync(target, entry);
  } catch {
    // Best-effort: a failed write is dropped silently.
  }
}
49
+
9
50
  function collectBody(req) {
10
51
  return new Promise((resolve, reject) => {
11
52
  const chunks = [];
@@ -74,7 +115,13 @@ async function handleMessages(clientReq, clientRes) {
74
115
  });
75
116
 
76
117
  const pre = await preForward(clientReq, clientRes, abortController, extSnapshot, "messages");
77
- if (pre.handled) return;
118
+ if (pre.handled) {
119
+ debugLog("[PROXY] handled internally without upstream request",
120
+ "method:", clientReq.method, "url:", clientReq.url,
121
+ "status:", clientRes.statusCode,
122
+ "response headers:", redactHeaders(clientRes.getHeaders()));
123
+ return;
124
+ }
78
125
  const { parsed, forwardBody, meta } = pre;
79
126
 
80
127
  const requestedModel = parsed?.model || null;
@@ -88,6 +135,7 @@ async function handleMessages(clientReq, clientRes) {
88
135
  abortController.signal
89
136
  ));
90
137
  } catch (err) {
138
+ debugLog("[PROXY] forwardRequest error:", err.message);
91
139
  if (abortController.signal.aborted) return;
92
140
  clientRes.writeHead(502, { "content-type": "application/json" });
93
141
  clientRes.end(JSON.stringify({ error: "upstream_error", message: err.message }));
@@ -99,6 +147,11 @@ async function handleMessages(clientReq, clientRes) {
99
147
  // socket carried the request without each one re-instrumenting upstream.
100
148
  meta._upstreamConnectionId = upstreamConnectionId ?? null;
101
149
 
150
+ debugLog("[UPSTREAM -> PROXY -> CLAUDE] RESPONSE",
151
+ "status:", statusCode, "message:", upstreamRes.statusMessage,
152
+ "upstream headers:", redactHeaders(upstreamRes.headers),
153
+ "proxy headers:", redactHeaders(responseHeaders));
154
+
102
155
  if (extSnapshot.length > 0) {
103
156
  const resCtx = { status: statusCode, headers: responseHeaders, meta };
104
157
  await runOnResponseStart(resCtx, extSnapshot);
@@ -238,6 +291,21 @@ async function handleBootstrap(clientReq, clientRes) {
238
291
  }
239
292
 
240
293
/**
 * Answer the health probe.
 *
 * Responds 503 with a "degraded" payload listing the failed extensions when
 * getFailedExtensions() reports any, and 200 {"status":"ok"} otherwise, so
 * operators and monitoring see extension-load failures instead of a
 * misleading "ok" (see #196).
 */
function handleHealth(_req, res) {
  const jsonHeaders = { "content-type": "application/json" };
  const failed = getFailedExtensions();
  const degraded = failed.length > 0;

  if (!degraded) {
    res.writeHead(200, jsonHeaders);
    res.end(JSON.stringify({ status: "ok" }));
    return;
  }

  const payload = {
    status: "degraded",
    failed_extensions: failed,
    hint: "restart the proxy via your supervisor to recover (in-process reload cannot fix stale ESM cache; #196)",
  };
  res.writeHead(503, jsonHeaders);
  res.end(JSON.stringify(payload));
}
@@ -259,16 +327,44 @@ function handleNotFound(_req, res) {
259
327
  */
260
328
export function createProxyServer() {
  return http.createServer((req, res) => {
    // Async IIFE: handleMessages/handleBootstrap return promises, so they are
    // awaited inside the try/catch; a bare return would let a rejection escape
    // as an unhandledRejection.
    (async () => {
      try {
        debugLog("[CLAUDE -> PROXY] REQUEST",
          "method:", req.method, "url:", req.url,
          "headers:", redactHeaders(req.headers));

        // Wrap res.write/res.end to log chunk-level activity. debugLog gates
        // itself on CACHE_FIX_DEBUG, so the wrappers only forward when it is off.
        const originalWrite = res.write;
        const originalEnd = res.end;
        res.write = function (chunk, ...args) {
          // Report real byte counts: a string's .length counts UTF-16 code
          // units, which under-reports any non-ASCII payload.
          const size = typeof chunk === "string"
            ? Buffer.byteLength(chunk)
            : (chunk?.length ?? 0);
          debugLog(`[PROXY -> CLAUDE] Send chunk. Size: ${size} bytes`);
          return originalWrite.apply(res, [chunk, ...args]);
        };
        res.end = function (chunk, ...args) {
          debugLog("[PROXY -> CLAUDE] Close connection (res.end)");
          return originalEnd.apply(res, [chunk, ...args]);
        };

        if (req.method === "GET" && req.url === "/health") return handleHealth(req, res);
        if (req.method === "POST" && req.url?.startsWith("/v1/messages")) return await handleMessages(req, res);
        if (req.url?.startsWith("/api/claude_cli/bootstrap")) return await handleBootstrap(req, res);
        debugLog("ERROR: handler not found for req.url=", req.url, "method=", req.method);
        handleNotFound(req, res);
      } catch (error) {
        debugLog("REQUEST HANDLER ERROR:", error?.message, error?.stack);
        if (!res.headersSent) {
          // Generic body: do NOT echo error.message (may include internal
          // paths, upstream URLs, or other server state).
          res.writeHead(500, { "content-type": "application/json" });
          res.end(JSON.stringify({ error: "internal_proxy_error" }));
        } else if (!res.writableEnded) {
          // Headers (and possibly part of the body) are already on the wire,
          // so a clean error response is impossible. Tear the connection down
          // so the client sees a failure instead of waiting on a response
          // that will never be completed.
          res.destroy();
        }
      }
    })();
  });
}
274
370
 
@@ -290,7 +386,34 @@ export async function startProxy(options = {}) {
290
386
  const bind = options.bind ?? config.bind;
291
387
  const extensionsDir = options.extensionsDir ?? config.extensionsDir;
292
388
  const extensionsConfig = options.extensionsConfig ?? config.extensionsConfig;
293
- const watch = options.watch !== false;
389
+ // Hot-reload is opt-in as of v4.0.0 (#196). The in-process watcher is the
390
+ // only code path that triggers the Node ESM stale-import race; cold starts
391
+ // have an empty module cache and load extensions cleanly. Strict `=== "on"`
392
+ // means any other value (including "true"/"1"/"yes") is treated as off —
393
+ // the safe default. Note this is the opposite stance from
394
+ // CACHE_FIX_THINKING_SANITIZE (default-on; only literal "off" disables):
395
+ // a hot-reload enable is a footgun, so we require the operator to type the
396
+ // exact opt-in token; a sanitize disable is also a footgun (loses the
397
+ // wedge mitigation), so we require the exact disable token there.
398
+ const hotReloadOptIn = process.env.CACHE_FIX_HOT_RELOAD === "on";
399
+ const watch = options.watch !== false && hotReloadOptIn;
400
+
401
+ // Boot banner on stderr so the EFFECTIVE hot-reload mode is visible in the
402
+ // supervisor's log (journalctl --user / ~/Library/Logs/) without being
403
+ // noisy for monitoring tools that line-grep stderr. Keyed off the effective
404
+ // `watch` value, not the raw envvar, so an embedder calling startProxy({
405
+ // watch: false }) with the envvar set sees "off" (which is the truth — the
406
+ // watcher is suppressed regardless of envvar in that case). Supervisor-
407
+ // neutral wording — no version pin (lives in CHANGELOG/README instead).
408
+ if (watch) {
409
+ process.stderr.write(
410
+ "[cache-fix] hot-reload: on (CACHE_FIX_HOT_RELOAD=on) — long-running processes can hit a Node ESM stale-import race; see #196. Restart the proxy via your supervisor to recover.\n",
411
+ );
412
+ } else {
413
+ process.stderr.write(
414
+ "[cache-fix] hot-reload: off (set CACHE_FIX_HOT_RELOAD=on to enable). Extension changes require a supervisor-level proxy restart.\n",
415
+ );
416
+ }
294
417
 
295
418
  let watcher = null;
296
419
  try {
@@ -183,9 +183,23 @@ function getAgent(isHTTPS, hostname) {
183
183
  return agent;
184
184
  }
185
185
 
186
+ // Build the upstream URL by concatenating the configured base (with any path
187
+ // component preserved) with the client request URL. The historical
188
+ // `new URL(clientReq.url, base)` approach is RFC 3986 relative-resolution,
189
+ // which drops the base's path component when the relative is path-absolute
190
+ // (`/v1/messages`). That breaks corp-proxy / mirror setups where the
191
+ // configured upstream is `https://corp-proxy.example.net/anthropic-mirror`
192
+ // — the request would land at `https://corp-proxy.example.net/v1/messages`
193
+ // with `/anthropic-mirror` silently dropped. See PR #188 / @nisqatsi.
194
/**
 * Join the configured upstream base with the client's request URL, keeping
 * any path component of the base.
 *
 * All trailing slashes on `base` are removed before joining (not just one),
 * so a base such as "https://host/mirror//" cannot inject an empty path
 * segment ("/mirror//v1/...") into the upstream request.
 *
 * @param {string} base - Upstream origin, optionally with a path prefix.
 * @param {string} clientUrl - Request target (path plus optional query);
 *   a leading "/" is added when missing.
 * @returns {URL} Absolute upstream URL.
 * @throws {TypeError} If the joined string is not a valid URL.
 */
export function buildUpstreamUrl(base, clientUrl) {
  let end = base.length;
  while (end > 0 && base[end - 1] === "/") {
    end -= 1;
  }
  const trimmedBase = base.slice(0, end);
  const relative = clientUrl.startsWith("/") ? clientUrl : `/${clientUrl}`;
  return new URL(trimmedBase + relative);
}
199
+
186
200
  export function forwardRequest(clientReq, body, signal) {
187
201
  return new Promise((resolve, reject) => {
188
- const upstreamUrl = new URL(clientReq.url, config.upstream);
202
+ const upstreamUrl = buildUpstreamUrl(config.upstream, clientReq.url);
189
203
 
190
204
  const headers = buildUpstreamHeaders(clientReq.headers, upstreamUrl.hostname);
191
205
  if (body) {
@@ -10,7 +10,10 @@ Restart=on-failure
10
10
  RestartSec=5
11
11
  Environment=CACHE_FIX_PROXY_PORT={{PORT}}
12
12
  {{UPSTREAM_LINE}}
13
+ {{CA_FILE_LINE}}
14
+ {{REJECT_UNAUTHORIZED_LINE}}
13
15
  {{DEBUG_LINE}}
16
+ {{HOT_RELOAD_LINE}}
14
17
  WorkingDirectory={{WORKING_DIR}}
15
18
 
16
19
  [Install]
@@ -14,7 +14,10 @@
14
14
  <key>CACHE_FIX_PROXY_PORT</key>
15
15
  <string>{{PORT}}</string>
16
16
  {{UPSTREAM_PLIST}}
17
+ {{CA_FILE_PLIST}}
18
+ {{REJECT_UNAUTHORIZED_PLIST}}
17
19
  {{DEBUG_PLIST}}
20
+ {{HOT_RELOAD_PLIST}}
18
21
  </dict>
19
22
  <key>WorkingDirectory</key>
20
23
  <string>{{WORKING_DIR}}</string>
@@ -56,7 +56,7 @@ Always:
56
56
  ```
57
57
 
58
58
  ```
59
- Project directory: /home/manager/git_repos/kanfei_nowcast_e3b
59
+ Project directory: ~/git_repos/your-project
60
60
  Auto-detected session: db11f377-4ca8-4fc3-9b6d-1069da58c1b2.jsonl
61
61
  Modified: 2026-04-19 13:26:42
62
62
  Size: 4.8M
@@ -155,6 +155,21 @@ The cold rebuild consumed ~15% Q5h in one call on our Max 5x account. After that
155
155
 
156
156
  **Total cost of a manual compact cycle:** roughly ~15% cold rebuild plus a few % for the Opus summarization. Compare to hitting the 1M wall and losing the session entirely.
157
157
 
158
+ ### Stale transcripts get swept (CC's `cleanupPeriodDays`)
159
+
160
+ Heads up if you're treating the on-disk `.jsonl` as a "keep just in case" backup after `/clear`: it isn't durable. Claude Code maintains a transcript-retention setting `cleanupPeriodDays` in `~/.claude/settings.json` (default 30 days). CC runs a transcript cleanup at startup when its `~/.claude/.last-cleanup` sentinel is past the 24h freshness window — when that fires, CC walks every `.jsonl` under `~/.claude/projects/` and deletes any whose `mtime` is past the cutoff, along with the matching `<session-id>/` companion directory next to it. A session you compacted, `/clear`-ed, and stopped writing to ~31 days ago will be gone after the next launch that crosses the cleanup gate, even if you'd planned to grep it for context.
161
+
162
+ Practical implications:
163
+
164
+ - **If you need the post-compact JSONL preserved**, copy it out of `~/.claude/projects/` to a path that isn't subject to CC's cleanup — e.g. `~/snapshots/cc-jsonl-backups/`.
165
+ - **A stopped session held in heal-and-await state is especially vulnerable** — it's idle by definition, so it crosses `cleanupPeriodDays` faster than an actively-used session whose appends keep mtime fresh. If you've stopped a session intending to resume later, either resume promptly, `touch` the `.jsonl` to refresh mtime, or copy it out of the tree.
166
+ - Cleanup keys off `mtime`, and plain reads (`cat`/`grep`/`less`) don't refresh `mtime` — inspection doesn't extend retention.
167
+ - **Raise the retention setting on every machine you use CC on.** Adding `"cleanupPeriodDays": 36500` (~100 years) to `~/.claude/settings.json` defangs the documented cleanup path entirely. There's no documented upper bound; the schema just wants a positive integer. The cleanup logic re-reads the setting at each sweep, so you can land this even on machines where prior sweeps already happened.
168
+
169
+ **If a transcript was already swept** and you need to recover it, [`vsits/restore-claude-history-linux`](https://github.com/vsits/restore-claude-history-linux) (RCB) restores deleted `.jsonl` files from Linux filesystem snapshots — **ZFS**, **Btrfs**, or **Timeshift**. End-to-end-verified on Ubuntu 24.04; a real Btrfs dogfood confirmed a recovered transcript loads and resumes via `/resume` in a fresh CC session. macOS users have the same shape via the upstream [`garrettmoss/restore-claude-history`](https://github.com/garrettmoss/restore-claude-history) (Time Machine). Both tools also remind you to set `cleanupPeriodDays` afterward — otherwise the restored transcript gets re-swept on the next cleanup pass.
170
+
171
+ Tracked upstream as [anthropics/claude-code#62272](https://github.com/anthropics/claude-code/issues/62272) — cache-fix doesn't touch this surface, but documenting it because manual-compact users are the population most likely to bank on the `.jsonl` sticking around.
172
+
158
173
  ### Summarizer model
159
174
 
160
175
  The tool defaults to `claude --print --model claude-opus-4-7` for the highest-fidelity summary. Override with the `MANUAL_COMPACT_MODEL` env var — e.g. `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` to minimize Q5h impact, or to point at a different model if Opus is rate-limited or retired.
@@ -0,0 +1,229 @@
1
+ """Shared cache analysis helpers for hooks and MCP tools.
2
+
3
+ Reference Python helper for consumers that want to read cache-fix's
4
+ ``quota-status`` output and reason about cache-state from a Claude Code
5
+ transcript. Used by host-side hooks (e.g. ``~/.claude/hooks/
6
+ context-advisor-analyze.py``) and MCP tools that need quota-aware
7
+ behavior.
8
+
9
+ Consumer pattern: copy or symlink this file into ``~/.claude/mcp/`` (or
10
+ wherever your hook / tool expects to import from) and ``from cache_analysis
11
+ import read_quota_status, analyze_transcript`` etc. The file ships in the
12
+ cache-fix npm package's ``tools/`` directory; npm consumers can reference
13
+ ``node_modules/claude-code-cache-fix/tools/cache_analysis.py`` directly or
14
+ copy it out for non-npm installs.
15
+
16
+ The ``read_quota_status()`` helper handles both cache-fix v3.5.0+ (proxy
17
+ mode, per-session split at ``~/.claude/quota-status/account.json``) and
18
+ v3.4.x and earlier / preload mode (single global
19
+ ``~/.claude/quota-status.json``). See the README's "Migration:
20
+ v3.4.x → v3.5.0+" section.
21
+ """
22
+
23
+ import json
24
+ import subprocess
25
+ from datetime import datetime, timezone
26
+
27
+ CACHE_TTL_5M = 300 # 5-minute ephemeral TTL
28
+ CACHE_TTL_1H = 3600 # 1-hour extended TTL
29
+ CONTEXT_THRESHOLD = 50_000 # Minimum tokens to recommend compact
30
+ COMPACT_RESULT_ESTIMATE = 12_000 # Estimated tokens after compaction
31
+ CACHE_CREATE_RATE_5M = 3.75 # Opus $/MTok for 5min cache writes
32
+ CACHE_CREATE_RATE_1H = 7.50 # Opus $/MTok for 1h cache writes
33
+
34
+
35
def read_tail_lines(filepath, n=300):
    """Return the last ``n`` lines of ``filepath`` as a list of strings.

    Uses the external ``tail`` command first so large transcripts are not
    read in full; if ``tail`` is missing, fails, or times out, falls back to
    a pure-Python read of the file.

    Content is decoded as UTF-8 (undecodable bytes are replaced) and split on
    ``"\\n"`` only. ``str.splitlines()`` is deliberately avoided: it also
    breaks on U+2028/U+2029/NEL, which may appear unescaped inside a JSON
    string and would cut a JSONL record in half.

    Args:
        filepath: Path of the file to read.
        n: Maximum number of trailing lines to return.

    Returns:
        List of lines without line terminators; ``[]`` if the file cannot be
        read by either strategy.
    """
    try:
        result = subprocess.run(
            ["tail", "-n", str(n), filepath],
            capture_output=True, timeout=5,
        )
        if result.returncode == 0:
            text = result.stdout.decode("utf-8", errors="replace")
            lines = text.split("\n")
            # A trailing newline leaves one empty element after the split.
            if lines and lines[-1] == "":
                lines.pop()
            return [line.rstrip("\r") for line in lines]
    except Exception:
        # tail unavailable / timed out: fall through to the Python reader.
        pass

    try:
        from collections import deque

        with open(filepath, encoding="utf-8", errors="replace", newline="\n") as fh:
            return [line.rstrip("\r\n") for line in deque(fh, maxlen=n)]
    except Exception:
        return []
45
+
46
+
47
def parse_assistant_usage(lines):
    """Extract assistant messages with usage data from transcript lines.

    Each line is parsed as JSON. Lines that are blank, not valid JSON, not a
    JSON object, not of ``type == "assistant"``, or that lack a usage object
    or a timestamp are skipped. Entries whose cache-creation, cache-read and
    input token counts are all zero are skipped as well.

    Token counts that are missing, ``null`` or non-numeric are treated as 0
    so one malformed record cannot abort the whole scan.

    Args:
        lines: Iterable of raw transcript lines (JSONL).

    Returns:
        List of dicts with keys ``timestamp``, ``input_tokens``,
        ``cache_creation``, ``cache_read``, ``output_tokens``, ``total_in``,
        ``cr_1h`` and ``cr_5m``, in input order.
    """
    def _count(mapping, key):
        value = mapping.get(key, 0)
        # bool is an int subclass; a stray true/false is not a token count.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0
        return value

    messages = []
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except ValueError:  # includes json.JSONDecodeError
            continue
        # A line can be valid JSON without being an object (e.g. `[]`, `42`).
        if not isinstance(obj, dict) or obj.get("type") != "assistant":
            continue
        msg = obj.get("message")
        usage = msg.get("usage") if isinstance(msg, dict) else None
        ts = obj.get("timestamp")
        if not isinstance(usage, dict) or not usage or not ts:
            continue
        cr = _count(usage, "cache_creation_input_tokens")
        rd = _count(usage, "cache_read_input_tokens")
        inp = _count(usage, "input_tokens")
        out = _count(usage, "output_tokens")
        if cr == 0 and rd == 0 and inp == 0:
            continue
        # Extract TTL tier breakdown if available
        cr_detail = usage.get("cache_creation")
        if isinstance(cr_detail, dict):
            cr_1h = _count(cr_detail, "ephemeral_1h_input_tokens")
            cr_5m = _count(cr_detail, "ephemeral_5m_input_tokens")
        else:
            cr_1h = 0
            cr_5m = 0
        messages.append({
            "timestamp": ts,
            "input_tokens": inp,
            "cache_creation": cr,
            "cache_read": rd,
            "output_tokens": out,
            "total_in": cr + rd + inp,
            "cr_1h": cr_1h,
            "cr_5m": cr_5m,
        })
    return messages
86
+
87
+
88
def detect_cache_ttl(messages):
    """Infer the effective cache TTL from the most recent usage records.

    Looks at up to the last ten entries. Any positive ``cr_1h`` selects the
    1-hour tier; otherwise any positive ``cr_5m`` selects the 5-minute tier;
    with no breakdown at all the 5-minute TTL is returned as a conservative
    default and labelled accordingly.

    Returns:
        ``(ttl_seconds, tier_name)``.
    """
    window = messages[-10:]
    tier_seen = {
        "1h": any(entry.get("cr_1h", 0) > 0 for entry in window),
        "5m": any(entry.get("cr_5m", 0) > 0 for entry in window),
    }

    if tier_seen["1h"]:
        return CACHE_TTL_1H, "1h"
    if tier_seen["5m"]:
        return CACHE_TTL_5M, "5m"
    # No cache_creation breakdown available — conservative default
    return CACHE_TTL_5M, "5m (default)"
105
+
106
+
107
def estimate_thinking_overhead(messages):
    """Estimate how many tokens of earlier thinking get replayed as input.

    Heuristic: the sum of ``output_tokens`` over every message except the
    last one. With fewer than two messages there is nothing to replay and
    the result is 0.
    """
    if len(messages) < 2:
        return 0
    replayed = 0
    for earlier in messages[:-1]:
        replayed += earlier["output_tokens"]
    return replayed
116
+
117
+
118
def format_tokens(n):
    """Render a token count compactly: ``"1.2M"``, ``"12k"`` or the bare number.

    The switch to the "M" form happens at 999_500 rather than 1_000_000:
    values from 999_500 upward would otherwise round to the nonsensical
    ``"1000k"`` in the thousands form.

    Args:
        n: Token count.

    Returns:
        Human-readable string.
    """
    if n >= 999_500:
        return f"{n / 1_000_000:.1f}M"
    if n >= 1_000:
        return f"{n / 1_000:.0f}k"
    return str(n)
124
+
125
+
126
def format_duration(seconds):
    """Render a duration as hours with one decimal (``"1.5h"``) from one hour
    upward, otherwise as whole minutes truncated toward zero (``"12m"``)."""
    if seconds >= 3600:
        hours = seconds / 3600
        return f"{hours:.1f}h"
    whole_minutes = int(seconds / 60)
    return f"{whole_minutes}m"
130
+
131
+
132
def estimate_savings(total_context, ttl_tier="5m"):
    """Estimate the dollar savings of compacting before a cold cache rebuild.

    The per-MTok write rate is chosen from the tier label: anything starting
    with ``"1h"`` uses the 1-hour rate, everything else the 5-minute rate.
    The result is the cost of re-caching ``total_context`` tokens minus the
    cost of caching the estimated post-compaction size at the same rate.
    Callers should pass the tier returned by ``detect_cache_ttl()``.
    """
    if ttl_tier.startswith("1h"):
        rate = CACHE_CREATE_RATE_1H
    else:
        rate = CACHE_CREATE_RATE_5M
    cold_cost = (total_context / 1_000_000) * rate
    compact_cost = (COMPACT_RESULT_ESTIMATE / 1_000_000) * rate
    return cold_cost - compact_cost
143
+
144
+
145
def read_quota_status():
    """Read current quota utilization from cache-fix's quota-status file.

    Candidates are tried in order:
      - ``~/.claude/quota-status/account.json`` (v3.5.0+, proxy mode)
      - ``~/.claude/quota-status.json`` (v3.4.x and earlier / preload mode)

    A candidate is skipped when it cannot be opened, is not valid UTF-8, is
    not valid JSON, or parses to something other than a dict (e.g. a partial
    write that lands as ``[]`` or ``null``), so callers never receive a
    non-dict and break on ``status.get(...)`` accessors downstream.

    Files are read as UTF-8 explicitly instead of the locale encoding, and
    ``ValueError`` is caught rather than only ``JSONDecodeError``: a file
    truncated in the middle of a multi-byte character raises
    ``UnicodeDecodeError``, which is a ``ValueError`` but not a
    ``JSONDecodeError``.

    Returns:
        The parsed dict from the first usable candidate, or ``None`` if no
        candidate yields a dict-shaped payload.
    """
    import os

    candidates = (
        os.path.expanduser("~/.claude/quota-status/account.json"),
        os.path.expanduser("~/.claude/quota-status.json"),
    )
    for quota_file in candidates:
        try:
            with open(quota_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            continue
        if isinstance(data, dict):
            return data
        # Valid JSON but wrong shape — try the next candidate.
    return None
177
+
178
+
179
def analyze_transcript(transcript_path):
    """Full analysis of a transcript. Returns a dict with all cache state info.

    Reads the last 300 lines of the transcript, extracts assistant usage
    records, and derives context size, idle gap, cache-expiry state and
    recent cache-write share from them.

    Args:
        transcript_path: Path to a Claude Code ``.jsonl`` transcript.

    Returns:
        Dict with keys ``context_tokens``, ``thinking_overhead``,
        ``total_with_thinking``, ``gap_seconds``, ``cache_expired``,
        ``ttl_seconds``, ``ttl_tier``, ``last_timestamp``, ``num_messages``,
        ``recent_cr_pct``, ``savings`` and ``quota``; or ``None`` if analysis
        can't be performed (no lines, no usable messages, or an unparseable
        last timestamp).
    """
    lines = read_tail_lines(transcript_path, 300)
    if not lines:
        return None

    messages = parse_assistant_usage(lines)
    if not messages:
        return None

    last = messages[-1]
    try:
        last_ts = datetime.fromisoformat(last["timestamp"].replace("Z", "+00:00"))
    except (ValueError, KeyError, AttributeError, TypeError):
        # AttributeError/TypeError: timestamp present but not a string.
        return None
    if last_ts.tzinfo is None:
        # Subtracting a naive datetime from the aware `now` below would raise
        # TypeError. NOTE(review): assumes offset-less timestamps are UTC —
        # confirm against the transcript writer.
        last_ts = last_ts.replace(tzinfo=timezone.utc)

    now = datetime.now(timezone.utc)
    gap_seconds = (now - last_ts).total_seconds()

    context_tokens = last["total_in"]
    thinking_overhead = estimate_thinking_overhead(messages)
    total_with_thinking = context_tokens + thinking_overhead

    ttl_seconds, ttl_tier = detect_cache_ttl(messages)
    cache_expired = gap_seconds > ttl_seconds

    # Last few turns' cache efficiency
    recent = messages[-5:]
    recent_cr = sum(m["cache_creation"] for m in recent)
    recent_total = sum(m["total_in"] for m in recent)
    cr_pct = (recent_cr / recent_total * 100) if recent_total else 0

    quota = read_quota_status()

    return {
        "context_tokens": context_tokens,
        "thinking_overhead": thinking_overhead,
        "total_with_thinking": total_with_thinking,
        "gap_seconds": gap_seconds,
        "cache_expired": cache_expired,
        "ttl_seconds": ttl_seconds,
        "ttl_tier": ttl_tier,
        "last_timestamp": last["timestamp"],
        "num_messages": len(messages),
        "recent_cr_pct": cr_pct,
        # Savings only apply when the next call would rebuild a cold cache.
        "savings": estimate_savings(total_with_thinking, ttl_tier) if cache_expired else 0,
        "quota": quota,
    }