claude-code-cache-fix 3.8.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -0
- package/README.zh.md +691 -159
- package/hooks/README.md +36 -0
- package/hooks/examples/worktree-edit-guard.py +93 -0
- package/package.json +2 -1
- package/proxy/extensions/auto-1m-guard.mjs +117 -0
- package/proxy/extensions/cache-telemetry.mjs +5 -0
- package/tools/MANUAL-COMPACT.md +15 -8
- package/tools/manual-compact.sh +17 -11
- package/tools/quota-statusline.sh +4 -2
package/hooks/README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# cache-fix hook examples
|
|
2
|
+
|
|
3
|
+
Standalone `PreToolUse` / `PostToolUse` / `SessionStart` hook scripts that address specific Claude Code behaviors. These are **examples** — you install them by pointing at them from your own `~/.claude/settings.json` (or per-project `.claude/settings.json`). cache-fix does not register them automatically.
|
|
4
|
+
|
|
5
|
+
These hooks are independent of the proxy: they run client-side via CC's hooks contract and do not touch the API request path.
|
|
6
|
+
|
|
7
|
+
## Available examples
|
|
8
|
+
|
|
9
|
+
| Script | Event | Purpose | Docs |
|
|
10
|
+
|---|---|---|---|
|
|
11
|
+
| `examples/worktree-edit-guard.py` | `PreToolUse` | Block `Edit`/`Write`/`MultiEdit`/`NotebookEdit` calls whose target path falls outside the active git worktree root. Addresses [CC#59628](https://github.com/anthropics/claude-code/issues/59628). | [`docs/hooks/worktree-edit-guard.md`](../docs/hooks/worktree-edit-guard.md) |
|
|
12
|
+
|
|
13
|
+
## Installing a hook
|
|
14
|
+
|
|
15
|
+
Each script's docs page has its own settings.json snippet. The general shape:
|
|
16
|
+
|
|
17
|
+
```jsonc
|
|
18
|
+
{
|
|
19
|
+
"hooks": {
|
|
20
|
+
"<EventName>": [
|
|
21
|
+
{
|
|
22
|
+
"matcher": "<ToolName1>|<ToolName2>",
|
|
23
|
+
"hooks": [
|
|
24
|
+
{ "type": "command", "command": "/abs/path/to/hooks/examples/<script>" }
|
|
25
|
+
]
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The `command` field must be an absolute path per CC's hooks contract. Make sure the script is executable.
|
|
33
|
+
|
|
34
|
+
## CC hooks reference
|
|
35
|
+
|
|
36
|
+
https://code.claude.com/docs/en/hooks — exit-code semantics, structured output schema, matcher patterns, the full event taxonomy.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""PreToolUse hook: refuse Edit/Write/MultiEdit/NotebookEdit calls whose
|
|
3
|
+
target path falls outside the active git worktree root.
|
|
4
|
+
|
|
5
|
+
Addresses anthropics/claude-code#59628 (worktree sessions can corrupt the
|
|
6
|
+
parent main checkout). See docs/hooks/worktree-edit-guard.md for install.
|
|
7
|
+
|
|
8
|
+
Exit codes (per CC PreToolUse hook contract):
|
|
9
|
+
0 pass-through (allow)
|
|
10
|
+
2 block (CC feeds stderr back to the agent)
|
|
11
|
+
Posture: environmental failures fail open (exit 0); protocol-shape failures
|
|
12
|
+
(missing expected path field on an in-scope tool) fail closed (exit 2)."""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
|
|
19
|
+
# Maps each guarded tool name to the tool_input key that carries its target
# path. This is the single source of truth for which tools the hook inspects.
PATH_FIELD = {
    "Edit": "file_path",
    "Write": "file_path",
    "MultiEdit": "file_path",
    "NotebookEdit": "notebook_path",
}

# Derived from PATH_FIELD rather than listed separately, so every in-scope
# tool is guaranteed to have a path-field entry and the two cannot drift.
IN_SCOPE = set(PATH_FIELD)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def git(*args, cwd):
    """Run ``git <args>`` in *cwd* and return its whitespace-stripped stdout.

    Returns None when git exits non-zero, cannot be launched (OSError, which
    also covers a missing *cwd*), or exceeds the 2-second timeout.
    """
    command = ("git",) + args
    try:
        proc = subprocess.run(command, cwd=cwd, timeout=2,
                              capture_output=True, text=True, check=False)
    except (subprocess.TimeoutExpired, OSError):
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.strip()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def worktree_root(cwd):
    """Return the realpath of the worktree root when *cwd* is in a linked worktree.

    Returns None when any of the three git queries fails, or when the git
    dir and the git common dir resolve to the same location (which the
    original author documents as the regular-checkout case).

    Both directories are joined onto *cwd* and realpath'd before comparing,
    because git may report them relative to *cwd*; a raw string comparison
    would be unreliable below the repository root.
    """
    toplevel = git("rev-parse", "--show-toplevel", cwd=cwd)
    git_dir = git("rev-parse", "--git-dir", cwd=cwd)
    common_dir = git("rev-parse", "--git-common-dir", cwd=cwd)
    if not toplevel or not git_dir or not common_dir:
        return None

    def anchored(path):
        # os.path.join discards cwd when git already returned an absolute path.
        return os.path.realpath(os.path.join(cwd, path))

    if anchored(git_dir) == anchored(common_dir):
        return None
    return os.path.realpath(toplevel)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def resolved_target(target):
    """Return a symlink-free, normalized form of *target* for containment checks.

    If *target* exists (including as a broken symlink) it is realpath'd
    directly, so a target that IS a symlink resolves to its destination.

    If it does not exist yet, the parent directory is realpath'd and the leaf
    is re-attached, so a symlinked PARENT is still caught even though the
    tool will create the leaf. The joined result is then normalized: without
    that step a leaf of ``..`` (e.g. ``<root>/missing/../..``) would come back
    as ``<root>/..``, which textually starts with ``<root>/`` and would pass a
    prefix check while actually pointing outside the root.
    """
    if os.path.lexists(target):
        return os.path.realpath(target)
    parent, leaf = os.path.split(target)
    # The realpath'd parent contains no symlinks, so collapsing "." / ".."
    # lexically with normpath cannot change which file the path refers to.
    return os.path.normpath(os.path.join(os.path.realpath(parent), leaf))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def main():
    """Decide whether the tool call described on stdin may proceed.

    Reads the hook payload as JSON from stdin and returns the process exit
    code: 0 to allow the call, 2 to block it (the reason is written to
    stderr).

    Posture:
      * Environmental faults fail open (0): unparseable stdin, a payload that
        is not a JSON object, a tool outside IN_SCOPE, or a cwd that is not
        inside a linked worktree.
      * Protocol-shape faults on an in-scope tool fail closed (2): a missing,
        empty or non-string path field, including a ``tool_input`` that is
        not an object at all.
    """
    try:
        payload = json.load(sys.stdin)
    except (json.JSONDecodeError, ValueError):
        return 0  # fail-open: malformed input is an environmental fault
    if not isinstance(payload, dict):
        # Valid JSON but not an object: calling .get() on it would raise.
        return 0
    tool = payload.get("tool_name")
    # The isinstance check also protects the set lookup from unhashable
    # values such as a list, which would raise TypeError.
    if not isinstance(tool, str) or tool not in IN_SCOPE:
        return 0
    field = PATH_FIELD[tool]
    tool_input = payload.get("tool_input")
    # A non-dict tool_input is treated like a missing field (fail closed)
    # instead of crashing with AttributeError.
    target = tool_input.get(field) if isinstance(tool_input, dict) else None
    if not isinstance(target, str) or not target:
        sys.stderr.write(f"worktree-edit-guard: refusing {tool} — "
                         f"missing tool_input.{field}.\n")
        return 2  # fail-closed: protocol-shape mismatch
    cwd = payload.get("cwd")
    if not isinstance(cwd, str) or not cwd:
        # Absent or unusable cwd: fall back to the hook process's own cwd.
        cwd = os.getcwd()
    root = worktree_root(cwd)
    if root is None:
        return 0  # not in a linked worktree; nothing to enforce
    if not os.path.isabs(target):
        target = os.path.join(cwd, target)
    abs_target = resolved_target(target)
    # Compare against "root + separator" so a sibling such as "<root>-other"
    # is not mistaken for a child; do not double the separator when root
    # already ends with one (filesystem root).
    inside_prefix = root if root.endswith(os.sep) else root + os.sep
    if abs_target == root or abs_target.startswith(inside_prefix):
        return 0
    sys.stderr.write(f"worktree-edit-guard: refusing {tool} on {abs_target} — "
                     f"outside worktree {root}. Use a path inside the worktree, "
                     f"or disable this hook in settings.json.\n")
    return 2
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
|
|
93
|
+
sys.exit(main())
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.9.0",
|
|
4
4
|
"description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"preload.mjs",
|
|
16
16
|
"postinstall.js",
|
|
17
17
|
"tools/",
|
|
18
|
+
"hooks/",
|
|
18
19
|
"claude-fixed.bat",
|
|
19
20
|
"proxy/",
|
|
20
21
|
"bin/",
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
// auto-1m-guard — detect/warn/strip the 1M-context beta token on outbound
|
|
2
|
+
// requests. Addresses anthropics/claude-code#64919 (VS Code Extension forcing
|
|
3
|
+
// 1M context on Pro Plan).
|
|
4
|
+
//
|
|
5
|
+
// Binary-walk (CC v2.1.148 / v2.1.161 — same code body, names churned):
|
|
6
|
+
// sL→kJ: function strips /\[(1|2)m\]/gi from the model string
|
|
7
|
+
// W2→bZ: gates 1M-beta inclusion on /\[1m\]/i.test(model)
|
|
8
|
+
// xKH→E9H: kill switch keys off CLAUDE_CODE_DISABLE_1M_CONTEXT
|
|
9
|
+
// CC always applies the sanitizer at messages.create call sites:
|
|
10
|
+
// messages.create({...J, model: kJ(J.model)})
|
|
11
|
+
// So req.body.model NEVER carries [1m] on the wire — the proxy-visible
|
|
12
|
+
// signal is the anthropic-beta REQUEST HEADER carrying context-1m-2025-08-07.
|
|
13
|
+
//
|
|
14
|
+
// Three modes (env: CACHE_FIX_AUTO_1M_GUARD):
|
|
15
|
+
// off no-op
|
|
16
|
+
// warn (default) stash _auto1mGuard annotation + stderr line; no mutation
|
|
17
|
+
// strip also remove context-1m-2025-08-07 from the anthropic-beta header
|
|
18
|
+
//
|
|
19
|
+
// Order 520: after ttl-management (500) and before thinking-block-sanitize
|
|
20
|
+
// (550) / session-health (590) / cache-telemetry (600). The stashed flat
|
|
21
|
+
// object at ctx.meta._auto1mGuard is spread top-level into the per-session
|
|
22
|
+
// JSON by cache-telemetry, matching the _sessionHealth / _thinkingSanitize
|
|
23
|
+
// pattern.
|
|
24
|
+
//
|
|
25
|
+
// See docs/directives/proxy-auto-1m-guard.md.
|
|
26
|
+
|
|
27
|
+
const BETA_TOKEN_1M = "context-1m-2025-08-07";
|
|
28
|
+
const HEADER_NAME = "anthropic-beta";
|
|
29
|
+
const ADVICE =
|
|
30
|
+
"Outbound request carries the context-1m-2025-08-07 beta header, which enables 1M context. " +
|
|
31
|
+
"On Pro plans this consumes overage credits immediately. To prevent CC from auto-selecting 1M: " +
|
|
32
|
+
"set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 in your env, or use /model with a non-[1m] model variant " +
|
|
33
|
+
"in-session. Strip mode (CACHE_FIX_AUTO_1M_GUARD=strip) intercepts the header at the proxy.";
|
|
34
|
+
|
|
35
|
+
// Resolve the guard mode from CACHE_FIX_AUTO_1M_GUARD.
// Returns "off" or "strip" when the variable names one of them, and "warn"
// for anything else (including unset). Matching ignores surrounding
// whitespace and letter case so that values like "STRIP" or " off " are
// honoured instead of silently falling back to "warn".
function modeFromEnv() {
  const raw = process.env.CACHE_FIX_AUTO_1M_GUARD;
  const mode = typeof raw === "string" ? raw.trim().toLowerCase() : "";
  return mode === "off" || mode === "strip" ? mode : "warn";
}
|
|
40
|
+
|
|
41
|
+
// Case-insensitive read of the anthropic-beta header. Mirrors
|
|
42
|
+
// upstream-change-detection.mjs:200-207. Returns { key, raw } where key is
|
|
43
|
+
// the actual property name found (so the rewrite can replace in-place),
|
|
44
|
+
// or null if absent.
|
|
45
|
+
export function findBetaHeader(headers) {
  // Nothing to search when the headers object is missing.
  if (!headers) return null;
  // Header names are matched case-insensitively; the first own key that
  // matches wins, and its original spelling is handed back to the caller.
  const key = Object.keys(headers).find(
    (name) => name.toLowerCase() === HEADER_NAME,
  );
  return key === undefined ? null : { key, raw: headers[key] };
}
|
|
54
|
+
|
|
55
|
+
// Parse the comma-separated header value into a trimmed token array.
|
|
56
|
+
// Tolerates string or array input.
|
|
57
|
+
// Split an anthropic-beta header value into trimmed, non-empty tokens.
// Accepts a comma-separated string or an array of such strings; every array
// element is itself split on commas, so a value like
// ["a, context-1m-2025-08-07"] yields two tokens rather than one opaque
// entry that would hide the 1M token from detection.
// Any other input (undefined, null, numbers, objects) yields [].
export function parseBetaTokens(raw) {
  if (!raw) return [];
  let parts;
  if (Array.isArray(raw)) {
    parts = raw;
  } else if (typeof raw === "string") {
    parts = [raw];
  } else {
    return [];
  }
  return parts
    .flatMap((part) => String(part).split(","))
    .map((token) => token.trim())
    .filter(Boolean);
}
|
|
63
|
+
|
|
64
|
+
// Pure planner: returns { detected, stripped, tokensAfter } given the
|
|
65
|
+
// parsed token array. Strip removes ALL occurrences (defensive against
|
|
66
|
+
// duplicates introduced by intermediaries).
|
|
67
|
+
export function planSanitizeBetaHeader(tokens, mode) {
  // Side-effect free: reports whether the 1M token is present and, only in
  // "strip" mode, produces a copy of the list with every occurrence removed.
  // When nothing is stripped the input array itself is returned as tokensAfter.
  const detected = tokens.includes(BETA_TOKEN_1M);
  const stripped = detected && mode === "strip";
  const tokensAfter = stripped
    ? tokens.filter((token) => token !== BETA_TOKEN_1M)
    : tokens;
  return { detected, stripped, tokensAfter };
}
|
|
75
|
+
|
|
76
|
+
// Rejoin tokens with the CC-canonical ", " separator. Empty array → "".
|
|
77
|
+
export function joinBetaTokens(tokens) {
  // Comma-plus-space between tokens; an empty list serializes to "".
  const separator = ", ";
  return tokens.join(separator);
}
|
|
80
|
+
|
|
81
|
+
export default {
  name: "auto-1m-guard",
  description:
    "Detect (warn) or remove (strip) the context-1m-2025-08-07 token from the outbound anthropic-beta header. " +
    "Addresses CC#64919 (VS Code Extension forcing 1M context on Pro Plan). " +
    "Modes via CACHE_FIX_AUTO_1M_GUARD: off | warn (default) | strip.",
  order: 520,

  // Request hook. Does nothing when the mode is "off", when there is no
  // anthropic-beta header, or when the header lacks the 1M token. Otherwise
  // it records an annotation on ctx.meta._auto1mGuard, writes one line to
  // stderr, and, in strip mode only, rewrites the header in place without
  // the 1M token.
  async onRequest(ctx) {
    const mode = modeFromEnv();
    if (mode === "off") return;

    const found = findBetaHeader(ctx.headers);
    if (!found) return;

    const tokens = parseBetaTokens(found.raw);
    const plan = planSanitizeBetaHeader(tokens, mode);
    if (!plan.detected) return;

    if (plan.stripped) {
      // Write back under the key as originally spelled so no second,
      // differently-cased copy of the header is created.
      ctx.headers[found.key] = joinBetaTokens(plan.tokensAfter);
    }

    ctx.meta._auto1mGuard = {
      auto_1m_detected: true,
      auto_1m_action: plan.stripped ? "stripped" : "warn",
      auto_1m_advice: ADVICE,
    };

    // Only point at strip mode when it is not already active; in strip mode
    // the old combined text ("— stripped — see ...=strip to intercept")
    // advised enabling a setting that was already on.
    const outcome = plan.stripped
      ? " — stripped. "
      : " — see CACHE_FIX_AUTO_1M_GUARD=strip to intercept. ";
    process.stderr.write(
      `[auto-1m-guard] ${BETA_TOKEN_1M} detected in outbound betas` +
        outcome +
        `Set CLAUDE_CODE_DISABLE_1M_CONTEXT=1 to prevent CC from sending it.\n`,
    );
  },
};
|
|
@@ -236,6 +236,11 @@ export default {
|
|
|
236
236
|
// Additive thinking-block-sanitize drop count (order 550, opt-in).
|
|
237
237
|
// Optional — absent unless CACHE_FIX_THINKING_SANITIZE=on.
|
|
238
238
|
...(ctx.meta._thinkingSanitize || {}),
|
|
239
|
+
// Additive auto-1m-guard annotation (order 520). Optional — absent
|
|
240
|
+
// unless the outbound request carried context-1m-2025-08-07 and the
|
|
241
|
+
// mode wasn't off. Keys: auto_1m_detected / auto_1m_action /
|
|
242
|
+
// auto_1m_advice.
|
|
243
|
+
...(ctx.meta._auto1mGuard || {}),
|
|
239
244
|
timestamp,
|
|
240
245
|
session_id: rawSid,
|
|
241
246
|
},
|
package/tools/MANUAL-COMPACT.md
CHANGED
|
@@ -10,10 +10,10 @@ When using the 1M context window hack (`DISABLE_COMPACT=1` + `CLAUDE_CODE_MAX_CO
|
|
|
10
10
|
|
|
11
11
|
1. Extracts conversation turns from the session JSONL transcript
|
|
12
12
|
2. Splits turns into three weighted segments:
|
|
13
|
-
- **Foundational** (first 20%) — truncated to
|
|
14
|
-
- **Working** (middle 40%) — truncated to
|
|
15
|
-
- **Active** (last 40%) — preserved up to
|
|
16
|
-
3. Sends the weighted extract to Claude
|
|
13
|
+
- **Foundational** (first 20%) — truncated to 300 chars each
|
|
14
|
+
- **Working** (middle 40%) — truncated to 1500 chars each
|
|
15
|
+
- **Active** (last 40%) — preserved up to 8000 chars each
|
|
16
|
+
3. Sends the weighted extract to Claude Opus for summarization
|
|
17
17
|
4. Produces a structured summary optimized for agent handoff
|
|
18
18
|
|
|
19
19
|
The weighting ensures recent active work (the part you're most likely to need) gets full detail, while earlier completed work is compressed.
|
|
@@ -142,7 +142,7 @@ Use the user context file to fill known gaps.
|
|
|
142
142
|
|
|
143
143
|
Two costs to account for:
|
|
144
144
|
|
|
145
|
-
1. **Summarization call** — the `claude --print` call through
|
|
145
|
+
1. **Summarization call** — the `claude --print` call through Opus. With the relaxed recent-turn caps the extract is larger (and Opus costs more per token than Sonnet), so expect a few % Q5h rather than ~1-2%. The tradeoff buys markedly higher-fidelity summaries; override with `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` if you need to minimize cost.
|
|
146
146
|
2. **Cold start after /clear** — the first API call rebuilds the full cache from scratch. Real-world example from a 954K-token session:
|
|
147
147
|
|
|
148
148
|
```
|
|
@@ -153,11 +153,18 @@ Second call: cache_read=957,253 cache_creation=5,569 (warm again)
|
|
|
153
153
|
|
|
154
154
|
The cold rebuild consumed ~15% Q5h in one call on our Max 5x account. After that single rebuild, the session is warm again and cache hits resume at 99%+.
|
|
155
155
|
|
|
156
|
-
**Total cost of a manual compact cycle:** ~
|
|
156
|
+
**Total cost of a manual compact cycle:** roughly 15% for the cold rebuild plus a few percent for the Opus summarization. Compare that to hitting the 1M wall and losing the session entirely.
|
|
157
157
|
|
|
158
|
-
###
|
|
158
|
+
### Summarizer model
|
|
159
159
|
|
|
160
|
-
The tool
|
|
160
|
+
The tool defaults to `claude --print --model claude-opus-4-7` for the highest-fidelity summary. Override with the `MANUAL_COMPACT_MODEL` env var — e.g. `MANUAL_COMPACT_MODEL=claude-sonnet-4-6` to minimize Q5h impact, or to point at a different model if Opus is rate-limited or retired.
|
|
161
|
+
|
|
162
|
+
### Troubleshooting: empty summary output
|
|
163
|
+
|
|
164
|
+
If `$OUTPUT` comes back empty, the most likely cause is that the extract exceeded the summarizer's context window — this tool runs near the 1M wall, and the relaxed recent-turn caps (active turns up to 8000 chars) make the extract large on exactly those big sessions. The summarizer call swallows stderr, so an oversized-input rejection surfaces as an empty file rather than a visible error. Fixes, in order of preference:
|
|
165
|
+
|
|
166
|
+
- Use a 1M-window model for the summarization: `MANUAL_COMPACT_MODEL='claude-opus-4-7[1m]' manual-compact.sh ...`
|
|
167
|
+
- Or lower the per-turn caps in the script's extraction block (the `text[:8000]` / `text[:1500]` / `text[:300]` slices).
|
|
161
168
|
|
|
162
169
|
## Why the 1M Hack Disables /compact
|
|
163
170
|
|
package/tools/manual-compact.sh
CHANGED
|
@@ -145,31 +145,33 @@ if total == 0:
|
|
|
145
145
|
sys.exit(1)
|
|
146
146
|
|
|
147
147
|
# Split into three segments with different detail levels:
|
|
148
|
-
# - First 20%: truncate to
|
|
149
|
-
# - Middle 40%: truncate to
|
|
150
|
-
# - Last 40%: full text up to
|
|
148
|
+
# - First 20%: truncate to 300 chars each (foundational context)
|
|
149
|
+
# - Middle 40%: truncate to 1500 chars each (working context)
|
|
150
|
+
# - Last 40%: full text up to 8000 chars each (active work — most important)
|
|
151
|
+
# Recent-turn caps were relaxed (was 200/400/2000) so the summarizer sees the
|
|
152
|
+
# active work in near-full detail; the stronger model (Opus, below) handles it.
|
|
151
153
|
seg1_end = int(total * 0.2)
|
|
152
154
|
seg2_end = int(total * 0.6)
|
|
153
155
|
|
|
154
156
|
with open("$EXTRACT", 'w') as f:
|
|
155
157
|
f.write("=== FOUNDATIONAL CONTEXT (early session) ===\n\n")
|
|
156
158
|
for role, text in conversation[:seg1_end]:
|
|
157
|
-
f.write(f"[{role}]: {text[:
|
|
159
|
+
f.write(f"[{role}]: {text[:300]}\n\n")
|
|
158
160
|
|
|
159
161
|
f.write("\n=== WORKING CONTEXT (mid session) ===\n\n")
|
|
160
162
|
for role, text in conversation[seg1_end:seg2_end]:
|
|
161
|
-
f.write(f"[{role}]: {text[:
|
|
163
|
+
f.write(f"[{role}]: {text[:1500]}\n\n")
|
|
162
164
|
|
|
163
165
|
f.write("\n=== ACTIVE WORK (recent — preserve in full detail) ===\n\n")
|
|
164
166
|
for role, text in conversation[seg2_end:]:
|
|
165
|
-
f.write(f"[{role}]: {text[:
|
|
167
|
+
f.write(f"[{role}]: {text[:8000]}\n\n")
|
|
166
168
|
|
|
167
169
|
import os
|
|
168
170
|
size = os.path.getsize("$EXTRACT")
|
|
169
171
|
print(f"Extracted {total} turns ({size:,} bytes, ~{size//4:,} est. tokens)")
|
|
170
|
-
print(f" Foundational: {seg1_end} turns (truncated to
|
|
171
|
-
print(f" Working: {seg2_end - seg1_end} turns (truncated to
|
|
172
|
-
print(f" Active: {total - seg2_end} turns (up to
|
|
172
|
+
print(f" Foundational: {seg1_end} turns (truncated to 300 chars)")
|
|
173
|
+
print(f" Working: {seg2_end - seg1_end} turns (truncated to 1500 chars)")
|
|
174
|
+
print(f" Active: {total - seg2_end} turns (up to 8000 chars)")
|
|
173
175
|
PYEOF
|
|
174
176
|
|
|
175
177
|
# Build the summarization prompt
|
|
@@ -199,10 +201,14 @@ ADDITIONAL USER CONTEXT TO PRESERVE:
|
|
|
199
201
|
$USER_CONTEXT"
|
|
200
202
|
fi
|
|
201
203
|
|
|
204
|
+
# Summarizer model. Defaults to Opus for highest-fidelity summaries; override
|
|
205
|
+
# with MANUAL_COMPACT_MODEL (e.g. when Opus is rate-limited or retired).
|
|
206
|
+
COMPACT_MODEL="${MANUAL_COMPACT_MODEL:-claude-opus-4-7}"
|
|
207
|
+
|
|
202
208
|
echo ""
|
|
203
|
-
echo "Sending to Claude for summarization..."
|
|
209
|
+
echo "Sending to Claude ($COMPACT_MODEL) for summarization..."
|
|
204
210
|
|
|
205
|
-
cat "$EXTRACT" | claude --print --model
|
|
211
|
+
cat "$EXTRACT" | claude --print --model "$COMPACT_MODEL" "$PROMPT" > "$OUTPUT" 2>/dev/null
|
|
206
212
|
|
|
207
213
|
SIZE=$(wc -c < "$OUTPUT")
|
|
208
214
|
echo ""
|
|
@@ -115,11 +115,13 @@ def draw_bar(consumed_pct, elapsed_pct, width=BAR_WIDTH):
|
|
|
115
115
|
# Tick overlays a fill cell when consumed > elapsed, keeping bar width
|
|
116
116
|
# constant — that's what makes the over-pace state legible (┃ inside the
|
|
117
117
|
# filled run) rather than just pushing fill cells around.
|
|
118
|
-
|
|
118
|
+
def to_cells(pct):
|
|
119
|
+
return int(round(max(0, min(100, pct)) / 100 * width))
|
|
120
|
+
fill = to_cells(consumed_pct)
|
|
119
121
|
if elapsed_pct is None:
|
|
120
122
|
tick = -1
|
|
121
123
|
else:
|
|
122
|
-
tick = min(
|
|
124
|
+
tick = min(to_cells(elapsed_pct), width - 1)
|
|
123
125
|
cells = []
|
|
124
126
|
remaining = fill
|
|
125
127
|
for i in range(width):
|