@ictechgy/context-guard 0.4.0 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/README.ko.md +61 -32
  3. package/README.md +90 -22
  4. package/context-guard-kit/README.md +39 -26
  5. package/context-guard-kit/benchmark_runner.py +273 -8
  6. package/context-guard-kit/claude_transcript_cost_audit.py +325 -12
  7. package/context-guard-kit/context_compress.py +153 -1
  8. package/context-guard-kit/context_filter.py +446 -0
  9. package/context-guard-kit/context_guard_cli.py +3 -0
  10. package/context-guard-kit/context_guard_diet.py +677 -2
  11. package/context-guard-kit/context_pack.py +1694 -2
  12. package/context-guard-kit/cost_guard.py +1870 -0
  13. package/context-guard-kit/setup_wizard.py +820 -29
  14. package/context-guard-kit/trim_command_output.py +396 -45
  15. package/docs/benchmark-fixtures/learned-compression.tasks.example.json +24 -0
  16. package/docs/benchmark-fixtures/learned-compression.variants.example.json +10 -0
  17. package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +24 -0
  18. package/docs/benchmark-fixtures/visual-ocr.variants.example.json +10 -0
  19. package/docs/benchmark-workflow-examples.md +40 -0
  20. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +169 -0
  21. package/docs/benchmark-workflows/measured-token-workflow.example.json +170 -0
  22. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +170 -0
  23. package/docs/cache-diagnostics-schema.md +75 -0
  24. package/docs/cache-diagnostics.example.json +116 -0
  25. package/docs/cache-diagnostics.schema.json +460 -0
  26. package/docs/distribution.md +4 -2
  27. package/docs/experimental-benchmark-fixtures.md +36 -0
  28. package/package.json +11 -2
  29. package/packaging/homebrew/context-guard.rb.template +3 -2
  30. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  31. package/plugins/context-guard/README.ko.md +21 -13
  32. package/plugins/context-guard/README.md +24 -10
  33. package/plugins/context-guard/bin/context-guard +3 -0
  34. package/plugins/context-guard/bin/context-guard-audit +325 -12
  35. package/plugins/context-guard/bin/context-guard-bench +273 -8
  36. package/plugins/context-guard/bin/context-guard-compress +153 -1
  37. package/plugins/context-guard/bin/context-guard-cost +1870 -0
  38. package/plugins/context-guard/bin/context-guard-diet +677 -2
  39. package/plugins/context-guard/bin/context-guard-filter +446 -0
  40. package/plugins/context-guard/bin/context-guard-pack +1694 -2
  41. package/plugins/context-guard/bin/context-guard-setup +820 -29
  42. package/plugins/context-guard/bin/context-guard-trim-output +396 -45
  43. package/plugins/context-guard/brief/README.md +10 -3
  44. package/plugins/context-guard/skills/optimize/SKILL.md +5 -2
  45. package/plugins/context-guard/skills/setup/SKILL.md +3 -1
@@ -0,0 +1,1870 @@
1
+ #!/usr/bin/env python3
2
+ """Passive Anthropic prompt-cache cost guardrails for ContextGuard.
3
+
4
+ This helper is intentionally advisory. It never calls Anthropic, never claims a
5
+ provider cache hit as billing authority, and never stores raw request text. The
6
+ local ledger stores keyed HMAC fingerprints over confirmed provider observations
7
+ so future preflights can warn about likely cache misses without leaking prompts.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import base64
13
+ import binascii
14
+ import hashlib
15
+ import hmac
16
+ import json
17
+ import math
18
+ import os
19
+ from pathlib import Path
20
+ import re
21
+ import secrets
22
+ import shutil
23
+ import stat
24
+ import sys
25
+ import time
26
+ from dataclasses import dataclass
27
+ from typing import Any, NoReturn
28
+
29
+ TOOL_NAME = "context-guard-cost"
30
+ SCHEMA_VERSION = "contextguard.cost.v1"
31
+ DEFAULT_STORE_DIR = ".context-guard/cost-ledger"
32
+ LEDGER_NAME = "ledger.jsonl"
33
+ KEY_NAME = "hmac.key"
34
+ LOCK_OWNER_NAME = "owner.json"
35
+ HMAC_KEY_RE = re.compile(r"^[A-Za-z0-9_-]{43}=$")
36
+ KEY_LOCK_WAIT_ATTEMPTS = 100
37
+ KEY_LOCK_POLL_SECONDS = 0.05
38
+ KEY_LOCK_STALE_SECONDS = 60.0
39
+ KEY_LOCK_METADATA_CLOCK_SKEW_SECONDS = 5.0
40
+ DEFAULT_MAX_BYTES = 10_000_000
41
+ MAX_MAX_BYTES = 100_000_000
42
+ TOKEN_PROXY_CHARS_PER_TOKEN = 4
43
+ DEFAULT_USD_TO_KRW = 1350.0
44
+ DEFAULT_SAFETY_FACTOR = 1.25
45
+ DEFAULT_LARGE_SECTION_BYTES = 64_000
46
+ MAX_LEDGER_ROWS = 20_000
47
+ TTL_SECONDS = {"5m": 5 * 60, "1h": 60 * 60}
48
+ ANTHROPIC_DOCS_URL = "https://docs.anthropic.com/en/build-with-claude/prompt-caching"
49
+ ANTHROPIC_PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing"
50
+
51
+ SECRET_RE = re.compile(
52
+ r"(?is)("
53
+ r"-----BEGIN (?:[A-Z0-9 ]*PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----.*?-----END (?:[A-Z0-9 ]*PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----|"
54
+ r"AKIA[0-9A-Z]{16}|"
55
+ r"gh[pousr]_[A-Za-z0-9_]{20,}|"
56
+ r"github_pat_[A-Za-z0-9_]{20,}|"
57
+ r"glpat-[A-Za-z0-9_-]{12,}|"
58
+ r"xox[abprs]-[A-Za-z0-9-]{10,}|"
59
+ r"sk-(?:ant|proj)-[A-Za-z0-9_-]{8,}|"
60
+ r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}|"
61
+ r"AIza[0-9A-Za-z_\-]{20,}|"
62
+ r"(?i:Authorization)\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+|"
63
+ r"[?&](?:X-Amz-Signature|X-Amz-Credential|X-Amz-Security-Token|AWSAccessKeyId|Signature|sig|access_token|refresh_token|id_token|auth|authorization|api[_-]?key|apikey|token|secret|password|client[_-]?secret|private[_-]?key|privatekey|pgp[_-]?private[_-]?key|pgpprivatekey|ssh[_-]?key|sshkey|(?:aws[_-]?)?access[_-]?key(?:[_-]?id)?|awsaccesskeyid)=[^&#\s,}\]]+|"
64
+ r"(?<![A-Za-z0-9])(?:api[_-]?key|apikey|token|secret|password|client[_-]?secret|authorization|credential|signature|sig|private[_-]?key|privatekey|pgp[_-]?private[_-]?key|pgpprivatekey|ssh[_-]?key|sshkey|(?:aws[_-]?)?access[_-]?key(?:[_-]?id)?|awsaccesskeyid)\s*[:=]\s*[^\s,}\]]+"
65
+ r")"
66
+ )
67
+
68
+
69
class CostGuardError(ValueError):
    """Deterministic failure whose message is intended for the end user."""
71
+
72
+
73
def fail(message: str) -> NoReturn:
    """Abort the current operation by raising ``CostGuardError(message)``."""
    error = CostGuardError(message)
    raise error
75
+
76
+
77
def reject_json_constant(value: str) -> NoReturn:
    """Refuse a JSON constant by raising ``ValueError``.

    This file passes it to ``json.loads`` as ``parse_constant`` so the
    constants routed through that hook are rejected instead of parsed.
    """
    message = f"invalid JSON constant: {value}"
    raise ValueError(message)
79
+
80
+
81
def json_bytes(data: Any) -> str:
    """Serialize *data* to compact JSON text with sorted keys.

    Non-ASCII characters are emitted unescaped and no whitespace is added
    between tokens. Despite the name, the result is a ``str``.

    Raises:
        CostGuardError: when ``json.dumps`` raises ``ValueError``, which
            ``allow_nan=False`` triggers for NaN and infinities.
    """
    options = {
        "ensure_ascii": False,
        "sort_keys": True,
        "separators": (",", ":"),
        "allow_nan": False,
    }
    try:
        encoded = json.dumps(data, **options)
    except ValueError as exc:
        fail(f"JSON value contained a non-finite number: {exc}")
    return encoded
86
+
87
+
88
def require_json_object(data: Any, label: str) -> dict[str, Any]:
    """Return *data* unchanged when it is a dict; otherwise fail, naming *label*."""
    if isinstance(data, dict):
        return data
    fail(f"{label} must be a JSON object")
92
+
93
+
94
def safe_int(value: Any, default: int = 0) -> int:
    """Convert *value* with ``int()``, returning *default* when conversion fails.

    ``TypeError``, ``ValueError`` and ``OverflowError`` are treated as
    conversion failures; any other exception propagates.
    """
    try:
        converted = int(value)
    except (TypeError, ValueError, OverflowError):
        converted = default
    return converted
99
+
100
+
101
def finite_float_arg(value: Any, label: str, *, minimum: float = 0.0, allow_zero: bool = True) -> float:
    """Parse *value* as a finite float and enforce a lower bound.

    Args:
        value: Anything accepted by ``float()``.
        label: Name used in error messages.
        minimum: Lower bound for the parsed number.
        allow_zero: When true the bound is inclusive (``>= minimum``);
            when false it is exclusive (``> minimum``).

    Returns:
        The parsed number.

    Raises:
        CostGuardError: if the value is not numeric, not finite, or below
            the bound.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        fail(f"{label} must be numeric")
    if not math.isfinite(number):
        fail(f"{label} must be finite")
    if allow_zero and number < minimum:
        fail(f"{label} must be >= {minimum:g}")
    if not allow_zero and number <= minimum:
        fail(f"{label} must be > {minimum:g}")
    return number
114
+
115
+
116
def non_negative_int_arg(value: str) -> int:
    """Parse *value* as an integer that is zero or greater.

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer literal or
            is negative.
    """
    try:
        parsed = int(value)
    except ValueError as exc:
        raise argparse.ArgumentTypeError("must be an integer") from exc
    if parsed >= 0:
        return parsed
    raise argparse.ArgumentTypeError("must be >= 0")
124
+
125
+
126
def byte_len_text(text: str) -> int:
    """Return the length of *text* once encoded as UTF-8.

    Encoding uses ``errors="replace"``, so code points that cannot be encoded
    are substituted rather than raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    return len(encoded)
128
+
129
+
130
def token_proxy_text(text: str) -> int:
    """Estimate a token count from the character length of *text*.

    Empty text yields 0. Otherwise the result is the character count divided
    by ``TOKEN_PROXY_CHARS_PER_TOKEN``, rounded up, and never less than 1.
    """
    if text:
        estimate = math.ceil(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN)
        return max(1, estimate)
    return 0
134
+
135
+
136
def token_proxy_obj(data: Any) -> int:
    """Estimate a token count for *data* from its ``json_bytes`` serialization."""
    canonical = json_bytes(data)
    return token_proxy_text(canonical)
138
+
139
+
140
def read_text_path(path: str, *, max_bytes: int = DEFAULT_MAX_BYTES) -> tuple[str, bool]:
    """Read at most *max_bytes* of text from *path*, or from stdin when it is ``"-"``.

    Args:
        path: File path, or ``"-"`` to read from ``sys.stdin.buffer``.
        max_bytes: Upper bound on returned bytes; must lie between 1 and
            ``MAX_MAX_BYTES`` inclusive.

    Returns:
        ``(text, truncated)``. ``text`` is decoded as UTF-8 with undecodable
        bytes replaced; ``truncated`` is true when the input held more than
        *max_bytes* bytes and was cut.

    Raises:
        CostGuardError: if *max_bytes* is out of range, the path cannot be
            inspected or read, or the path is not a regular file.
    """
    if max_bytes < 1 or max_bytes > MAX_MAX_BYTES:
        fail(f"max bytes must be between 1 and {MAX_MAX_BYTES}")
    # One byte past the limit is enough to detect truncation.
    read_limit = max_bytes + 1
    if path == "-":
        raw = sys.stdin.buffer.read(read_limit)
    else:
        p = Path(path)
        try:
            st = p.stat()
        except OSError as exc:
            fail(f"could not read input file: {exc}")
        if not stat.S_ISREG(st.st_mode):
            fail("input path must be a regular file")
        # Always use a bounded read so a file that grows after stat() cannot
        # exhaust memory, and surface open/read errors as user-facing
        # failures instead of raw tracebacks.
        try:
            with p.open("rb") as fh:
                raw = fh.read(read_limit)
        except OSError as exc:
            fail(f"could not read input file: {exc}")
    truncated = len(raw) > max_bytes
    if truncated:
        raw = raw[:max_bytes]
    return raw.decode("utf-8", errors="replace"), truncated
163
+
164
+
165
def load_json_input(path: str, *, max_bytes: int = DEFAULT_MAX_BYTES) -> tuple[Any, bool]:
    """Read *path* via ``read_text_path`` and parse it as strict JSON.

    Args:
        path: File path, or ``"-"`` for stdin (as accepted by
            ``read_text_path``).
        max_bytes: Size limit forwarded to ``read_text_path``.

    Returns:
        ``(data, truncated)``. Truncated input is rejected before parsing,
        so ``truncated`` is always false on return.

    Raises:
        CostGuardError: if the input exceeds *max_bytes*, is not valid JSON,
            contains a constant rejected by ``reject_json_constant``, or is
            nested too deeply for the parser.
    """
    text, truncated = read_text_path(path, max_bytes=max_bytes)
    if truncated:
        fail("JSON input exceeded max bytes")
    try:
        data = json.loads(text, parse_constant=reject_json_constant)
    except json.JSONDecodeError as exc:
        fail(f"invalid JSON input at line {exc.lineno}: {exc.msg}")
    except ValueError as exc:
        fail(f"invalid JSON input: {exc}")
    except RecursionError:
        # Deeply nested arrays/objects exhaust the decoder's recursion limit;
        # report that as an input problem instead of crashing.
        fail("invalid JSON input: nesting is too deep")
    return data, truncated
176
+
177
+
178
def secret_count_in_text(text: str) -> int:
    """Count the non-overlapping ``SECRET_RE`` matches found in *text*."""
    matches = 0
    for _ in SECRET_RE.finditer(text):
        matches += 1
    return matches
180
+
181
+
182
def is_provider_cache_control(value: Any) -> bool:
    """Report whether *value* looks like a provider ``cache_control`` marker.

    A dict with a ``type`` is recognized only when that type is
    ``"ephemeral"`` (case and surrounding whitespace ignored). A dict without
    a ``type`` is recognized when its ``ttl`` is one of ``5m``, ``1h``,
    ``60m`` or ``hour``. Anything else is not recognized.
    """
    if not isinstance(value, dict):
        return False
    marker_type = value.get("type")
    if marker_type is not None:
        # An explicit type decides the outcome on its own; ttl is ignored.
        return str(marker_type).strip().lower() == "ephemeral"
    marker_ttl = value.get("ttl")
    if marker_ttl is None:
        return False
    return str(marker_ttl).strip().lower() in {"5m", "1h", "60m", "hour"}
193
+
194
+
195
def clone_jsonish(value: Any) -> Any:
    """Recursively copy dicts and lists, coercing dict keys to ``str``.

    Values that are neither dict nor list are returned as-is (not copied).
    """
    if isinstance(value, list):
        return [clone_jsonish(element) for element in value]
    if isinstance(value, dict):
        return {str(key): clone_jsonish(child) for key, child in value.items()}
    return value
201
+
202
+
203
def strip_cache_control(value: Any) -> Any:
    """Drop a provider ``cache_control`` marker from the top level of *value*.

    Only a ``cache_control`` key directly on this dict is removed, and only
    when its value is recognized by ``is_provider_cache_control``. Nested
    ``cache_control`` entries may be legitimate user/application data (for
    example inside tool schemas), so they are copied untouched. Lists are
    copied without stripping; other values are returned as-is.
    """
    if isinstance(value, list):
        return [clone_jsonish(item) for item in value]
    if not isinstance(value, dict):
        return value
    kept: dict[str, Any] = {}
    for key, child in value.items():
        if key == "cache_control" and is_provider_cache_control(child):
            continue
        kept[str(key)] = clone_jsonish(child)
    return kept
219
+
220
+
221
def strip_cache_control_at_path(value: Any, path: tuple[str, ...]) -> Any:
    """Copy *value*, stripping the provider marker from the object at *path*.

    *path* is a sequence of dict keys to descend through. An empty path
    strips *value* itself via ``strip_cache_control``. Keys off the path, and
    non-dict values met while path components remain, are copied unchanged.
    """
    if not path:
        return strip_cache_control(value)
    if not isinstance(value, dict):
        return clone_jsonish(value)
    head, remainder = path[0], tuple(path[1:])
    result: dict[str, Any] = {}
    for key, child in value.items():
        name = str(key)
        if name == head:
            result[name] = strip_cache_control_at_path(child, remainder)
        else:
            result[name] = clone_jsonish(child)
    return result
231
+
232
+
233
def strip_known_cache_controls(request: Any) -> Any:
    """Return a copy of *request* with provider markers removed from known slots.

    The recognized slots are: entries of ``cache_breakpoints``, ``tools`` and
    ``system`` (when those are lists), the ``system_cache`` dict, each message
    dict in ``messages``, and each dict block in a message's ``content`` list.
    A non-dict *request* is simply copied.
    """
    if not isinstance(request, dict):
        return clone_jsonish(request)

    def strip_each(items: list[Any]) -> list[Any]:
        # Only dict entries can carry a marker; everything else is just copied.
        return [
            strip_cache_control(entry) if isinstance(entry, dict) else clone_jsonish(entry)
            for entry in items
        ]

    out = clone_jsonish(request)

    for slot in ("cache_breakpoints", "tools", "system"):
        current = out.get(slot)
        if isinstance(current, list):
            out[slot] = strip_each(current)

    system_cache = out.get("system_cache")
    if isinstance(system_cache, dict):
        out["system_cache"] = strip_cache_control(system_cache)

    messages = out.get("messages")
    if isinstance(messages, list):
        rebuilt = []
        for message in messages:
            if not isinstance(message, dict):
                rebuilt.append(clone_jsonish(message))
                continue
            cleaned = strip_cache_control(message)
            content = cleaned.get("content")
            if isinstance(content, list):
                cleaned["content"] = strip_each(content)
            rebuilt.append(cleaned)
        out["messages"] = rebuilt

    return out
278
+
279
+
280
def cache_ttl(cache_control: Any) -> str:
    """Normalize a marker's ``ttl`` to either ``"5m"`` or ``"1h"``.

    ``1h``, ``60m`` and ``hour`` (case and surrounding whitespace ignored)
    map to ``"1h"``. Every other input, including non-dicts and a missing or
    falsy ``ttl``, maps to ``"5m"``.
    """
    if isinstance(cache_control, dict):
        requested = str(cache_control.get("ttl") or "5m").strip().lower()
        if requested in {"1h", "60m", "hour"}:
            return "1h"
    return "5m"
287
+
288
+
289
def find_cache_control(value: Any) -> dict[str, Any] | None:
    """Return the recognized provider marker stored directly on *value*.

    Gives ``None`` when *value* is not a dict or its ``cache_control`` entry
    is not accepted by ``is_provider_cache_control``.
    """
    if not isinstance(value, dict):
        return None
    candidate = value.get("cache_control")
    return candidate if is_provider_cache_control(candidate) else None
295
+
296
+
297
def has_unsupported_cache_control(value: Any) -> bool:
    """Report whether *value* is a dict with an unrecognized ``cache_control`` key."""
    if not isinstance(value, dict) or "cache_control" not in value:
        return False
    return not is_provider_cache_control(value.get("cache_control"))
303
+
304
+
305
@dataclass(frozen=True)
class CacheBreakpoint:
    """One cache breakpoint together with the prompt prefix leading up to it."""

    # 1-based position among the breakpoints found in a request.
    index: int
    # Unit kind the breakpoint was found on (e.g. "tool", "system").
    kind: str
    # Normalized TTL label as produced by cache_ttl.
    ttl: str
    # Snapshot of all prompt units up to and including this breakpoint.
    prefix: list[Any]
    # The prompt unit that carried the marker.
    section: Any
    unsupported: bool = False

    @property
    def breakpoint_id(self) -> str:
        """Label of the form ``bpNNN`` with the index zero-padded to three digits."""
        return "bp{:03d}".format(self.index)
317
+
318
+
319
def _prompt_unit(kind: str, value: Any, *, cache_control_path: tuple[str, ...] = (), **meta: Any) -> dict[str, Any]:
    """Build one prompt-unit dict with the provider marker stripped.

    The unit holds ``kind``, the stripped ``value`` (the marker is removed at
    *cache_control_path*), and every non-``None`` entry of *meta*, added in
    sorted key order.
    """
    unit: dict[str, Any] = {
        "kind": kind,
        "value": strip_cache_control_at_path(value, cache_control_path),
    }
    unit.update((name, meta[name]) for name in sorted(meta) if meta[name] is not None)
    return unit
325
+
326
+
327
def _append_unit(
    units: list[Any],
    breakpoints: list[CacheBreakpoint],
    *,
    kind: str,
    value: Any,
    cc: Any,
    cache_control_path: tuple[str, ...] = (),
    **meta: Any,
) -> None:
    """Append a prompt unit and, when *cc* is a dict, record a breakpoint.

    Both lists are mutated in place. The recorded breakpoint captures a copy
    of *units* as it stands after the new unit has been appended.
    """
    unit = _prompt_unit(kind, value, cache_control_path=cache_control_path, **meta)
    units.append(unit)
    if not isinstance(cc, dict):
        return
    marker = CacheBreakpoint(
        index=len(breakpoints) + 1,
        kind=kind,
        ttl=cache_ttl(cc),
        prefix=list(units),
        section=unit,
    )
    breakpoints.append(marker)
349
+
350
+
351
def extract_cache_breakpoints(request: Any) -> tuple[list[CacheBreakpoint], dict[str, Any]]:
    """Walk *request* in order and collect cache breakpoints with their prefixes.

    Anthropic prompt caching is prefix-oriented, so each breakpoint carries
    the canonical prompt material from the start of the request through the
    unit holding the ``cache_control`` marker, not an arbitrary snippet.
    Units are visited in this order: ``cache_breakpoints``, ``tools``,
    ``system``, ``messages``. The parser is deliberately conservative:
    ``cache_control`` values it does not recognize are counted in the
    returned metadata instead of being treated as breakpoints.

    Returns:
        ``(breakpoints, metadata)``. For a non-dict request the list is empty
        and the metadata reports ``request_shape`` as ``"unsupported"``.
    """
    if not isinstance(request, dict):
        return [], {"request_shape": "unsupported", "unsupported_cache_controls": 0}

    units: list[Any] = []
    breakpoints: list[CacheBreakpoint] = []
    unsupported_total = 0

    explicit = request.get("cache_breakpoints")
    for item in explicit if isinstance(explicit, list) else ():
        if not isinstance(item, dict):
            unsupported_total += 1
            continue
        if "cache_control" not in item:
            # No marker given: treat the entry itself as an ephemeral breakpoint.
            cc = {"type": "ephemeral", "ttl": item.get("ttl", "5m")}
        else:
            cc = find_cache_control(item)
            if cc is None:
                unsupported_total += 1
        _append_unit(units, breakpoints, kind=str(item.get("kind") or "explicit"), value=item, cc=cc)

    tools = request.get("tools")
    if isinstance(tools, list):
        for position, tool in enumerate(tools):
            cc = find_cache_control(tool)
            if has_unsupported_cache_control(tool):
                unsupported_total += 1
            _append_unit(units, breakpoints, kind="tool", value=tool, cc=cc, index=position)
    elif tools is not None:
        units.append(_prompt_unit("tools", tools))

    system = request.get("system")
    if isinstance(system, list):
        for position, block in enumerate(system):
            cc = find_cache_control(block)
            if has_unsupported_cache_control(block):
                unsupported_total += 1
            _append_unit(units, breakpoints, kind="system", value=block, cc=cc, index=position)
    elif system is not None:
        # A non-list system prompt takes its marker from the sibling
        # "system_cache" entry.
        system_cache = request.get("system_cache") or {}
        cc = find_cache_control(system_cache)
        if has_unsupported_cache_control(system_cache):
            unsupported_total += 1
        _append_unit(units, breakpoints, kind="system", value=system, cc=cc)

    messages = request.get("messages")
    if isinstance(messages, list):
        for mi, message in enumerate(messages):
            if not isinstance(message, dict):
                _append_unit(units, breakpoints, kind="message", value=message, cc=None, index=mi)
                continue
            role = str(message.get("role") or "unknown")
            content = message.get("content")
            msg_cc = find_cache_control(message)
            if has_unsupported_cache_control(message):
                unsupported_total += 1
            if not isinstance(content, list):
                _append_unit(units, breakpoints, kind="message", value=message, cc=msg_cc, index=mi)
                continue
            block_has_marker = False
            for ci, block in enumerate(content):
                cc = find_cache_control(block)
                if cc:
                    block_has_marker = True
                if has_unsupported_cache_control(block):
                    unsupported_total += 1
                _append_unit(
                    units,
                    breakpoints,
                    kind="message_content",
                    value={"role": role, "content": block},
                    cc=cc,
                    cache_control_path=("content",),
                    message_index=mi,
                    content_index=ci,
                )
            if msg_cc and not block_has_marker:
                # Message-level cache_control around a list is less common, but
                # keep a conservative prefix fingerprint over the whole message.
                _append_unit(units, breakpoints, kind="message", value=message, cc=msg_cc, index=mi)
    elif messages is not None:
        units.append(_prompt_unit("messages", messages))

    # Count occurrences of the quoted key in the serialized request.
    marker_count = json_bytes(request).count('"cache_control"')
    return breakpoints, {
        "request_shape": "anthropic_like",
        "prompt_units": len(units),
        "unsupported_cache_controls": unsupported_total,
        "cache_control_markers": marker_count,
    }
449
+
450
+
451
def ensure_private_dir(path: Path) -> None:
    """Create *path* (and parents) if missing, then try to set its mode to 0700."""
    path.mkdir(parents=True, exist_ok=True)
    private_mode = 0o700
    try:
        os.chmod(path, private_mode)
    except OSError:
        # Best effort: a failed chmod is ignored.
        pass
457
+
458
+
459
def os_error_detail(exc: OSError) -> str:
    """Describe *exc* without its filename.

    Uses ``exc.strerror`` when set, otherwise the exception class name, and
    appends ``(errno N)`` when an errno is available.
    """
    detail = exc.strerror or exc.__class__.__name__
    if exc.errno is None:
        return detail
    return f"{detail} (errno {exc.errno})"
464
+
465
+
466
def lock_guidance() -> str:
    """Return the placeholder lock path used in lock-related error messages."""
    return "<store-dir>/" + KEY_NAME + ".lock"
468
+
469
+
470
def ensure_hmac_key_private_mode(key_path: Path) -> None:
    """Set *key_path* to mode 0600 and, on POSIX, verify that it took effect.

    On non-POSIX platforms a chmod failure is ignored and no verification is
    done.

    Raises:
        CostGuardError: on POSIX, if chmod fails, the file cannot be
            stat-ed afterwards, or the resulting mode is not exactly 0600.
    """
    on_posix = os.name == "posix"
    try:
        os.chmod(key_path, 0o600)
    except OSError as exc:
        if on_posix:
            fail(f"could not secure local HMAC key file: {os_error_detail(exc)}")
        return
    if not on_posix:
        return
    try:
        mode = stat.S_IMODE(key_path.stat().st_mode)
    except OSError as exc:
        fail(f"could not verify local HMAC key file privacy: {os_error_detail(exc)}")
    if mode != 0o600:
        fail("could not verify local HMAC key file privacy: expected mode 0600")
484
+
485
+
486
def read_hmac_key(key_path: Path) -> bytes:
    """Load and strictly validate the stored HMAC key.

    The file must contain exactly the canonical URL-safe base64 encoding of a
    32-byte key: ASCII only, matching ``HMAC_KEY_RE``, and re-encoding to the
    same text. After validation the file mode is enforced through
    ``ensure_hmac_key_private_mode``.

    Returns:
        The 32 decoded key bytes.

    Raises:
        CostGuardError: if the file cannot be read or fails any check.
    """
    try:
        raw = key_path.read_text(encoding="utf-8")
    except UnicodeError:
        fail("invalid local HMAC key file: expected UTF-8 canonical URL-safe base64 text")
    except OSError as exc:
        fail(f"could not read local HMAC key file: {os_error_detail(exc)}")

    try:
        raw_ascii = raw.encode("ascii")
    except UnicodeEncodeError:
        fail("invalid local HMAC key file: expected ASCII canonical URL-safe base64 text")

    if HMAC_KEY_RE.fullmatch(raw) is None:
        fail("invalid local HMAC key file: expected canonical URL-safe 32-byte key")

    try:
        key = base64.b64decode(raw_ascii, altchars=b"-_", validate=True)
    except (binascii.Error, ValueError):
        fail("invalid local HMAC key file: invalid canonical URL-safe base64")

    # Re-encoding must reproduce the file text exactly.
    round_trip = base64.urlsafe_b64encode(key).decode("ascii")
    if round_trip != raw:
        fail("invalid local HMAC key file: expected canonical URL-safe 32-byte key")
    if len(key) != 32:
        fail("invalid local HMAC key file: expected 32 decoded bytes")

    ensure_hmac_key_private_mode(key_path)
    return key
509
+
510
+
511
def fsync_parent_dir(path: Path) -> None:
    """Best-effort fsync of the directory containing *path* (POSIX only).

    Does nothing on non-POSIX platforms. ``OSError`` from opening, syncing or
    closing the directory is ignored.
    """
    if os.name != "posix":
        return
    parent = path.parent
    try:
        dir_fd = os.open(parent, os.O_RDONLY)
    except OSError:
        return
    try:
        os.fsync(dir_fd)
    except OSError:
        pass
    finally:
        # Always release the descriptor, even if fsync failed.
        try:
            os.close(dir_fd)
        except OSError:
            pass
527
+
528
+
529
def write_all(fd: int, data: bytes) -> None:
    """Write all of *data* to *fd*, looping over partial writes.

    Raises:
        OSError: if ``os.write`` reports that zero or fewer bytes were
            written, or raises itself.
    """
    remaining = memoryview(data)
    while len(remaining) > 0:
        written = os.write(fd, remaining)
        if written <= 0:
            raise OSError("short write to local HMAC key file")
        remaining = remaining[written:]
537
+
538
+
539
@dataclass(frozen=True)
class KeyLock:
    """Handle describing a key lock created by this process."""

    # Random token written into the lock's owner metadata.
    nonce: str
    # Whether the owner metadata file was written successfully.
    metadata_written: bool
543
+
544
+
545
def write_key_lock_metadata(lock_dir: Path) -> KeyLock:
    """Write the owner metadata file into *lock_dir*.

    The metadata records this process id, the current time and a fresh random
    nonce. The returned ``KeyLock`` always carries the nonce;
    ``metadata_written`` is false when writing, chmod-ing or syncing raised
    ``OSError``.
    """
    nonce = secrets.token_hex(8)
    owner_path = lock_dir / LOCK_OWNER_NAME
    payload = json_bytes(
        {
            "pid": os.getpid(),
            "created_at_unix": time.time(),
            "nonce": nonce,
        }
    )
    written = True
    try:
        owner_path.write_text(payload, encoding="utf-8")
        os.chmod(owner_path, 0o600)
        fsync_parent_dir(owner_path)
    except OSError:
        written = False
    return KeyLock(nonce=nonce, metadata_written=written)
560
+
561
+
562
def key_lock_age_seconds(lock_dir: Path, now: float | None = None) -> float:
    """Return how old the lock at *lock_dir* is, in seconds (never negative).

    The ``created_at_unix`` value from the owner metadata is preferred when
    it is a finite, non-negative int or float that is not further in the
    future than ``KEY_LOCK_METADATA_CLOCK_SKEW_SECONDS``. Otherwise the
    directory mtime is used; if that cannot be read either, the age is 0.0.

    Args:
        lock_dir: Lock directory to inspect.
        now: Reference time; defaults to ``time.time()``.
    """
    if now is None:
        current = time.time()
    else:
        current = now
    owner_path = lock_dir / LOCK_OWNER_NAME
    try:
        metadata = json.loads(owner_path.read_text(encoding="utf-8"))
        created = metadata.get("created_at_unix") if isinstance(metadata, dict) else None
        # Exact type check on purpose: bool is an int subclass and must not count.
        if type(created) in (int, float) and math.isfinite(float(created)):
            created_float = float(created)
            latest_plausible = current + KEY_LOCK_METADATA_CLOCK_SKEW_SECONDS
            if 0 <= created_float <= latest_plausible:
                return max(0.0, current - created_float)
    except (OSError, UnicodeError, json.JSONDecodeError, TypeError, ValueError, OverflowError):
        pass
    try:
        return max(0.0, current - lock_dir.stat().st_mtime)
    except OSError:
        return 0.0
579
+
580
+
581
def path_mtime_age_seconds(path: Path, now: float | None = None) -> float:
    """Return seconds elapsed since *path* was last modified (never negative).

    Yields 0.0 when the path cannot be stat-ed. *now* defaults to
    ``time.time()``.
    """
    reference = time.time() if now is None else now
    try:
        modified = path.stat().st_mtime
    except OSError:
        return 0.0
    return max(0.0, reference - modified)
587
+
588
+
589
def reclaim_stale_key_lock(lock_dir: Path, key_path: Path) -> bool:
    """Remove *lock_dir* when it is stale and no key file exists yet.

    The lock is first renamed to a unique ``.stale.<pid>.<random>`` sibling
    and then deleted. Deletion errors are ignored; such leftovers match the
    prefix that ``cleanup_orphaned_stale_key_locks`` removes.

    Returns:
        True when the lock directory was renamed away; False when the key
        already exists, the lock is younger than ``KEY_LOCK_STALE_SECONDS``,
        or the rename failed.
    """
    # The key is checked both before and after the age lookup.
    if (
        key_path.exists()
        or key_lock_age_seconds(lock_dir) < KEY_LOCK_STALE_SECONDS
        or key_path.exists()
    ):
        return False
    suffix = f"stale.{os.getpid()}.{secrets.token_hex(8)}"
    stale_dir = lock_dir.with_name(f"{lock_dir.name}.{suffix}")
    try:
        os.rename(lock_dir, stale_dir)
    except OSError:
        return False
    try:
        shutil.rmtree(stale_dir)
    except OSError:
        pass
    return True
606
+
607
+
608
def key_lock_owner_matches(lock_dir: Path, lock: KeyLock) -> bool:
    """Report whether the metadata in *lock_dir* names *lock* and this process.

    Returns False when *lock* never had its metadata written, when the
    metadata file cannot be read or parsed, or when its nonce or pid differs.
    """
    if not lock.metadata_written:
        return False
    owner_path = lock_dir / LOCK_OWNER_NAME
    try:
        metadata = json.loads(owner_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeError, json.JSONDecodeError):
        return False
    if not isinstance(metadata, dict):
        return False
    return metadata.get("nonce") == lock.nonce and metadata.get("pid") == os.getpid()
620
+
621
+
622
def cleanup_orphaned_stale_key_locks(store_dir: Path) -> None:
    """Best-effort removal of leftover lock directories in *store_dir*.

    Entries named ``<KEY_NAME>.lock.stale.*`` are always removed. Entries
    named ``<KEY_NAME>.lock.cleanup.*`` are removed only once their mtime age
    reaches ``KEY_LOCK_STALE_SECONDS``. All ``OSError``s are ignored.
    """
    stale_prefix = f"{KEY_NAME}.lock.stale."
    cleanup_prefix = f"{KEY_NAME}.lock.cleanup."
    try:
        candidates = list(store_dir.iterdir())
    except OSError:
        return
    for candidate in candidates:
        name = candidate.name
        if name.startswith(cleanup_prefix):
            removable = path_mtime_age_seconds(candidate) >= KEY_LOCK_STALE_SECONDS
        else:
            removable = name.startswith(stale_prefix)
        if not removable:
            continue
        try:
            if candidate.is_dir():
                shutil.rmtree(candidate)
            else:
                candidate.unlink()
        except OSError:
            pass
642
+
643
+
644
def cleanup_key_lock(lock_dir: Path, lock: KeyLock) -> None:
    """Release *lock* by renaming its directory aside and deleting it.

    Nothing happens unless the metadata in *lock_dir* still matches *lock*.
    Ownership is checked again after the rename; if the renamed directory
    does not match, it is moved back provided no new *lock_dir* has appeared.
    All ``OSError``s are ignored.
    """
    if not key_lock_owner_matches(lock_dir, lock):
        return
    suffix = f"cleanup.{os.getpid()}.{secrets.token_hex(8)}"
    cleanup_dir = lock_dir.with_name(f"{lock_dir.name}.{suffix}")
    try:
        os.rename(lock_dir, cleanup_dir)
    except OSError:
        return
    if key_lock_owner_matches(cleanup_dir, lock):
        try:
            shutil.rmtree(cleanup_dir)
        except OSError:
            pass
        return
    # The directory we moved no longer matches our lock: try to put it back.
    try:
        if not lock_dir.exists():
            os.rename(cleanup_dir, lock_dir)
    except OSError:
        pass
663
+
664
+
665
def acquire_key_lock(lock_dir: Path, key_path: Path) -> KeyLock | None:
    """Acquire the key-creation lock by creating *lock_dir*.

    Makes up to ``KEY_LOCK_WAIT_ATTEMPTS`` attempts, sleeping
    ``KEY_LOCK_POLL_SECONDS`` between them while the lock directory exists.
    A stale lock is reclaimed and retried immediately.

    Returns:
        A ``KeyLock`` when this process created the lock directory, or
        ``None`` when *key_path* exists, in which case no lock is needed.

    Raises:
        CostGuardError: if the lock directory cannot be created, its
            metadata cannot be written, or the wait times out.
    """
    attempts_left = KEY_LOCK_WAIT_ATTEMPTS
    while attempts_left > 0:
        attempts_left -= 1
        try:
            os.mkdir(lock_dir, 0o700)
            try:
                os.chmod(lock_dir, 0o700)
            except OSError:
                pass
            lock = write_key_lock_metadata(lock_dir)
            if not lock.metadata_written:
                # Without metadata the lock could never be verified or
                # released, so remove it and report the failure.
                try:
                    shutil.rmtree(lock_dir)
                except OSError:
                    pass
                fail("could not write local HMAC key lock metadata; retry")
            return lock
        except FileExistsError:
            if key_path.exists():
                return None
            if reclaim_stale_key_lock(lock_dir, key_path):
                continue
            if key_path.exists():
                return None
            time.sleep(KEY_LOCK_POLL_SECONDS)
        except OSError as exc:
            fail(f"could not create local HMAC key lock at {lock_guidance()}: {os_error_detail(exc)}")
    if key_path.exists():
        return None
    fail(f"timed out waiting for local HMAC key lock; remove stale {lock_guidance()}")
694
+
695
+
696
def load_or_create_hmac_key(store_dir: Path) -> bytes:
    """Return the HMAC key stored in *store_dir*, creating it on first use.

    An existing key file is read and validated. Otherwise a key lock is
    acquired, a random 32-byte key is written to a temporary file with mode
    0600 and moved into place with ``os.replace``. The persisted file is then
    re-read so the returned bytes are exactly what later calls will load.

    Raises:
        CostGuardError: if the key cannot be created, written, secured,
            persisted or validated, or if the lock is lost before the key
            is persisted.
    """
    ensure_private_dir(store_dir)
    cleanup_orphaned_stale_key_locks(store_dir)
    key_path = store_dir / KEY_NAME
    if key_path.exists():
        return read_hmac_key(key_path)

    lock_dir = store_dir / f"{KEY_NAME}.lock"
    locked = acquire_key_lock(lock_dir, key_path)
    if locked is None:
        # The key file exists now, so no lock was taken.
        return read_hmac_key(key_path)

    staging_path: Path | None = None
    try:
        if key_path.exists():
            return read_hmac_key(key_path)
        encoded = base64.urlsafe_b64encode(secrets.token_bytes(32))
        staging_path = store_dir / f"{KEY_NAME}.{os.getpid()}.{secrets.token_hex(8)}.tmp"
        try:
            staging_fd = os.open(staging_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
        except OSError as exc:
            fail(f"could not create local HMAC key file: {os_error_detail(exc)}")
        close_error: OSError | None = None
        try:
            try:
                os.fchmod(staging_fd, 0o600)
            except (AttributeError, OSError):
                # A missing or failing fchmod is tolerated here; the mode is
                # enforced after the descriptor is closed.
                pass
            write_all(staging_fd, encoded)
            os.fsync(staging_fd)
        except OSError as exc:
            fail(f"could not write local HMAC key file: {os_error_detail(exc)}")
        finally:
            try:
                os.close(staging_fd)
            except OSError as exc:
                close_error = exc
        if close_error is not None:
            fail(f"could not write local HMAC key file: {os_error_detail(close_error)}")
        ensure_hmac_key_private_mode(staging_path)
        if locked.metadata_written and not key_lock_owner_matches(lock_dir, locked):
            # The lock is no longer ours: use an existing key if there is
            # one, never overwrite it.
            if key_path.exists():
                return read_hmac_key(key_path)
            fail("lost local HMAC key lock; retry")
        try:
            os.replace(staging_path, key_path)
        except OSError as exc:
            fail(f"could not persist local HMAC key file: {os_error_detail(exc)}")
        # The temporary file is now the key file; nothing left to unlink.
        staging_path = None
        fsync_parent_dir(key_path)
        # Re-read the persisted file so callers always use the same bytes future
        # ledger lookups will use. The lock prevents first-use races without
        # relying on hard links or replacing another process's winner key.
        return read_hmac_key(key_path)
    finally:
        if staging_path is not None:
            try:
                staging_path.unlink()
            except OSError:
                pass
        cleanup_key_lock(lock_dir, locked)
758
+
759
+
760
def keyed_hmac(key: bytes, text: str) -> str:
    """Return the hex HMAC-SHA256 of *text* under *key*.

    *text* is encoded as UTF-8 with ``errors="replace"`` before hashing.
    """
    payload = text.encode("utf-8", errors="replace")
    mac = hmac.new(key, payload, hashlib.sha256)
    return mac.hexdigest()
762
+
763
+
764
def ledger_path(store_dir: Path) -> Path:
    """Return the path of the ledger file inside *store_dir*."""
    return store_dir.joinpath(LEDGER_NAME)
766
+
767
+
768
def load_ledger(store_dir: Path) -> list[dict[str, Any]]:
    """Load the most recent ledger rows from *store_dir*.

    The ledger is read line by line as JSONL. Blank lines, lines that are not
    valid UTF-8, lines that are not valid JSON, and JSON values that are not
    objects are skipped, so one damaged line cannot make the rest of the
    ledger unreadable. At most ``MAX_LEDGER_ROWS`` rows (the newest ones, in
    file order) are kept in memory and returned.

    Returns:
        The retained rows, or an empty list when the ledger does not exist.

    Raises:
        OSError: if the ledger exists but cannot be opened or read.
    """
    from collections import deque

    path = ledger_path(store_dir)
    try:
        fh = path.open("rb")
    except FileNotFoundError:
        return []
    # A bounded deque discards the oldest rows while reading instead of
    # holding the entire ledger before trimming.
    rows: deque[dict[str, Any]] = deque(maxlen=MAX_LEDGER_ROWS)
    with fh:
        for raw_line in fh:
            try:
                line = raw_line.decode("utf-8").strip()
            except UnicodeDecodeError:
                # Treat undecodable bytes like any other malformed line.
                continue
            if not line:
                continue
            try:
                row = json.loads(line, parse_constant=reject_json_constant)
            except (ValueError, RecursionError):
                # json.JSONDecodeError is a ValueError subclass.
                continue
            if isinstance(row, dict):
                rows.append(row)
    return list(rows)
785
+
786
+
787
def append_ledger(store_dir: Path, entry: dict[str, Any]) -> None:
    """Append *entry* to the ledger as one JSON line and fsync it.

    The store directory is created if needed. The ledger is opened with
    ``O_APPEND`` and creation mode 0600; a chmod to 0600 is attempted
    afterwards and its failure ignored.
    """
    ensure_private_dir(store_dir)
    path = ledger_path(store_dir)
    # JSONL is append-only. Use a single O_APPEND write plus fsync so concurrent
    # local wrappers cannot interleave bytes; load_ledger also tolerates any
    # pre-existing malformed/partial line by skipping it.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
    try:
        record = (json_bytes(entry) + "\n").encode("utf-8")
        os.write(fd, record)
        os.fsync(fd)
    finally:
        os.close(fd)
    try:
        os.chmod(path, 0o600)
    except OSError:
        pass
804
+
805
+
806
def latest_fingerprint_rows(rows: list[dict[str, Any]]) -> dict[tuple[str, str], dict[str, Any]]:
    """Index the newest observed fingerprint for each ``(model, hmac)`` pair.

    Only rows whose ``kind`` is ``"observe"`` are considered. Each stored
    value is a copy of the fingerprint dict with the row's
    ``created_at_unix`` (coerced to int) and ``model`` added. When timestamps
    tie, the row that appears later in *rows* wins.
    """
    latest: dict[tuple[str, str], dict[str, Any]] = {}
    for row in rows:
        if row.get("kind") != "observe":
            continue
        fingerprints = row.get("fingerprints")
        if not isinstance(fingerprints, list):
            continue
        model = str(row.get("model") or "unknown")
        created = safe_int(row.get("created_at_unix") or 0, 0)
        for fp in fingerprints:
            if not isinstance(fp, dict):
                continue
            digest = fp.get("hmac")
            if not isinstance(digest, str):
                continue
            key = (model, digest)
            previous = latest.get(key)
            if previous is not None and created < safe_int(previous.get("created_at_unix") or 0, 0):
                continue
            latest[key] = {**fp, "created_at_unix": created, "model": model}
    return latest
827
+
828
+
829
def default_pricing_profile() -> dict[str, Any]:
    """Return the bundled pricing profile as a fresh dict.

    Each versioned model is registered under two spellings ("opus 4.8" and
    "opus-4-8"); generic family keys come last. Every model entry is its own
    dict object so callers can mutate one entry without affecting another.
    """

    def rate_card(input_rate: float, output_rate: float) -> dict[str, float]:
        return {"input_usd_per_mtok": input_rate, "output_usd_per_mtok": output_rate}

    # (family, versions, input USD per MTok, output USD per MTok), in the
    # order the keys must appear in the resulting mapping.
    versioned = (
        ("opus", ("4.8", "4.7", "4.6", "4.5"), 5.0, 25.0),
        ("opus", ("4.1", "4"), 15.0, 75.0),
        ("sonnet", ("4.6", "4.5", "4"), 3.0, 15.0),
        ("haiku", ("4.5",), 1.0, 5.0),
        ("haiku", ("3.5",), 0.80, 4.0),
    )
    generic = (("sonnet", 3.0, 15.0), ("haiku", 1.0, 5.0), ("opus", 5.0, 25.0))

    models: dict[str, dict[str, float]] = {}
    for family, versions, input_rate, output_rate in versioned:
        for version in versions:
            models[f"{family} {version}"] = rate_card(input_rate, output_rate)
            models[f"{family}-{version.replace('.', '-')}"] = rate_card(input_rate, output_rate)
    for family, input_rate, output_rate in generic:
        models[family] = rate_card(input_rate, output_rate)

    return {
        "name": "anthropic-default-2026-06",
        "source": "Anthropic pricing docs retrieved 2026-06-05; recheck before release or billing assertions.",
        "source_urls": [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL],
        "checked_at": "2026-06-05",
        "release_recheck_required": True,
        "usd_to_krw": DEFAULT_USD_TO_KRW,
        "cache_write_multipliers": {"5m": 1.25, "1h": 2.0},
        "cache_read_multiplier": 0.10,
        "default_input_usd_per_mtok": 3.0,
        "default_output_usd_per_mtok": 15.0,
        "models": models,
    }
869
+
870
+
871
def load_pricing_profile(raw: str | None) -> dict[str, Any]:
    """Build the effective pricing profile from an optional override.

    ``raw`` is either inline JSON (recognised by a leading ``{`` after
    whitespace) or a path to a UTF-8 JSON file. A falsy ``raw`` yields the
    bundled defaults unchanged. Load/parse problems and non-object JSON are
    reported through ``fail`` (assumed not to return).
    """
    defaults = default_pricing_profile()
    if not raw:
        return defaults
    try:
        text = raw if raw.lstrip().startswith("{") else Path(raw).read_text(encoding="utf-8")
        override = json.loads(text, parse_constant=reject_json_constant)
    except (OSError, json.JSONDecodeError, ValueError) as exc:
        fail(f"could not load pricing profile: {exc}")
    if not isinstance(override, dict):
        fail("pricing profile must be a JSON object")
    effective = merge_dict(defaults, override)
    if "models" in override:
        # A caller-provided model map is treated as the complete pricing
        # contract for this run: it replaces the bundled map instead of being
        # merged into it, so a generic custom key such as "sonnet" cannot be
        # shadowed by a more specific built-in key.
        effective["models"] = override["models"]
    return effective
891
+
892
+
893
def merge_dict(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict with ``override`` layered over ``base``.

    When both sides hold a dict under the same key the two are merged
    recursively; in every other case the override value replaces the base
    value. ``base`` itself is not modified.
    """
    merged = dict(base)
    for name in override:
        incoming = override[name]
        existing = merged.get(name)
        both_dicts = isinstance(incoming, dict) and isinstance(existing, dict)
        merged[name] = merge_dict(existing, incoming) if both_dicts else incoming  # type: ignore[arg-type]
    return merged
901
+
902
+
903
def float_field(data: dict[str, Any], key: str, default: float) -> float:
    """Read ``data[key]`` as a finite, non-negative float.

    ``default`` is returned when the key is missing, the value cannot be
    converted by ``float()``, or the converted value is NaN, infinite or
    negative.
    """
    try:
        number = float(data.get(key, default))
    except (TypeError, ValueError, OverflowError):
        return default
    usable = math.isfinite(number) and number >= 0
    return number if usable else default
911
+
912
+
913
def rates_for_model(profile: dict[str, Any], model: str) -> tuple[float, float, str]:
    """Resolve input/output USD-per-MTok rates for ``model``.

    Keys of ``profile["models"]`` are tried from most to least specific
    (more normalised tokens first, then longer normalised text). A key matches
    when its lower-cased text or its normalised form is a substring of the
    model name, or when all of its tokens occur among the model's tokens.

    Keys that normalise to nothing (empty or punctuation-only) are ignored:
    an empty string is a substring of every string, so such a key would
    otherwise match every model and silently override the profile defaults.

    Returns:
        ``(input_rate, output_rate, matched_key)``; ``matched_key`` is
        ``"default"`` when no model entry applies and the profile-level
        default rates are used.
    """

    def normalize(text: str) -> str:
        return re.sub(r"[^a-z0-9]+", "-", text).strip("-")

    model_l = model.lower()
    model_norm = normalize(model_l)
    model_tokens = {tok for tok in model_norm.split("-") if tok}
    default_input = float_field(profile, "default_input_usd_per_mtok", 3.0)
    default_output = float_field(profile, "default_output_usd_per_mtok", 15.0)

    models = profile.get("models")
    if isinstance(models, dict):
        # Normalise every key once; the original normalised each key twice
        # (once for sorting, once for matching).
        candidates = []
        for key, raw in models.items():
            key_l = str(key).lower()
            key_norm = normalize(key_l)
            if not key_norm:
                continue
            # key_norm has no leading/trailing/repeated dashes, so the split
            # never yields empty tokens.
            candidates.append((key, raw, key_l, key_norm, key_norm.split("-")))
        # Stable sort with reverse=True keeps mapping order among equally
        # specific keys, same as the previous sorted(..., reverse=True).
        candidates.sort(key=lambda item: (len(item[4]), len(item[3])), reverse=True)

        for key, raw, key_l, key_norm, key_tokens in candidates:
            if not isinstance(raw, dict):
                continue
            if (
                key_l in model_l
                or key_norm in model_norm
                or all(tok in model_tokens for tok in key_tokens)
            ):
                return (
                    float_field(raw, "input_usd_per_mtok", default_input),
                    float_field(raw, "output_usd_per_mtok", default_output),
                    str(key),
                )
    return default_input, default_output, "default"
939
+
940
+
941
def pricing_multipliers(profile: dict[str, Any]) -> tuple[dict[str, float], float]:
    """Return ``(cache_write_multipliers, cache_read_multiplier)``.

    The write map always has exactly the keys ``"5m"`` and ``"1h"``. A
    profile value is used only when it converts to a finite, non-negative
    float; otherwise the built-in multiplier for that TTL is kept.
    """
    write_multipliers = {"5m": 1.25, "1h": 2.0}
    configured = profile.get("cache_write_multipliers")
    if isinstance(configured, dict):
        write_multipliers = {
            ttl: float_field(configured, ttl, fallback)
            for ttl, fallback in write_multipliers.items()
        }
    read_multiplier = float_field(profile, "cache_read_multiplier", 0.10)
    return write_multipliers, read_multiplier
954
+
955
+
956
def usd_to_krw(profile: dict[str, Any], override: float | None = None) -> float:
    """Return the USD-to-KRW rate to use.

    An explicit ``override`` is validated as the ``--usd-to-krw`` argument
    (zero not allowed) and takes precedence. Otherwise the profile's
    ``usd_to_krw`` field is used, and a non-positive value is reported
    through ``fail``.
    """
    if override is None:
        rate = float_field(profile, "usd_to_krw", DEFAULT_USD_TO_KRW)
        if rate <= 0:
            fail("pricing profile usd_to_krw must be > 0")
        return rate
    return finite_float_arg(override, "--usd-to-krw", minimum=0.0, allow_zero=False)
963
+
964
+
965
def money(tokens: int, usd_per_mtok: float, multiplier: float = 1.0) -> float:
    """Price ``tokens`` at ``usd_per_mtok`` per million, scaled by ``multiplier``.

    Negative token counts are treated as zero.
    """
    billable = max(0, tokens)
    millions = billable / 1_000_000.0
    return millions * usd_per_mtok * multiplier
967
+
968
+
969
def krw(usd: float, rate: float) -> float:
    """Convert a USD amount to KRW using ``rate`` (KRW per USD)."""
    converted = usd * rate
    return converted
971
+
972
+
973
def uncertainty(mid_tokens: int, safety_factor: float) -> dict[str, int]:
    """Return a low/mid/high token band around ``mid_tokens``.

    The high bound scales by ``safety_factor`` (never below 1.0) and rounds
    up; the low bound is 75% of the mid value rounded down. Neither bound
    crosses the mid value.
    """
    factor = max(1.0, safety_factor)
    upper = math.ceil(mid_tokens * factor)
    lower = math.floor(mid_tokens * 0.75)
    return {
        "low": min(mid_tokens, lower),
        "mid": mid_tokens,
        "high": max(mid_tokens, upper),
    }
977
+
978
+
979
def cost_range(mid_usd: float, safety_factor: float) -> dict[str, float]:
    """Return low/mid/high USD estimates rounded to 8 decimal places.

    Low is 75% of the mid value; high scales the mid value by
    ``safety_factor``, which is never applied below 1.0.
    """
    high_scale = max(1.0, safety_factor)
    bounds = (
        ("low", mid_usd * 0.75),
        ("mid", mid_usd),
        ("high", mid_usd * high_scale),
    )
    return {label: round(amount, 8) for label, amount in bounds}
985
+
986
+
987
def budget_state(cost_usd_range: dict[str, float], args: argparse.Namespace, profile: dict[str, Any]) -> dict[str, Any]:
    """Compare the estimated cost band against the configured budgets.

    Budgets come from ``args.budget_usd`` and ``args.budget_krw`` (either may
    be absent or None); a KRW budget is converted to USD with the effective
    exchange rate. A budget is "over" when the high estimate exceeds it and
    "near" when it lies inside the band below the high estimate.
    """
    configured: list[tuple[str, float, float]] = []
    if getattr(args, "budget_usd", None) is not None:
        usd_limit = finite_float_arg(args.budget_usd, "--budget-usd", minimum=0.0, allow_zero=True)
        configured.append(("USD", usd_limit, usd_limit))
    if getattr(args, "budget_krw", None) is not None:
        krw_limit = finite_float_arg(args.budget_krw, "--budget-krw", minimum=0.0, allow_zero=True)
        rate = usd_to_krw(profile, getattr(args, "usd_to_krw", None))
        configured.append(("KRW", krw_limit, krw_limit / rate))
    if not configured:
        return {"configured": False, "near_threshold": False, "over_budget": False}

    high = float(cost_usd_range.get("high", 0.0))
    mid = float(cost_usd_range.get("mid", 0.0))
    low = float(cost_usd_range.get("low", 0.0))

    checks = []
    for currency, display_value, limit_usd in configured:
        exceeds = high > limit_usd
        straddles = (low <= limit_usd < high) or (mid <= limit_usd < high)
        checks.append(
            {
                "currency": currency,
                "budget": display_value,
                "budget_usd": round(limit_usd, 8),
                "over_high_estimate": exceeds,
                "near_threshold": straddles,
            }
        )
    return {
        "configured": True,
        "near_threshold": any(check["near_threshold"] for check in checks),
        "over_budget": any(check["over_high_estimate"] for check in checks),
        "checks": checks,
    }
1011
+
1012
+
1013
def model_from_request(request: Any) -> str:
    """Return the request's ``model`` string, or ``"unknown"``.

    ``"unknown"`` is used when ``request`` is not a dict or its ``model``
    value is not a string.
    """
    if not isinstance(request, dict):
        return "unknown"
    name = request.get("model")
    return str(name) if isinstance(name, str) else "unknown"
1017
+
1018
+
1019
def build_fingerprints(breakpoints: list[CacheBreakpoint], key: bytes) -> tuple[list[dict[str, Any]], int]:
    """Fingerprint every cache breakpoint with a keyed HMAC.

    For each breakpoint the canonical JSON of its prefix is hashed with
    ``key`` and measured; the ``*_delta_*`` fields are the growth of the
    prefix relative to the largest prefix seen so far in iteration order
    (never negative).

    Returns:
        ``(fingerprints, redactions)`` where ``redactions`` is the sum of the
        per-breakpoint ``secret_count_in_text`` results.
    """
    fingerprints: list[dict[str, Any]] = []
    redactions = 0
    previous_prefix_tokens = 0
    previous_prefix_bytes = 0
    for bp in breakpoints:
        canonical = json_bytes(bp.prefix)
        section_canonical = json_bytes(bp.section)
        bp_redactions = secret_count_in_text(canonical)
        redactions += bp_redactions
        prefix_tokens = token_proxy_text(canonical)
        prefix_bytes = byte_len_text(canonical)
        prefix_delta_tokens = max(0, prefix_tokens - previous_prefix_tokens)
        prefix_delta_bytes = max(0, prefix_bytes - previous_prefix_bytes)
        # Track the running maximum so a shorter later prefix cannot make the
        # next delta larger than the real growth.
        previous_prefix_tokens = max(previous_prefix_tokens, prefix_tokens)
        previous_prefix_bytes = max(previous_prefix_bytes, prefix_bytes)
        # Hash once and derive the short display form from the same digest
        # (previously the HMAC was computed twice over identical input).
        digest = keyed_hmac(key, canonical)
        fingerprints.append(
            {
                "breakpoint_id": bp.breakpoint_id,
                "kind": bp.kind,
                "ttl": bp.ttl,
                "hmac": digest,
                "display_hmac": "hmac-sha256:" + digest[:16],
                "prefix_bytes": prefix_bytes,
                "prefix_delta_bytes": prefix_delta_bytes,
                "section_bytes": byte_len_text(section_canonical),
                "tokens_estimated": prefix_tokens,
                "prefix_delta_tokens_estimated": prefix_delta_tokens,
                "section_tokens_estimated": token_proxy_text(section_canonical),
                "redactions_detected": bp_redactions,
            }
        )
    return fingerprints, redactions
1052
+
1053
+
1054
def annotate_cache_state(
    fingerprints: list[dict[str, Any]],
    rows: list[dict[str, Any]],
    now: int,
    *,
    model: str,
    input_rate: float,
    write_mult: dict[str, float],
    read_mult: float,
    exchange_rate: float,
) -> list[dict[str, Any]]:
    """Predict hit/miss/expired for each fingerprint against the ledger.

    A fingerprint is looked up by ``(model, hmac)`` among the latest observed
    ledger entries:

    * found, same TTL, not yet expired -> ``"hit"`` (risk low)
    * found, same TTL, expired        -> ``"expired"`` (risk medium)
    * found, different TTL            -> ``"miss"`` with ``ttl_mismatch``
    * not found                       -> ``"miss"`` with
      ``prefix_hash_changed`` when the ledger holds any observed entry,
      else ``no_previous_cache_entry``

    The hash-related reasons are only reported when the digest was *not*
    found. Previously a TTL mismatch on a matching digest was also labelled
    ``prefix_hash_changed`` (and ``tool_schema_changed`` for tool
    breakpoints), which contradicts the successful digest lookup.

    Returns one public dict per fingerprint: the fingerprint fields without
    the full ``hmac`` plus the prediction, risk, reasons and the KRW/USD cost
    difference between a cache write and a cache read of the prefix delta.
    """
    latest = latest_fingerprint_rows(rows)
    has_prior = bool(latest)
    out: list[dict[str, Any]] = []
    for fp in fingerprints:
        digest = str(fp["hmac"])
        ttl = str(fp.get("ttl") or "5m")
        prev = latest.get((model, digest))
        status = "miss"
        age_seconds: int | None = None
        expires_at_unix = 0
        ttl_remaining_seconds = 0
        reasons: list[str] = []
        if prev:
            created = int(prev.get("created_at_unix") or 0)
            age_seconds = max(0, now - created)
            previous_ttl = str(prev.get("ttl") or "5m")
            # Unknown TTL labels fall back to the 5m lifetime.
            expires_at_unix = created + TTL_SECONDS.get(previous_ttl, TTL_SECONDS["5m"])
            ttl_remaining_seconds = max(0, expires_at_unix - now)
            if previous_ttl != ttl:
                status = "miss"
                reasons.append("ttl_mismatch")
            else:
                status = "hit" if ttl_remaining_seconds > 0 else "expired"

        if status == "hit":
            matched = True
            risk = "low"
        elif status == "expired":
            matched = False
            risk = "medium"
            reasons.append("ttl_expired")
        else:
            matched = False
            risk = "high"
            if not prev:
                # Only a failed digest lookup can mean the prefix changed.
                reasons.append("prefix_hash_changed" if has_prior else "no_previous_cache_entry")
                if has_prior and str(fp.get("kind")) == "tool":
                    reasons.append("tool_schema_changed")

        has_redactions = int(fp.get("redactions_detected") or 0) > 0
        if has_redactions:
            reasons.append("redaction_changed_cacheable_material")

        tokens = int(fp.get("prefix_delta_tokens_estimated") or 0)
        miss_usd = money(tokens, input_rate, write_mult.get(ttl, write_mult["5m"]))
        hit_usd = money(tokens, input_rate, read_mult)
        delta_usd = max(0.0, miss_usd - hit_usd)
        confidence = "medium" if has_redactions else "high"

        # The full HMAC stays private; only the shortened display form is
        # exposed through the "fingerprint" field.
        visible = {k: v for k, v in fp.items() if k != "hmac"}
        visible.update(
            {
                "id": fp.get("breakpoint_id"),
                "fingerprint": fp.get("display_hmac"),
                "matched": matched,
                "risk": risk,
                "confidence": confidence,
                "projected_tokens": tokens,
                "cost_delta_if_miss": round(krw(delta_usd, exchange_rate), 2),
                "cost_delta_if_miss_usd": round(delta_usd, 8),
                "expires_at_unix": expires_at_unix,
                "ttl_remaining_seconds": ttl_remaining_seconds,
                "reasons": reasons,
                "predicted_cache_state": status,
            }
        )
        if age_seconds is not None:
            visible["age_seconds"] = age_seconds
        out.append(visible)
    return out
1128
+
1129
+
1130
def preflight_command(args: argparse.Namespace) -> int:
    """Estimate tokens, cost and cache state for a request before sending it.

    Loads the request JSON and pricing profile, fingerprints the request's
    cache breakpoints, compares them with the ledger, and emits a report.
    Unless ``args.no_ledger_write`` is set, a summary row (without
    fingerprints) is appended to the ledger.

    Returns:
        3 when ``args.enforce`` is set and at least one finding has severity
        ``"warn"``; otherwise 0.
    """
    request_raw, _truncated = load_json_input(args.request, max_bytes=args.max_bytes)
    request = require_json_object(request_raw, "request")
    profile = load_pricing_profile(args.pricing_profile)
    # Validate CLI overrides up front and write the normalised values back so
    # later helpers see already-checked numbers.
    if args.usd_to_krw is not None:
        profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
    if args.budget_usd is not None:
        args.budget_usd = finite_float_arg(args.budget_usd, "--budget-usd", minimum=0.0, allow_zero=True)
    if args.budget_krw is not None:
        args.budget_krw = finite_float_arg(args.budget_krw, "--budget-krw", minimum=0.0, allow_zero=True)
    safety = float(args.safety_factor)
    if not math.isfinite(safety) or safety < 1.0:
        fail("--safety-factor must be >= 1.0")

    store_dir = Path(args.store_dir)
    key = load_or_create_hmac_key(store_dir)
    rows = load_ledger(store_dir)
    now = int(time.time())
    breakpoints, parse_meta = extract_cache_breakpoints(request)
    fingerprints_private, redactions = build_fingerprints(breakpoints, key)

    model = model_from_request(request)
    input_rate, output_rate, model_rate_key = rates_for_model(profile, model)
    write_mult, read_mult = pricing_multipliers(profile)
    exchange = usd_to_krw(profile, args.usd_to_krw)
    cache_breakdowns = annotate_cache_state(
        fingerprints_private,
        rows,
        now,
        model=model,
        input_rate=input_rate,
        write_mult=write_mult,
        read_mult=read_mult,
        exchange_rate=exchange,
    )
    full_prompt_tokens_mid = token_proxy_obj(strip_known_cache_controls(request))
    # The largest estimated prefix is taken as the cacheable share; whatever
    # remains of the full prompt estimate is priced as plain input.
    cacheable_tokens_mid = max((int(fp.get("tokens_estimated") or 0) for fp in fingerprints_private), default=0)
    noncacheable_tokens_mid = max(0, full_prompt_tokens_mid - cacheable_tokens_mid)
    # Output is priced at the request's max_tokens value (0 when absent or invalid).
    output_tokens_max = usage_int(request, "max_tokens")
    output_usd_mid = money(output_tokens_max, output_rate)
    predicted_mid_usd = money(noncacheable_tokens_mid, input_rate) + output_usd_mid
    all_miss_mid_usd = predicted_mid_usd
    all_hit_mid_usd = predicted_mid_usd
    # Add each breakpoint's prefix delta three ways: as predicted (read on a
    # hit, write otherwise), as if everything missed, and as if everything hit.
    for public, private in zip(cache_breakdowns, fingerprints_private):
        tokens = int(private.get("prefix_delta_tokens_estimated") or 0)
        ttl = str(private.get("ttl") or "5m")
        if public.get("predicted_cache_state") == "hit":
            predicted_mid_usd += money(tokens, input_rate, read_mult)
        else:
            predicted_mid_usd += money(tokens, input_rate, write_mult.get(ttl, write_mult["5m"]))
        all_miss_mid_usd += money(tokens, input_rate, write_mult.get(ttl, write_mult["5m"]))
        all_hit_mid_usd += money(tokens, input_rate, read_mult)

    token_estimate = uncertainty(full_prompt_tokens_mid, safety)
    cost_usd = cost_range(predicted_mid_usd, safety)
    budget = budget_state(cost_usd, args, profile)
    hit_count = sum(1 for bp in cache_breakdowns if bp.get("predicted_cache_state") == "hit")
    miss_count = sum(1 for bp in cache_breakdowns if bp.get("predicted_cache_state") == "miss")
    expired_count = sum(1 for bp in cache_breakdowns if bp.get("predicted_cache_state") == "expired")
    # De-duplicated, sorted union of all per-breakpoint reasons.
    aggregate_reasons = sorted(
        {
            reason
            for bp in cache_breakdowns
            for reason in bp.get("reasons", [])
            if isinstance(reason, str)
        }
    )
    # Overall cache risk: any miss -> high, else any expired -> medium,
    # else low; "unknown" when the request has no breakpoints at all.
    if not cache_breakdowns:
        cache_level = "unknown"
    elif miss_count > 0:
        cache_level = "high"
    elif expired_count > 0:
        cache_level = "medium"
    else:
        cache_level = "low"
    matched_previous_entry = bool(cache_breakdowns) and all(bool(bp.get("matched")) for bp in cache_breakdowns)
    # Only breakpoints with time left count toward the aggregate TTL; the
    # smallest remaining value is reported.
    ttl_remaining_values = [
        int(bp.get("ttl_remaining_seconds") or 0)
        for bp in cache_breakdowns
        if int(bp.get("ttl_remaining_seconds") or 0) > 0
    ]
    aggregate_ttl_remaining = min(ttl_remaining_values) if ttl_remaining_values else 0
    # The last breakpoint's display fingerprint stands in for the whole request.
    aggregate_fingerprint = cache_breakdowns[-1].get("fingerprint") if cache_breakdowns else None

    # Confidence starts high and is lowered by redactions, unsupported
    # cache-control layouts, or the absence of any breakpoint.
    confidence = "high"
    reasons: list[str] = []
    if redactions:
        confidence = "medium"
        reasons.append("redaction_changed_cacheable_material")
    if int(parse_meta.get("unsupported_cache_controls") or 0) > 0:
        confidence = "medium" if confidence == "high" else confidence
        reasons.append("unsupported_cache_control_layout")
    if not breakpoints:
        confidence = "low"
        reasons.append("no_cache_control")
        if full_prompt_tokens_mid >= int(args.large_context_tokens):
            reasons.append("no_cache_control_large_context")
    for reason in reasons:
        if reason not in aggregate_reasons:
            aggregate_reasons.append(reason)

    findings: list[dict[str, Any]] = []
    if budget.get("over_budget"):
        findings.append({"severity": "warn", "code": "cost_budget_risk", "message": "high estimate exceeds configured budget"})
    elif budget.get("near_threshold"):
        findings.append({"severity": "info", "code": "near_cost_budget", "message": "uncertainty range crosses configured budget"})
    if args.max_input_tokens and token_estimate["high"] > int(args.max_input_tokens):
        findings.append({"severity": "warn", "code": "input_token_limit_risk", "message": "high estimate exceeds configured input-token threshold"})
    if len(breakpoints) > 4:
        findings.append({"severity": "warn", "code": "too_many_cache_breakpoints", "message": "Anthropic prompt caching supports up to four cache breakpoints; reduce or compile layout"})

    # Blocking requires both --enforce and at least one "warn" finding; any
    # finding (including "info") otherwise yields the "warn" decision.
    block = bool(args.enforce and any(f.get("severity") == "warn" for f in findings))
    decision = "block_if_enforced" if block else "warn" if findings else "allow"
    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "preflight",
        "decision": decision,
        "enforcement": "enforced" if args.enforce else "passive",
        "policy": {"action": decision, "passive": not args.enforce, "enforced": bool(args.enforce)},
        "model": model,
        "confidence": {"level": confidence, "reasons": reasons},
        "request": {"model": model, "model_rate_key": model_rate_key, "source_omitted": True},
        "token_estimate": {
            "measurement": "estimated",
            "method": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}",
            "estimator": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}",
            "safety_factor": safety,
            "near_threshold": bool(budget.get("near_threshold")),
            "input_tokens_low": token_estimate["low"],
            "input_tokens_mid": token_estimate["mid"],
            "input_tokens_high": token_estimate["high"],
            "cacheable_tokens_mid": cacheable_tokens_mid,
            "volatile_tokens_mid": noncacheable_tokens_mid,
            "output_tokens_max": output_tokens_max,
            # Also expose the plain low/mid/high keys.
            **token_estimate,
        },
        "pricing": {
            "profile": str(profile.get("name") or "custom"),
            "release_recheck_required": bool(profile.get("release_recheck_required", True)),
            "source_urls": profile.get("source_urls", [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL]),
            "input_usd_per_mtok": input_rate,
            "output_usd_per_mtok": output_rate,
            "usd_to_krw": exchange,
            "cache_write_multipliers": write_mult,
            "cache_read_multiplier": read_mult,
        },
        "cost_estimate": {
            "measurement": "estimated",
            "currency": "USD",
            **cost_usd,
            "krw": {k: round(krw(v, exchange), 2) for k, v in cost_usd.items()},
            "if_cache_hit": cost_range(all_hit_mid_usd, safety),
            # The two miss scenarios price every breakpoint at one fixed
            # write multiplier, regardless of the breakpoint's own TTL.
            "if_cache_miss_5m_write": cost_range(
                money(noncacheable_tokens_mid, input_rate)
                + output_usd_mid
                + sum(
                    money(int(fp.get("prefix_delta_tokens_estimated") or 0), input_rate, write_mult["5m"])
                    for fp in fingerprints_private
                ),
                safety,
            ),
            "if_cache_miss_1h_write": cost_range(
                money(noncacheable_tokens_mid, input_rate)
                + output_usd_mid
                + sum(
                    money(int(fp.get("prefix_delta_tokens_estimated") or 0), input_rate, write_mult["1h"])
                    for fp in fingerprints_private
                ),
                safety,
            ),
            "worst_case": cost_usd["high"],
            "pricing_profile_id": str(profile.get("name") or "custom"),
            "if_all_cache_miss_usd_mid": round(all_miss_mid_usd, 8),
            "if_all_cache_hit_usd_mid": round(all_hit_mid_usd, 8),
            "estimated_cache_delta_usd_mid": round(max(0.0, all_miss_mid_usd - all_hit_mid_usd), 8),
            "output_usd_mid": round(output_usd_mid, 8),
            "includes_output_token_budget": output_tokens_max > 0,
        },
        "budget": budget,
        "cache_risk": {
            "level": cache_level,
            "confidence": confidence,
            "reasons": aggregate_reasons,
            "aggregate_fingerprint": aggregate_fingerprint,
            "matched_previous_entry": matched_previous_entry,
            "ttl_remaining_seconds": aggregate_ttl_remaining,
            "breakpoints": cache_breakdowns,
            "summary": {"total": len(cache_breakdowns), "predicted_hit": hit_count, "predicted_miss": miss_count, "expired": expired_count},
            "ledger": {
                "uses_keyed_hmac": True,
                "raw_prompt_stored": False,
                "path_omitted": True,
                "append_mode": "o_append_single_write_fsync",
                "malformed_rows_skipped": True,
            },
        },
        "redaction": {"secret_like_values_detected": redactions, "redacted_before_output_or_storage": True},
        "privacy": {
            "raw_prompt_emitted": False,
            "raw_prompt_stored": False,
            "raw_paths_emitted": False,
            "hmac_key_emitted": False,
            "redacted_values": redactions,
        },
        "parse": parse_meta,
        "findings": findings,
        "recommendations": recommendations_for_findings(
            findings,
            cache_level=cache_level,
            confidence=confidence,
            breakpoints=cache_breakdowns,
        ),
        "local_artifact_retrieval": {
            "helps_reduce_sent_context": True,
            "replaces_provider_prompt_cache": False,
            "recommended_helper": "context-guard-artifact/context-guard-pack for large local evidence",
        },
    }

    if not args.no_ledger_write:
        # Preflight rows carry no fingerprints ("cache_seeded": False), so
        # they never feed latest_fingerprint_rows, which reads only
        # "observe" rows.
        entry: dict[str, Any] = {
            "schema_version": SCHEMA_VERSION,
            "kind": "preflight_blocked" if block else "preflight",
            "created_at_unix": now,
            "model": model,
            "summary": {
                "breakpoints": len(fingerprints_private),
                "secret_like_values_detected": redactions,
                "raw_prompt_stored": False,
                "cache_seeded": False,
            },
        }
        append_ledger(store_dir, entry)

    emit(report, json_mode=args.json)
    return 3 if block else 0
1367
+
1368
+
1369
def usage_int(data: dict[str, Any], key: str) -> int:
    """Read ``data[key]`` as a non-negative integer.

    Missing keys, values ``int()`` cannot convert, and negative numbers all
    yield 0.
    """
    candidate = data.get(key, 0)
    try:
        parsed = int(candidate)
    except (TypeError, ValueError, OverflowError):
        return 0
    return parsed if parsed > 0 else 0
1376
+
1377
+
1378
def cache_creation_buckets(usage: dict[str, Any]) -> tuple[int, int]:
    """Split cache-creation tokens into ``(5m, 1h)`` buckets.

    Sources are tried in order: a nested ``cache_creation`` object, the flat
    ``cache_creation_input_tokens_5m`` / ``_1h`` fields, and finally the
    un-split ``cache_creation_input_tokens`` total, which is attributed
    entirely to the 5m bucket.
    """
    nested = usage.get("cache_creation")
    if isinstance(nested, dict):
        five_minute = usage_int(nested, "ephemeral_5m_input_tokens")
        one_hour = usage_int(nested, "ephemeral_1h_input_tokens")
        return five_minute, one_hour

    five_minute = usage_int(usage, "cache_creation_input_tokens_5m")
    one_hour = usage_int(usage, "cache_creation_input_tokens_1h")
    if not (five_minute or one_hour):
        return usage_int(usage, "cache_creation_input_tokens"), 0
    return five_minute, one_hour
1390
+
1391
+
1392
def observe_command(args: argparse.Namespace) -> int:
    """Price provider-reported usage and optionally record confirmed fingerprints.

    The usage JSON may be the usage object itself or an object holding it
    under ``"usage"``. When ``args.request`` is given and the usage shows any
    cache creation or cache read tokens, the request's breakpoints are
    fingerprinted and those whose estimated prefix size fits within the
    provider-reported cache tokens are appended to the ledger as an
    ``"observe"`` row.

    Returns:
        Always 0 on the paths visible here; invalid input is reported via
        ``fail`` (assumed not to return).
    """
    usage_raw, _truncated = load_json_input(args.usage, max_bytes=args.max_bytes)
    if isinstance(usage_raw, dict) and isinstance(usage_raw.get("usage"), dict):
        usage = usage_raw["usage"]
    else:
        usage = usage_raw
    if not isinstance(usage, dict):
        fail("usage must be a JSON object or an object containing a usage object")
    profile = load_pricing_profile(args.pricing_profile)
    if args.usd_to_krw is not None:
        profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
    # Model precedence: CLI argument, then the top-level "model" of the usage
    # document, then the literal "unknown".
    model = str(args.model or (usage_raw.get("model") if isinstance(usage_raw, dict) else "") or "unknown")
    input_rate, output_rate, model_rate_key = rates_for_model(profile, model)
    write_mult, read_mult = pricing_multipliers(profile)
    exchange = usd_to_krw(profile, args.usd_to_krw)

    input_tokens = usage_int(usage, "input_tokens")
    output_tokens = usage_int(usage, "output_tokens")
    cache_creation_5m, cache_creation_1h = cache_creation_buckets(usage)
    cache_read = usage_int(usage, "cache_read_input_tokens")
    # Every token class is priced off the input rate except output tokens;
    # cache writes and reads apply their respective multipliers.
    cost_usd_mid = (
        money(input_tokens, input_rate)
        + money(output_tokens, output_rate)
        + money(cache_creation_5m, input_rate, write_mult["5m"])
        + money(cache_creation_1h, input_rate, write_mult["1h"])
        + money(cache_read, input_rate, read_mult)
    )
    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "observe",
        "measurement": "from_usage",
        "usage_source": "provider_usage_fields",
        "request": {"model": model, "model_rate_key": model_rate_key, "source_omitted": True},
        "usage": {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cache_creation_input_tokens_5m": cache_creation_5m,
            "cache_creation_input_tokens_1h": cache_creation_1h,
            "cache_read_input_tokens": cache_read,
        },
        "cost_estimate": {
            "currency": "USD",
            "mid": round(cost_usd_mid, 8),
            "krw_mid": round(krw(cost_usd_mid, exchange), 2),
            "pricing_profile": str(profile.get("name") or "custom"),
            "release_recheck_required": bool(profile.get("release_recheck_required", True)),
            "source_urls": profile.get("source_urls", [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL]),
        },
        "cache_effect": {
            "observed_cache_read_tokens": cache_read,
            "observed_cache_write_tokens": cache_creation_5m + cache_creation_1h,
            "provider_measured": True,
        },
        "privacy": {"raw_request_stored": False, "raw_usage_stored": False, "path_omitted": True},
    }
    confirmed_cache_tokens = cache_creation_5m + cache_creation_1h + cache_read
    if args.request and confirmed_cache_tokens > 0:
        request_raw, _ = load_json_input(args.request, max_bytes=args.max_bytes)
        request = require_json_object(request_raw, "request")
        store_dir = Path(args.store_dir)
        key = load_or_create_hmac_key(store_dir)
        breakpoints, _meta = extract_cache_breakpoints(request)
        fingerprints_private, redactions = build_fingerprints(breakpoints, key)
        # Keep only fingerprints whose estimated prefix fits inside the cache
        # token total the provider reported.
        confirmed_fingerprints = [
            fp
            for fp in fingerprints_private
            if int(fp.get("tokens_estimated") or 0) <= confirmed_cache_tokens
        ]
        if not confirmed_fingerprints:
            # Nothing can be confirmed: report it and leave the ledger untouched.
            report["ledger"] = {
                "updated": False,
                "reason": "insufficient_provider_cache_tokens",
                "uses_keyed_hmac": True,
                "raw_prompt_stored": False,
                "path_omitted": True,
            }
            emit(report, json_mode=args.json)
            return 0
        append_ledger(
            store_dir,
            {
                "schema_version": SCHEMA_VERSION,
                "kind": "observe",
                "created_at_unix": int(time.time()),
                "model": model,
                # Persist a fixed allow-list of fingerprint fields only.
                "fingerprints": [
                    {k: v for k, v in fp.items() if k in {"breakpoint_id", "kind", "ttl", "hmac", "prefix_bytes", "section_bytes", "tokens_estimated", "section_tokens_estimated", "redactions_detected"}}
                    for fp in confirmed_fingerprints
                ],
                "usage": report["usage"],
                "summary": {"breakpoints": len(confirmed_fingerprints), "secret_like_values_detected": redactions, "raw_prompt_stored": False},
            },
        )
        report["ledger"] = {"updated": True, "confirmed_fingerprints": len(confirmed_fingerprints), "uses_keyed_hmac": True, "raw_prompt_stored": False, "path_omitted": True}
    elif args.request:
        # A request was supplied but the usage shows no cache activity.
        report["ledger"] = {
            "updated": False,
            "reason": "no_provider_cache_tokens",
            "uses_keyed_hmac": True,
            "raw_prompt_stored": False,
            "path_omitted": True,
        }
    emit(report, json_mode=args.json)
    return 0
1497
+
1498
+
1499
def ledger_command(args: argparse.Namespace) -> int:
    """Emit a summary of the ledger plus its most recent entries.

    ``args.limit`` is the maximum number of trailing rows to list. Zero or a
    negative value lists none: ``rows[-limit:]`` with a negative ``limit``
    would otherwise slice from the *front* of the ledger and return
    everything except the first ``abs(limit)`` rows.

    Returns:
        Always 0.
    """
    rows = load_ledger(Path(args.store_dir))
    latest = rows[-1] if rows else None
    counts: dict[str, int] = {}
    for row in rows:
        kind = str(row.get("kind") or "unknown")
        counts[kind] = counts.get(kind, 0) + 1

    limit = int(args.limit)
    recent_rows = rows[-limit:] if limit > 0 else []
    # Only non-sensitive metadata is exposed per entry; fingerprints are
    # reduced to a count.
    visible_rows = [
        {
            "kind": row.get("kind"),
            "created_at_unix": row.get("created_at_unix"),
            "model": row.get("model"),
            "fingerprint_count": len(row.get("fingerprints", [])) if isinstance(row.get("fingerprints"), list) else 0,
            "raw_prompt_stored": False,
        }
        for row in recent_rows
    ]
    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "ledger",
        "summary": {"entries": len(rows), "counts": counts, "latest_created_at_unix": latest.get("created_at_unix") if isinstance(latest, dict) else None},
        "ledger": {"uses_keyed_hmac": True, "raw_prompt_stored": False, "path_omitted": True},
        "entries": visible_rows,
    }
    emit(report, json_mode=args.json)
    return 0
1529
+
1530
+
1531
def safe_section_id(section: dict[str, Any], index: int) -> str:
    """Derive a sanitised identifier for a manifest section.

    Uses the section's ``id``, then ``name``, then ``section-<index + 1>``.
    Runs of characters outside ``A-Za-z0-9_.:-`` become a single dash,
    surrounding dashes are stripped and the result is cut to 80 characters.
    An empty result falls back to ``section-<index + 1>``.
    """
    fallback = f"section-{index + 1}"
    candidate = section.get("id") or section.get("name") or fallback
    cleaned = re.sub(r"[^A-Za-z0-9_.:-]+", "-", str(candidate))
    trimmed = cleaned.strip("-")[:80]
    return trimmed if trimmed else fallback
1535
+
1536
+
1537
def section_ttl(section: dict[str, Any]) -> str:
    """Map a section's ``ttl`` / ``cache_ttl`` value to ``"1h"`` or ``"5m"``.

    ``"1h"``, ``"60m"`` and ``"hour"`` (case-insensitive) select the one-hour
    TTL; anything else, including a missing value, selects ``"5m"``.
    """
    requested = section.get("ttl") or section.get("cache_ttl") or "5m"
    if str(requested).lower() in {"1h", "60m", "hour"}:
        return "1h"
    return "5m"
1540
+
1541
+
1542
# Transform names listed as allowed for protected sections.
PROTECTED_ALLOWED_TRANSFORMS = [
    "exact_dedupe",
    "structural_window",
    "line_truncate",
    "whitespace_normalize",
    "json_compact",
    "artifact_retrieval",
]
# Transform names listed as denied for protected sections.
PROTECTED_DENIED_TRANSFORMS = [
    "semantic_compress",
    "paraphrase",
    "identifier_rewrite",
    "numeric_rewrite",
    "hash_rewrite",
    "path_rewrite",
    "quoted_literal_rewrite",
]
# Matches runs of characters other than lower-case letters and digits; used
# to normalise zone-class labels.
PROTECTED_ZONE_CLASS_RE = re.compile(r"[^a-z0-9]+")
# Closed vocabulary of content-type labels.
KNOWN_PROTECTED_CONTENT_TYPES = {"json", "diff", "log", "search", "code", "prose", "unknown"}
# Closed vocabulary of protected zone classes.
KNOWN_PROTECTED_ZONE_CLASSES = {
    "code_fence",
    "diff",
    "identifier",
    "numeric_constant",
    "hash",
    "path",
    "stack_frame",
    "quoted_string",
    "json_key",
}
1557
+
1558
+
1559
def manifest_bool(value: Any) -> bool:
    """Interpret a manifest flag as a boolean.

    Strings are true only when, after trimming and lower-casing, they equal
    one of ``1``, ``true``, ``yes``, ``y`` or ``on``. Every other value is
    converted with ordinary Python truthiness.
    """
    truthy_words = ("1", "true", "yes", "y", "on")
    if isinstance(value, str):
        return value.strip().lower() in truthy_words
    # For real booleans this returns the same singleton that was passed in.
    return bool(value)
1565
+
1566
+
1567
def protected_zone_classes(raw: dict[str, Any]) -> list[str]:
    """Return the sorted, de-duplicated known zone classes declared by a section.

    The declaration is read from ``protected_zone_classes`` or, failing that,
    ``zone_classes``. It may be a comma-separated string or a list; any other
    type yields no classes. Each label is lower-cased, runs of characters
    matched by ``PROTECTED_ZONE_CLASS_RE`` become ``_``, and only labels
    present in ``KNOWN_PROTECTED_ZONE_CLASSES`` are kept.
    """
    declared = raw.get("protected_zone_classes") or raw.get("zone_classes") or []
    if isinstance(declared, str):
        candidates = [part.strip() for part in declared.split(",")]
    elif isinstance(declared, list):
        candidates = [str(entry).strip() for entry in declared]
    else:
        candidates = []

    normalized: set[str] = set()
    for candidate in candidates:
        if not candidate:
            continue
        normalized.add(PROTECTED_ZONE_CLASS_RE.sub("_", candidate.lower()).strip("_"))
    # Unknown labels are dropped rather than echoed back.
    return sorted(normalized & KNOWN_PROTECTED_ZONE_CLASSES)
1577
+
1578
+
1579
def protected_content_type(raw: dict[str, Any]) -> str:
    """Return a known content-type label without echoing raw manifest strings.

    Reads ``content_type`` (falling back to ``type``), trims and lower-cases
    it, and returns it only when it is listed in
    ``KNOWN_PROTECTED_CONTENT_TYPES``; otherwise ``"unknown"`` is returned.
    """
    declared = raw.get("content_type") or raw.get("type") or "unknown"
    label = str(declared).strip().lower()
    if label in KNOWN_PROTECTED_CONTENT_TYPES:
        return label
    return "unknown"
1583
+
1584
+
1585
def section_is_protected(raw: dict[str, Any], zone_classes: list[str]) -> bool:
    """Tell whether a manifest section must be treated as protected.

    A section is protected when its ``protected`` or ``semantic_sensitive``
    flag is truthy (as interpreted by ``manifest_bool``) or when it declares
    at least one recognized zone class.
    """
    if manifest_bool(raw.get("protected")):
        return True
    if manifest_bool(raw.get("semantic_sensitive")):
        return True
    return bool(zone_classes)
1591
+
1592
+
1593
def compile_command(args: argparse.Namespace) -> int:
    """Build and emit a cache-layout advisory report for a section manifest.

    The manifest is loaded from ``args.manifest`` (bounded by
    ``args.max_bytes``). It may be a list of sections or a mapping carrying
    the list under ``sections`` or ``cache_breakpoints``. Non-mapping entries
    are skipped. For every remaining section a sanitized summary is built,
    a recommended ordering is computed (non-volatile first, then ``1h`` TTL
    before ``5m``, then larger sections first, then by id), and advisory
    findings are collected. The report is passed to ``emit``.

    Args:
        args: Parsed CLI namespace; reads ``manifest``, ``max_bytes``,
            ``large_section_bytes`` and ``json``.

    Returns:
        ``0`` once the report has been emitted.
    """
    manifest, _truncated = load_json_input(args.manifest, max_bytes=args.max_bytes)
    if isinstance(manifest, list):
        raw_sections = manifest
    elif isinstance(manifest, dict):
        raw_sections = manifest.get("sections") or manifest.get("cache_breakpoints") or []
    else:
        raw_sections = []
    if not isinstance(raw_sections, list):
        # NOTE(review): fail() is defined elsewhere; presumably it raises
        # CostGuardError so execution does not continue past this point.
        fail("manifest sections must be a list")

    def is_large(entry: dict[str, Any]) -> bool:
        """Tell whether a section exceeds the configured large-section size."""
        return int(entry["bytes"] or 0) > int(args.large_section_bytes)

    def layout_rank(entry: dict[str, Any]) -> tuple[bool, int, int, str]:
        """Sort key: stable before volatile, 1h before 5m, larger first, then id."""
        ttl_rank = 0 if entry["ttl"] == "1h" else 1
        return (bool(entry["volatile"]), ttl_rank, -int(entry["bytes"] or 0), str(entry["id"]))

    sections: list[dict[str, Any]] = []
    for position, raw in enumerate(raw_sections):
        if not isinstance(raw, dict):
            continue
        zone_classes = protected_zone_classes(raw)
        protected = section_is_protected(raw, zone_classes)
        sections.append(
            {
                # Protected sections get a positional id; their own id/name is omitted.
                "id": f"protected-section-{position + 1}" if protected else safe_section_id(raw, position),
                "source_id_omitted": protected,
                "ttl": section_ttl(raw),
                "volatile": manifest_bool(raw.get("volatile")) or manifest_bool(raw.get("changes_often")),
                "bytes": safe_int(raw.get("bytes") or raw.get("estimated_bytes") or 0),
                "tokens_estimated": safe_int(raw.get("tokens") or raw.get("estimated_tokens") or 0),
                "has_path": "path" in raw or "file" in raw,
                "protected": protected,
                "content_type": protected_content_type(raw),
                "protected_zone_classes": zone_classes,
            }
        )

    recommended = sorted(sections, key=layout_rank)

    findings: list[dict[str, Any]] = []

    # Only the first 5m section that is followed by a 1h section is reported.
    ttl_offender = next(
        (
            entry
            for idx, entry in enumerate(sections)
            if entry["ttl"] == "5m" and any(later["ttl"] == "1h" for later in sections[idx + 1 :])
        ),
        None,
    )
    if ttl_offender is not None:
        findings.append(
            {
                "severity": "warn",
                "code": "ttl_order_violation",
                "section_id": ttl_offender["id"],
                "message": "place 1h cacheable stable sections before 5m sections",
            }
        )

    # Only the first volatile section that precedes a stable one is reported.
    volatile_offender = next(
        (
            entry
            for idx, entry in enumerate(sections)
            if entry["volatile"] and any(not later["volatile"] for later in sections[idx + 1 :])
        ),
        None,
    )
    if volatile_offender is not None:
        findings.append(
            {
                "severity": "warn",
                "code": "volatile_prefix_before_stable_context",
                "section_id": volatile_offender["id"],
                "message": "move volatile context toward the tail so stable prefixes can be reused",
            }
        )

    if len(sections) > 4:
        findings.append(
            {
                "severity": "warn",
                "code": "too_many_cache_breakpoints",
                "message": "reduce to four or fewer provider cache breakpoints",
            }
        )

    for entry in sections:
        large = is_large(entry)
        if large:
            findings.append(
                {
                    "severity": "info",
                    "code": "use_local_artifact_retrieval",
                    "section_id": entry["id"],
                    "message": "store/query large local evidence with context-guard-artifact or context-guard-pack; RAM/disk can reduce sent context but does not replace provider prompt cache",
                }
            )
        if not entry.get("protected"):
            continue
        findings.append(
            {
                "severity": "info",
                "code": "protected_zone_structural_only",
                "section_id": entry["id"],
                "message": "protected sections deny semantic/paraphrase compression; use structural transforms and exact retrieval",
            }
        )
        if entry.get("volatile"):
            findings.append(
                {
                    "severity": "info",
                    "code": "protected_volatile_tail",
                    "section_id": entry["id"],
                    "message": "volatile controls cache ordering toward the tail; protection controls transforms and retrieval",
                }
            )
        if large:
            findings.append(
                {
                    "severity": "info",
                    "code": "protected_zone_artifact_retrieval",
                    "section_id": entry["id"],
                    "message": "large protected evidence should be stored locally and sent as exact retrieved slices, not semantically compressed",
                }
            )

    protected_sections = [entry for entry in sections if entry.get("protected")]
    any_protected = bool(protected_sections)

    protected_policy_sections = []
    for entry in protected_sections:
        protected_policy_sections.append(
            {
                "section_id": entry["id"],
                "content_type": entry["content_type"],
                "volatile": entry["volatile"],
                "ttl": entry["ttl"],
                "large": is_large(entry),
                "zone_classes": entry["protected_zone_classes"],
                "semantic_compress": False,
                "retrieval_required": is_large(entry),
                "cache_ordering": "volatile_tail" if entry["volatile"] else "stable_prefix_eligible",
                "source_id_omitted": bool(entry["source_id_omitted"]),
            }
        )

    recommended_order = []
    for entry in recommended:
        recommended_order.append(
            {
                "section_id": entry["id"],
                "ttl": entry["ttl"],
                "volatile": entry["volatile"],
                "protected": entry["protected"],
                "content_type": entry["content_type"],
                "path_omitted": bool(entry["has_path"]),
                "source_id_omitted": bool(entry["source_id_omitted"]),
                "transform_policy": "structural_only" if entry["protected"] else "default",
            }
        )

    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "compile",
        "provider_cache": {
            "replaced_by_local_ram_or_disk": False,
            "stable_prefix_required": True,
            "max_breakpoints_advisory": 4,
        },
        "recommended_order": recommended_order,
        "findings": findings,
        "protected_zone_policy": {
            "enabled": any_protected,
            "section_count": len(protected_sections),
            "semantic_compress": False,
            "allowed_transforms": PROTECTED_ALLOWED_TRANSFORMS,
            "denied_transforms": PROTECTED_DENIED_TRANSFORMS,
            "raw_spans_stored": False,
            "protected_volatile_precedence": "volatile controls cache ordering; protection controls transforms and retrieval",
            "sections": protected_policy_sections,
        },
        "transform_policy": {
            "scope": "protected_sections" if protected_sections else "none",
            "protected_sections_only": True,
            # None (not False) when there are no protected sections at all.
            "semantic_transforms_allowed": False if protected_sections else None,
            "semantic_compress": False if protected_sections else None,
            "allowed": PROTECTED_ALLOWED_TRANSFORMS if protected_sections else [],
            "denied": PROTECTED_DENIED_TRANSFORMS if protected_sections else [],
            "large_protected_sections_use": "local_artifact_retrieval",
        },
        "local_artifact_retrieval": {
            "recommended_for_large_sections": True,
            "helpers": ["context-guard-artifact", "context-guard-pack"],
            "replaces_provider_prompt_cache": False,
        },
    }
    emit(report, json_mode=args.json)
    return 0
1734
+
1735
+
1736
def recommendations_for_findings(
    findings: list[dict[str, Any]],
    *,
    cache_level: str,
    confidence: str,
    breakpoints: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Derive prioritized follow-up recommendations from a preflight result.

    Args:
        findings: Finding mappings; only their ``code`` entries are consulted.
        cache_level: Cache-risk level; ``"high"`` and ``"medium"`` trigger the
            prefix-stabilization advice.
        confidence: Prediction confidence; anything other than ``"high"``
            triggers the verification advice.
        breakpoints: Breakpoint mappings; a ``prefix_delta_bytes`` value above
            ``DEFAULT_LARGE_SECTION_BYTES`` triggers the artifact advice.

    Returns:
        A list of ``{"id", "priority", "action"}`` mappings in a fixed order.
    """
    recommendations: list[dict[str, Any]] = []

    def recommend(rec_id: str, priority: str, action: str) -> None:
        """Append one recommendation entry."""
        recommendations.append({"id": rec_id, "priority": priority, "action": action})

    seen_codes = {str(item.get("code")) for item in findings}

    if cache_level in {"high", "medium"}:
        recommend(
            "stabilize-cache-prefix",
            "P1",
            "Move stable tools/system/context before volatile questions, timestamps, logs, and task-specific output.",
        )
    if confidence != "high":
        recommend(
            "verify-cacheable-material",
            "P1",
            "Redaction or unsupported cacheable material lowered confidence; compare exact request construction before relying on cache-risk predictions.",
        )
    if "cost_budget_risk" in seen_codes:
        recommend(
            "reduce-or-confirm-budget",
            "P1",
            "Use context-guard-pack/artifact slices, clear stale context, or explicit approval before sending an over-budget request.",
        )

    has_large_delta = any(
        int(point.get("prefix_delta_bytes") or 0) > DEFAULT_LARGE_SECTION_BYTES for point in breakpoints
    )
    if has_large_delta:
        recommend(
            "use-local-artifact-retrieval",
            "P2",
            "Store large local evidence as artifacts or packs and send exact slices instead of full logs/files; this does not replace provider prompt cache.",
        )
    return recommendations
1778
+
1779
+
1780
def emit(data: dict[str, Any], *, json_mode: bool) -> None:
    """Print a report either as JSON or as a one-line human summary.

    In JSON mode the report is serialized with sorted keys, two-space indent
    and ``allow_nan=False``; a ``ValueError`` raised while doing so is handed
    to ``fail``. Otherwise a single summary line is printed whose shape
    depends on ``data["mode"]`` (``preflight``, ``observe``, ``compile``; any
    other mode is summarized as a ledger report).

    Args:
        data: Report mapping to print.
        json_mode: When true, emit machine-readable JSON instead of text.
    """
    if json_mode:
        try:
            print(json.dumps(data, ensure_ascii=False, sort_keys=True, indent=2, allow_nan=False))
        except ValueError as exc:
            fail(f"JSON output contained a non-finite number: {exc}")
        return

    def mapping_field(key: str) -> dict[str, Any]:
        """Return ``data[key]`` when it is a mapping, otherwise an empty one."""
        candidate = data.get(key)
        return candidate if isinstance(candidate, dict) else {}

    mode = data.get("mode")
    if mode == "preflight":
        decision = str(data.get("decision", "allow"))
        summary = mapping_field("cache_risk").get("summary", {})
        cost = mapping_field("cost_estimate")
        print(f"{TOOL_NAME}: {decision} · cache {summary.get('predicted_hit', 0)} hit/{summary.get('predicted_miss', 0)} miss · est ${cost.get('mid', 0)}")
        return
    if mode == "observe":
        usage = mapping_field("usage")
        cost = mapping_field("cost_estimate")
        print(f"{TOOL_NAME}: observed cache_read={usage.get('cache_read_input_tokens', 0)} tokens · est ${cost.get('mid', 0)}")
        return
    if mode == "compile":
        findings = data.get("findings", [])
        if not isinstance(findings, list):
            findings = []
        print(f"{TOOL_NAME}: compile findings={len(findings)}")
        return
    # Every other mode is reported with the ledger summary line.
    summary = mapping_field("summary")
    print(f"{TOOL_NAME}: ledger entries={summary.get('entries', 0)}")
1803
+
1804
+
1805
def add_common_cost_args(parser: argparse.ArgumentParser) -> None:
    """Register the pricing, input-size and output-format options on *parser*.

    Adds ``--pricing-profile``, ``--usd-to-krw``, ``--max-bytes`` and
    ``--json``, in that order.
    """
    option_specs: tuple[tuple[str, dict[str, Any]], ...] = (
        (
            "--pricing-profile",
            {"help": "JSON string or file with input/output rates, cache multipliers, and usd_to_krw"},
        ),
        (
            "--usd-to-krw",
            {"type": float, "help": "override USD→KRW exchange rate used for estimates"},
        ),
        (
            "--max-bytes",
            {
                "type": int,
                "default": DEFAULT_MAX_BYTES,
                "help": f"maximum JSON input bytes (default: {DEFAULT_MAX_BYTES})",
            },
        ),
        (
            "--json",
            {"action": "store_true", "help": "emit machine-readable JSON"},
        ),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
1810
+
1811
+
1812
def build_parser() -> argparse.ArgumentParser:
    """Create the top-level CLI parser with its four subcommands.

    Subcommands are ``preflight``, ``observe``, ``ledger`` and ``compile``.
    The selected subcommand name is stored as ``command`` and its handler as
    ``func`` on the parsed namespace.
    """
    root = argparse.ArgumentParser(
        prog=TOOL_NAME,
        description="Passive Anthropic prompt-cache cost preflight, observation, ledger, and layout compiler.",
    )
    commands = root.add_subparsers(dest="command")

    # --- preflight -------------------------------------------------------
    preflight_cmd = commands.add_parser(
        "preflight",
        help="estimate cache miss risk and request cost before an API call",
    )
    preflight_cmd.add_argument(
        "--request",
        default="-",
        help="Anthropic-like request JSON path, or '-' for stdin",
    )
    preflight_cmd.add_argument(
        "--store-dir",
        default=DEFAULT_STORE_DIR,
        help="local HMAC ledger directory (path is never emitted in JSON)",
    )
    preflight_cmd.add_argument(
        "--budget-usd",
        type=float,
        help="warn/block when high estimate exceeds this USD budget",
    )
    preflight_cmd.add_argument(
        "--budget-krw",
        type=float,
        help="warn/block when high estimate exceeds this KRW budget",
    )
    preflight_cmd.add_argument(
        "--max-input-tokens",
        type=int,
        default=0,
        help="warn/block when high estimated input tokens exceed this threshold",
    )
    preflight_cmd.add_argument(
        "--large-context-tokens",
        type=int,
        default=200_000,
        help="threshold for no-cache-control large-context risk",
    )
    preflight_cmd.add_argument(
        "--safety-factor",
        type=float,
        default=DEFAULT_SAFETY_FACTOR,
        help="high estimate multiplier (default: 1.25)",
    )
    preflight_cmd.add_argument(
        "--enforce",
        action="store_true",
        help="return nonzero on warn-level findings; default is passive exit 0",
    )
    preflight_cmd.add_argument(
        "--no-ledger-write",
        action="store_true",
        help="do not append this preflight to the local HMAC ledger",
    )
    add_common_cost_args(preflight_cmd)
    preflight_cmd.set_defaults(func=preflight_command)

    # --- observe ---------------------------------------------------------
    observe_cmd = commands.add_parser(
        "observe",
        help="estimate observed cost from Anthropic usage fields",
    )
    observe_cmd.add_argument(
        "--usage",
        default="-",
        help="usage JSON path, or '-' for stdin",
    )
    observe_cmd.add_argument(
        "--request",
        help="optional request JSON to fingerprint into the ledger",
    )
    observe_cmd.add_argument(
        "--model",
        help="model name when usage JSON does not include it",
    )
    observe_cmd.add_argument(
        "--store-dir",
        default=DEFAULT_STORE_DIR,
        help="local HMAC ledger directory",
    )
    add_common_cost_args(observe_cmd)
    observe_cmd.set_defaults(func=observe_command)

    # --- ledger ----------------------------------------------------------
    ledger_cmd = commands.add_parser(
        "ledger",
        help="summarize the local HMAC ledger without revealing prompts",
    )
    ledger_cmd.add_argument(
        "--store-dir",
        default=DEFAULT_STORE_DIR,
        help="local HMAC ledger directory",
    )
    ledger_cmd.add_argument(
        "--limit",
        type=non_negative_int_arg,
        default=20,
        help="maximum recent entries to include",
    )
    ledger_cmd.add_argument(
        "--json",
        action="store_true",
        help="emit machine-readable JSON",
    )
    ledger_cmd.set_defaults(func=ledger_command)

    # --- compile ---------------------------------------------------------
    compile_cmd = commands.add_parser(
        "compile",
        help="compile a cache-friendly section layout advisory from a manifest",
    )
    compile_cmd.add_argument(
        "--manifest",
        default="-",
        help="section manifest JSON path, or '-' for stdin",
    )
    compile_cmd.add_argument(
        "--large-section-bytes",
        type=int,
        default=DEFAULT_LARGE_SECTION_BYTES,
        help="recommend local artifact retrieval above this size",
    )
    compile_cmd.add_argument(
        "--max-bytes",
        type=int,
        default=DEFAULT_MAX_BYTES,
        help=f"maximum manifest JSON bytes (default: {DEFAULT_MAX_BYTES})",
    )
    compile_cmd.add_argument(
        "--json",
        action="store_true",
        help="emit machine-readable JSON",
    )
    compile_cmd.set_defaults(func=compile_command)

    return root
1854
+
1855
+
1856
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return its process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` after printing help when no subcommand was given, the integer
        result of the subcommand handler otherwise, or ``2`` when the handler
        raised ``CostGuardError`` (the message is written to stderr).
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if getattr(args, "command", None):
        try:
            return int(args.func(args))
        except CostGuardError as exc:
            print(f"{TOOL_NAME}: {exc}", file=sys.stderr)
            return 2
    # No subcommand selected: show usage instead of failing.
    parser.print_help()
    return 0
1867
+
1868
+
1869
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    sys.exit(main())