@ictechgy/context-guard 0.4.8 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +29 -0
  2. package/README.ko.md +92 -37
  3. package/README.md +111 -37
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  8. package/docs/distribution.md +10 -7
  9. package/docs/experimental-benchmark-fixtures.md +8 -1
  10. package/package.json +3 -6
  11. package/packaging/homebrew/context-guard.rb.template +1 -1
  12. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  13. package/plugins/context-guard/README.ko.md +9 -6
  14. package/plugins/context-guard/README.md +27 -12
  15. package/plugins/context-guard/bin/context-guard +113 -26
  16. package/plugins/context-guard/bin/context-guard-artifact +542 -46
  17. package/plugins/context-guard/bin/context-guard-cache-score +380 -0
  18. package/plugins/context-guard/bin/context-guard-compress +146 -1
  19. package/plugins/context-guard/bin/context-guard-cost +783 -4
  20. package/plugins/context-guard/bin/context-guard-experiments +2211 -121
  21. package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
  22. package/plugins/context-guard/bin/context-guard-filter +163 -7
  23. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  24. package/plugins/context-guard/bin/context-guard-pack +602 -43
  25. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  26. package/plugins/context-guard/bin/context-guard-setup +165 -31
  27. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  28. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  29. package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
  30. package/plugins/context-guard/lib/context_guard_commands.py +206 -0
  31. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  32. package/context-guard-kit/README.md +0 -91
  33. package/context-guard-kit/benchmark_runner.py +0 -2401
  34. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  35. package/context-guard-kit/context_compress.py +0 -695
  36. package/context-guard-kit/context_escrow.py +0 -935
  37. package/context-guard-kit/context_filter.py +0 -637
  38. package/context-guard-kit/context_guard_cli.py +0 -325
  39. package/context-guard-kit/context_guard_diet.py +0 -1711
  40. package/context-guard-kit/context_pack.py +0 -2713
  41. package/context-guard-kit/cost_guard.py +0 -2349
  42. package/context-guard-kit/experimental_registry.py +0 -2339
  43. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  44. package/context-guard-kit/guard_large_read.py +0 -690
  45. package/context-guard-kit/hook_secret_patterns.py +0 -43
  46. package/context-guard-kit/read_symbol.py +0 -483
  47. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  48. package/context-guard-kit/sanitize_output.py +0 -725
  49. package/context-guard-kit/settings.example.json +0 -67
  50. package/context-guard-kit/setup_wizard.py +0 -2515
  51. package/context-guard-kit/statusline.sh +0 -362
  52. package/context-guard-kit/statusline_merged.sh +0 -157
  53. package/context-guard-kit/tool_schema_pruner.py +0 -837
  54. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,2349 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Passive Anthropic prompt-cache cost guardrails for ContextGuard.
3
-
4
- This helper is intentionally advisory. It never calls Anthropic, never claims a
5
- provider cache hit as billing authority, and never stores raw request text. The
6
- local ledger stores keyed HMAC fingerprints over confirmed provider observations
7
- so future preflights can warn about likely cache misses without leaking prompts.
8
- """
9
- from __future__ import annotations
10
-
11
- import argparse
12
- import base64
13
- import binascii
14
- import errno
15
- try:
16
- import fcntl
17
- except ImportError: # pragma: no cover - fcntl is unavailable on Windows.
18
- fcntl = None
19
- import hashlib
20
- import hmac
21
- import json
22
- import math
23
- import os
24
- from pathlib import Path
25
- import re
26
- import secrets
27
- import shutil
28
- import stat
29
- import sys
30
- import time
31
- from dataclasses import dataclass
32
- from typing import Any, NoReturn
33
-
34
# --- Tool identity -----------------------------------------------------------
TOOL_NAME = "context-guard-cost"
SCHEMA_VERSION = "contextguard.cost.v1"

# --- Local store layout ------------------------------------------------------
DEFAULT_STORE_DIR = ".context-guard/cost-ledger"
LEDGER_NAME = "ledger.jsonl"
KEY_NAME = "hmac.key"
LOCK_OWNER_NAME = "owner.json"
# 43 URL-safe base64 characters followed by a single "=" pad character.
HMAC_KEY_RE = re.compile(r"^[A-Za-z0-9_-]{43}=$")

# --- Key-lock timing ---------------------------------------------------------
KEY_LOCK_WAIT_ATTEMPTS = 100
KEY_LOCK_POLL_SECONDS = 0.05
KEY_LOCK_STALE_SECONDS = 60.0
KEY_LOCK_METADATA_CLOCK_SKEW_SECONDS = 5.0

# --- Input size limits and estimation defaults -------------------------------
DEFAULT_MAX_BYTES = 10_000_000
MAX_MAX_BYTES = 100_000_000
TOKEN_PROXY_CHARS_PER_TOKEN = 4
DEFAULT_USD_TO_KRW = 1350.0
DEFAULT_SAFETY_FACTOR = 1.25
DEFAULT_LARGE_SECTION_BYTES = 64_000

# --- Ledger reading limits ---------------------------------------------------
MAX_LEDGER_ROWS = 20_000
LEDGER_TAIL_INITIAL_BYTES = 64 * 1024
LEDGER_OPEN_RETRY_ATTEMPTS = 5
LEDGER_OPEN_RETRY_SECONDS = 0.01

# Canonical cache TTL labels mapped to their duration in seconds.
TTL_SECONDS = {"5m": 5 * 60, "1h": 60 * 60}

# --- Reference documentation -------------------------------------------------
ANTHROPIC_DOCS_URL = "https://docs.anthropic.com/en/build-with-claude/prompt-caching"
ANTHROPIC_PRICING_URL = "https://platform.claude.com/docs/en/about-claude/pricing"

# First path components that may be symlinks, with the only target accepted
# for each (see normalize_allowed_first_absolute_symlink).
ALLOWED_FIRST_COMPONENT_SYMLINKS = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}

# --- Platform capability probes ----------------------------------------------
DIR_FD_OPEN_SUPPORTED = os.open in getattr(os, "supports_dir_fd", set())
NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
DIR_FD_STAT_NOFOLLOW_SUPPORTED = (
    os.stat in getattr(os, "supports_dir_fd", set())
    and os.stat in getattr(os, "supports_follow_symlinks", set())
)
68
-
69
- SECRET_RE = re.compile(
70
- r"(?is)("
71
- r"-----BEGIN (?:[A-Z0-9 ]*PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----.*?-----END (?:[A-Z0-9 ]*PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----|"
72
- r"AKIA[0-9A-Z]{16}|"
73
- r"gh[pousr]_[A-Za-z0-9_]{20,}|"
74
- r"github_pat_[A-Za-z0-9_]{20,}|"
75
- r"glpat-[A-Za-z0-9_-]{12,}|"
76
- r"xox[abprs]-[A-Za-z0-9-]{10,}|"
77
- r"sk-(?:ant|proj)-[A-Za-z0-9_-]{8,}|"
78
- r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}|"
79
- r"AIza[0-9A-Za-z_\-]{20,}|"
80
- r"(?i:Authorization)\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+|"
81
- r"[?&](?:X-Amz-Signature|X-Amz-Credential|X-Amz-Security-Token|AWSAccessKeyId|Signature|sig|access_token|refresh_token|id_token|auth|authorization|api[_-]?key|apikey|token|secret|password|client[_-]?secret|private[_-]?key|privatekey|pgp[_-]?private[_-]?key|pgpprivatekey|ssh[_-]?key|sshkey|(?:aws[_-]?)?access[_-]?key(?:[_-]?id)?|awsaccesskeyid)=[^&#\s,}\]]+|"
82
- r"(?<![A-Za-z0-9])(?:api[_-]?key|apikey|token|secret|password|client[_-]?secret|authorization|credential|signature|sig|private[_-]?key|privatekey|pgp[_-]?private[_-]?key|pgpprivatekey|ssh[_-]?key|sshkey|(?:aws[_-]?)?access[_-]?key(?:[_-]?id)?|awsaccesskeyid)\s*[:=]\s*[^\s,}\]]+"
83
- r")"
84
- )
85
-
86
-
87
class CostGuardError(ValueError):
    """Deterministic failure whose message is meant to be shown to the user."""
89
-
90
-
91
def fail(message: str) -> NoReturn:
    """Raise ``CostGuardError`` with *message*; never returns."""
    raise CostGuardError(message)
93
-
94
-
95
def reject_json_constant(value: str) -> NoReturn:
    """Reject a JSON constant token by raising ``ValueError``.

    Passed as ``parse_constant`` to ``json.loads`` by ``load_json_input``.
    """
    raise ValueError(f"invalid JSON constant: {value}")
97
-
98
-
99
def json_bytes(data: Any) -> str:
    """Serialize *data* as compact JSON text with sorted keys.

    Despite the name, the result is a ``str``. Non-ASCII characters are kept
    as-is. A ``ValueError`` from ``json.dumps`` (``allow_nan=False``) is
    converted into a ``CostGuardError`` via ``fail``.
    """
    try:
        encoded = json.dumps(
            data,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            allow_nan=False,
        )
    except ValueError as exc:
        fail(f"JSON value contained a non-finite number: {exc}")
    return encoded
104
-
105
-
106
def require_json_object(data: Any, label: str) -> dict[str, Any]:
    """Return *data* unchanged when it is a dict; otherwise fail.

    *label* names the value in the error message.
    """
    if isinstance(data, dict):
        return data
    fail(f"{label} must be a JSON object")
110
-
111
-
112
def safe_int(value: Any, default: int = 0) -> int:
    """Convert *value* with ``int()``, returning *default* if conversion fails.

    ``TypeError``, ``ValueError`` and ``OverflowError`` are treated as
    conversion failures.
    """
    try:
        converted = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return converted
117
-
118
-
119
def finite_float_arg(value: Any, label: str, *, minimum: float = 0.0, allow_zero: bool = True) -> float:
    """Parse *value* as a finite float and enforce a lower bound.

    With ``allow_zero`` true the bound is inclusive (``>= minimum``);
    otherwise it is exclusive (``> minimum``). Every violation is reported
    through ``fail`` using *label* to name the argument.
    """
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        fail(f"{label} must be numeric")
    if not math.isfinite(parsed):
        fail(f"{label} must be finite")
    if allow_zero and parsed < minimum:
        fail(f"{label} must be >= {minimum:g}")
    if not allow_zero and parsed <= minimum:
        fail(f"{label} must be > {minimum:g}")
    return parsed
132
-
133
-
134
def non_negative_int_arg(value: str) -> int:
    """Parse *value* as an integer ``>= 0``.

    Raises ``argparse.ArgumentTypeError`` when the text is not an integer or
    is negative.
    """
    try:
        parsed = int(value)
    except ValueError as exc:
        raise argparse.ArgumentTypeError("must be an integer") from exc
    if parsed >= 0:
        return parsed
    raise argparse.ArgumentTypeError("must be >= 0")
142
-
143
-
144
def byte_len_text(text: str) -> int:
    """Return the UTF-8 encoded length of *text* in bytes.

    Characters that cannot be encoded are substituted (``errors="replace"``)
    instead of raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    return len(encoded)
146
-
147
-
148
def token_proxy_text(text: str) -> int:
    """Estimate a token count for *text* from its character length.

    Empty text yields 0; any other text yields at least 1, computed as the
    character count divided by ``TOKEN_PROXY_CHARS_PER_TOKEN``, rounded up.
    """
    if text:
        return max(1, math.ceil(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN))
    return 0
152
-
153
-
154
def token_proxy_obj(data: Any) -> int:
    """Estimate a token count for *data* from its ``json_bytes`` serialization."""
    serialized = json_bytes(data)
    return token_proxy_text(serialized)
156
-
157
-
158
def read_bounded_regular_path(path: str | Path, *, max_bytes: int, label: str) -> tuple[str, bool]:
    """Read up to *max_bytes* bytes of a regular file and decode them as UTF-8.

    The path is checked for symlink components, its parent is opened through
    ``open_directory_no_follow``, and the leaf is verified to be a regular
    file both before opening (``os.stat``) and after (``os.fstat``).

    Returns ``(text, truncated)`` where *truncated* is true when the file held
    more than *max_bytes* bytes. Undecodable bytes are replaced. Every failure
    is reported as ``CostGuardError`` using *label* in the message.
    """
    if max_bytes < 1 or max_bytes > MAX_MAX_BYTES:
        fail(f"max bytes must be between 1 and {MAX_MAX_BYTES}")
    checked = reject_symlink_components(Path(path), label=label)
    leaf = _private_leaf_name(checked, label=label)
    dir_fd = -1
    file_fd = -1
    try:
        dir_fd = open_directory_no_follow(checked.parent, label=f"{label} parent")
        if not DIR_FD_STAT_NOFOLLOW_SUPPORTED:
            fail(f"{label} requires dir_fd stat support for symlink-safe regular-file validation")
        try:
            before = os.stat(leaf, dir_fd=dir_fd, follow_symlinks=False)
        except OSError as exc:
            fail(f"could not inspect {label}: {os_error_detail(exc)}")
        if not stat.S_ISREG(before.st_mode):
            fail(f"{label} must be a regular file")
        open_flags = _base_open_flags() | _no_follow_flag(label=label)
        # Optional flags contribute 0 on platforms that do not define them.
        open_flags |= getattr(os, "O_NONBLOCK", 0)
        open_flags |= getattr(os, "O_NOCTTY", 0)
        file_fd = os.open(leaf, open_flags, dir_fd=dir_fd)
        # Re-check on the opened descriptor in case the leaf changed after stat.
        if not stat.S_ISREG(os.fstat(file_fd).st_mode):
            fail(f"{label} must be a regular file")
        pieces: list[bytes] = []
        # Read one byte past the limit so truncation can be detected.
        budget = max_bytes + 1
        while budget > 0:
            piece = os.read(file_fd, min(64 * 1024, budget))
            if not piece:
                break
            pieces.append(piece)
            budget -= len(piece)
        raw = b"".join(pieces)
    except CostGuardError:
        raise
    except OSError as exc:
        fail(f"could not read {label}: {os_error_detail(exc)}")
    finally:
        for handle in (file_fd, dir_fd):
            if handle >= 0:
                try:
                    os.close(handle)
                except OSError:
                    pass
    truncated = len(raw) > max_bytes
    return raw[:max_bytes].decode("utf-8", errors="replace"), truncated
211
-
212
-
213
def read_text_path(path: str, *, max_bytes: int = DEFAULT_MAX_BYTES) -> tuple[str, bool]:
    """Read bounded text from *path*, where ``"-"`` means standard input.

    Any other path is delegated to ``read_bounded_regular_path`` with the
    label ``"input file"``. Returns ``(text, truncated)``.
    """
    if max_bytes < 1 or max_bytes > MAX_MAX_BYTES:
        fail(f"max bytes must be between 1 and {MAX_MAX_BYTES}")
    if path != "-":
        return read_bounded_regular_path(path, max_bytes=max_bytes, label="input file")
    # Request one byte past the limit so truncation can be detected.
    raw = sys.stdin.buffer.read(max_bytes + 1)
    truncated = len(raw) > max_bytes
    return raw[:max_bytes].decode("utf-8", errors="replace"), truncated
224
-
225
-
226
def load_json_input(path: str, *, max_bytes: int = DEFAULT_MAX_BYTES) -> tuple[Any, bool]:
    """Load and parse JSON from *path* (``"-"`` for standard input).

    Input larger than *max_bytes* is rejected rather than parsed partially,
    so the second element of the returned tuple is always ``False``. JSON
    constants are rejected through ``reject_json_constant``.
    """
    text, truncated = read_text_path(path, max_bytes=max_bytes)
    if truncated:
        fail("JSON input exceeded max bytes")
    try:
        parsed = json.loads(text, parse_constant=reject_json_constant)
    except json.JSONDecodeError as exc:
        # Must precede ValueError: JSONDecodeError is a ValueError subclass.
        fail(f"invalid JSON input at line {exc.lineno}: {exc.msg}")
    except ValueError as exc:
        fail(f"invalid JSON input: {exc}")
    return parsed, truncated
237
-
238
-
239
def secret_count_in_text(text: str) -> int:
    """Count the non-overlapping ``SECRET_RE`` matches found in *text*."""
    matches = 0
    for _ in SECRET_RE.finditer(text):
        matches += 1
    return matches
241
-
242
-
243
def is_provider_cache_control(value: Any) -> bool:
    """Tell whether *value* looks like a provider ``cache_control`` marker.

    A dict with a non-``None`` ``type`` qualifies only when that type is
    ``"ephemeral"`` (case- and whitespace-insensitive). Without a ``type``,
    the dict qualifies when its ``ttl`` is one of the recognized spellings.
    Non-dict values never qualify.
    """
    if not isinstance(value, dict):
        return False
    declared_type = value.get("type")
    if declared_type is not None:
        return str(declared_type).strip().lower() == "ephemeral"
    declared_ttl = value.get("ttl")
    if declared_ttl is None:
        return False
    return str(declared_ttl).strip().lower() in {"5m", "1h", "60m", "hour"}
254
-
255
-
256
def clone_jsonish(value: Any) -> Any:
    """Recursively copy dicts and lists, converting dict keys to ``str``.

    Any value that is neither a dict nor a list is returned as the same
    object.
    """
    if isinstance(value, dict):
        copied: dict[str, Any] = {}
        for key, item in value.items():
            copied[str(key)] = clone_jsonish(item)
        return copied
    if isinstance(value, list):
        return [clone_jsonish(item) for item in value]
    return value
262
-
263
-
264
def strip_cache_control(value: Any) -> Any:
    """Remove a provider ``cache_control`` marker from this object only.

    ``cache_control`` may also be legitimate user/application data nested
    inside tool schemas, so nested values are cloned untouched; only a
    top-level key whose value is a recognized provider marker is dropped.
    Lists are cloned element by element, and other values are returned as-is.
    """
    if isinstance(value, dict):
        kept: dict[str, Any] = {}
        for key, item in value.items():
            if key == "cache_control" and is_provider_cache_control(item):
                continue
            kept[str(key)] = clone_jsonish(item)
        return kept
    if isinstance(value, list):
        return [clone_jsonish(item) for item in value]
    return value
280
-
281
-
282
def strip_cache_control_at_path(value: Any, path: tuple[str, ...]) -> Any:
    """Strip the provider ``cache_control`` marker found at key *path*.

    An empty *path* strips from *value* itself. Otherwise the first path
    element selects a dict key to descend into while every sibling key is
    cloned unchanged. A non-dict encountered before the path is exhausted is
    cloned as-is.
    """
    if not path:
        return strip_cache_control(value)
    if not isinstance(value, dict):
        return clone_jsonish(value)
    head = path[0]
    rest = tuple(path[1:])
    result: dict[str, Any] = {}
    for key, item in value.items():
        name = str(key)
        if name == head:
            result[name] = strip_cache_control_at_path(item, rest)
        else:
            result[name] = clone_jsonish(item)
    return result
292
-
293
-
294
def strip_known_cache_controls(request: Any) -> Any:
    """Strip provider ``cache_control`` markers only from recognized slots.

    The recognized slots are the entries of ``cache_breakpoints``, ``tools``
    and a list-valued ``system``, the ``system_cache`` dict, each message
    dict, and each block of a list-valued message ``content``. The request is
    cloned first; a non-dict request is simply cloned.
    """
    if not isinstance(request, dict):
        return clone_jsonish(request)

    def strip_each(items: list[Any]) -> list[Any]:
        # Only dict entries can carry a marker; everything else is cloned.
        return [
            strip_cache_control(entry) if isinstance(entry, dict) else clone_jsonish(entry)
            for entry in items
        ]

    out = clone_jsonish(request)

    for slot in ("cache_breakpoints", "tools", "system"):
        current = out.get(slot)
        if isinstance(current, list):
            out[slot] = strip_each(current)

    system_cache = out.get("system_cache")
    if isinstance(system_cache, dict):
        out["system_cache"] = strip_cache_control(system_cache)

    messages = out.get("messages")
    if isinstance(messages, list):
        cleaned = []
        for message in messages:
            if not isinstance(message, dict):
                cleaned.append(clone_jsonish(message))
                continue
            stripped = strip_cache_control(message)
            content = stripped.get("content")
            if isinstance(content, list):
                stripped["content"] = strip_each(content)
            cleaned.append(stripped)
        out["messages"] = cleaned

    return out
339
-
340
-
341
def cache_ttl(cache_control: Any) -> str:
    """Normalize a ``cache_control`` marker's TTL to ``"5m"`` or ``"1h"``.

    ``"1h"``, ``"60m"`` and ``"hour"`` (case- and whitespace-insensitive) map
    to ``"1h"``. Everything else, including non-dict input and a missing or
    falsy ``ttl``, maps to ``"5m"``.
    """
    if isinstance(cache_control, dict):
        normalized = str(cache_control.get("ttl") or "5m").strip().lower()
        if normalized in {"1h", "60m", "hour"}:
            return "1h"
    return "5m"
348
-
349
-
350
def find_cache_control(value: Any) -> dict[str, Any] | None:
    """Return *value*'s ``cache_control`` entry when it is a provider marker.

    Returns ``None`` for non-dict input and for a missing or unrecognized
    ``cache_control`` entry.
    """
    if not isinstance(value, dict):
        return None
    marker = value.get("cache_control")
    return marker if is_provider_cache_control(marker) else None
356
-
357
-
358
def has_unsupported_cache_control(value: Any) -> bool:
    """Tell whether *value* is a dict with an unrecognized ``cache_control`` key."""
    if not isinstance(value, dict):
        return False
    if "cache_control" not in value:
        return False
    return not is_provider_cache_control(value.get("cache_control"))
364
-
365
-
366
@dataclass(frozen=True)
class CacheBreakpoint:
    """One cache breakpoint and the prompt prefix that leads up to it."""

    # 1-based position among the breakpoints (assigned by _append_unit).
    index: int
    # Kind of the prompt unit that carries the breakpoint.
    kind: str
    # Normalized TTL label as produced by cache_ttl.
    ttl: str
    # Snapshot of the prompt units up to and including this breakpoint.
    prefix: list[Any]
    # The prompt unit that carries the breakpoint.
    section: Any
    unsupported: bool = False

    @property
    def breakpoint_id(self) -> str:
        """Identifier of the form ``bpNNN`` with the index zero-padded to 3 digits."""
        return "bp" + format(self.index, "03d")
378
-
379
-
380
def _prompt_unit(kind: str, value: Any, *, cache_control_path: tuple[str, ...] = (), **meta: Any) -> dict[str, Any]:
    """Build a canonical prompt-unit dict.

    The unit holds ``kind`` and ``value`` (with the provider marker at
    *cache_control_path* stripped), followed by every non-``None`` *meta*
    entry in sorted key order.
    """
    unit: dict[str, Any] = {
        "kind": kind,
        "value": strip_cache_control_at_path(value, cache_control_path),
    }
    for name in sorted(meta):
        extra = meta[name]
        if extra is not None:
            unit[name] = extra
    return unit
386
-
387
-
388
def _append_unit(
    units: list[Any],
    breakpoints: list[CacheBreakpoint],
    *,
    kind: str,
    value: Any,
    cc: Any,
    cache_control_path: tuple[str, ...] = (),
    **meta: Any,
) -> None:
    """Append a prompt unit and, when *cc* is a dict, record a breakpoint.

    The breakpoint's prefix is a copy of *units* taken after the new unit
    was appended, so it includes the unit itself. Both lists are mutated in
    place.
    """
    unit = _prompt_unit(kind, value, cache_control_path=cache_control_path, **meta)
    units.append(unit)
    if not isinstance(cc, dict):
        return
    next_index = len(breakpoints) + 1
    breakpoints.append(
        CacheBreakpoint(
            index=next_index,
            kind=kind,
            ttl=cache_ttl(cc),
            prefix=list(units),
            section=unit,
        )
    )
410
-
411
-
412
def extract_cache_breakpoints(request: Any) -> tuple[list[CacheBreakpoint], dict[str, Any]]:
    """Return cache breakpoints as ordered canonical prompt prefixes.

    Anthropic prompt caching is prefix-oriented, so each breakpoint carries
    the canonical prompt material from the start of the request through the
    unit bearing the ``cache_control`` marker rather than an isolated
    snippet. Units are gathered in this order: explicit ``cache_breakpoints``,
    ``tools``, ``system``, then ``messages``. The parser is deliberately
    conservative and counts ``cache_control`` layouts it does not recognize.

    Returns ``(breakpoints, metadata)``. A non-dict request yields no
    breakpoints and a ``request_shape`` of ``"unsupported"``.
    """
    units: list[Any] = []
    breakpoints: list[CacheBreakpoint] = []
    unsupported = 0

    if not isinstance(request, dict):
        return [], {"request_shape": "unsupported", "unsupported_cache_controls": 0}

    explicit = request.get("cache_breakpoints")
    if isinstance(explicit, list):
        for entry in explicit:
            if not isinstance(entry, dict):
                unsupported += 1
                continue
            if "cache_control" in entry:
                marker = find_cache_control(entry)
                if marker is None:
                    unsupported += 1
            else:
                # No marker at all: treat the entry as an implicit breakpoint.
                marker = {"type": "ephemeral", "ttl": entry.get("ttl", "5m")}
            _append_unit(units, breakpoints, kind=str(entry.get("kind") or "explicit"), value=entry, cc=marker)

    tools = request.get("tools")
    if isinstance(tools, list):
        for tool_index, tool in enumerate(tools):
            marker = find_cache_control(tool)
            if has_unsupported_cache_control(tool):
                unsupported += 1
            _append_unit(units, breakpoints, kind="tool", value=tool, cc=marker, index=tool_index)
    elif tools is not None:
        units.append(_prompt_unit("tools", tools))

    system = request.get("system")
    if isinstance(system, list):
        for block_index, block in enumerate(system):
            marker = find_cache_control(block)
            if has_unsupported_cache_control(block):
                unsupported += 1
            _append_unit(units, breakpoints, kind="system", value=block, cc=marker, index=block_index)
    elif system is not None:
        # A non-list system prompt takes its marker from ``system_cache``.
        system_cache = request.get("system_cache") or {}
        marker = find_cache_control(system_cache)
        if has_unsupported_cache_control(system_cache):
            unsupported += 1
        _append_unit(units, breakpoints, kind="system", value=system, cc=marker)

    messages = request.get("messages")
    if isinstance(messages, list):
        for message_index, message in enumerate(messages):
            if not isinstance(message, dict):
                _append_unit(units, breakpoints, kind="message", value=message, cc=None, index=message_index)
                continue
            role = str(message.get("role") or "unknown")
            content = message.get("content")
            message_marker = find_cache_control(message)
            if has_unsupported_cache_control(message):
                unsupported += 1
            if not isinstance(content, list):
                _append_unit(
                    units, breakpoints, kind="message", value=message, cc=message_marker, index=message_index
                )
                continue
            for content_index, block in enumerate(content):
                marker = find_cache_control(block)
                if has_unsupported_cache_control(block):
                    unsupported += 1
                _append_unit(
                    units,
                    breakpoints,
                    kind="message_content",
                    value={"role": role, "content": block},
                    cc=marker,
                    cache_control_path=("content",),
                    message_index=message_index,
                    content_index=content_index,
                )
            if message_marker and not any(find_cache_control(block) for block in content if isinstance(block, dict)):
                # Message-level cache_control around a list is less common, but keep a
                # conservative prefix fingerprint over the whole message.
                _append_unit(
                    units, breakpoints, kind="message", value=message, cc=message_marker, index=message_index
                )
    elif messages is not None:
        units.append(_prompt_unit("messages", messages))

    serialized = json_bytes(request)
    metadata = {
        "request_shape": "anthropic_like",
        "prompt_units": len(units),
        "unsupported_cache_controls": unsupported,
        # Plain substring count over the serialized request.
        "cache_control_markers": serialized.count('"cache_control"'),
    }
    return breakpoints, metadata
510
-
511
-
512
def ensure_private_dir(path: Path) -> None:
    """Create *path* if needed and enforce owner-only (0700) permissions.

    Symlink components are rejected both before and after directory
    creation. On POSIX a failed ``chmod`` or a resulting mode other than
    0700 is fatal; elsewhere a failed ``chmod`` is tolerated.
    """
    dir_label = "local HMAC ledger directory"
    on_posix = os.name == "posix"
    path = reject_symlink_components(path, label=dir_label)
    path.mkdir(mode=0o700, parents=True, exist_ok=True)
    # Check again now that the directories exist on disk.
    path = reject_symlink_components(path, label=dir_label)
    if not path.is_dir():
        fail("local HMAC ledger directory must be a directory")
    try:
        os.chmod(path, 0o700)
    except OSError as exc:
        if on_posix:
            fail(f"could not secure local HMAC ledger directory: {os_error_detail(exc)}")
        return
    if not on_posix:
        return
    try:
        actual_mode = stat.S_IMODE(path.stat().st_mode)
    except OSError as exc:
        fail(f"could not verify local HMAC ledger directory privacy: {os_error_detail(exc)}")
    if actual_mode != 0o700:
        fail("could not verify local HMAC ledger directory privacy: expected mode 0700")
531
-
532
-
533
def os_error_detail(exc: OSError) -> str:
    """Describe *exc* without including any path.

    Uses ``strerror`` when set, falling back to the exception class name, and
    appends ``(errno N)`` when an errno is available.
    """
    description = exc.strerror or exc.__class__.__name__
    if exc.errno is None:
        return description
    return f"{description} (errno {exc.errno})"
538
-
539
-
540
def _base_open_flags() -> int:
    """Return read-only open flags, with ``O_CLOEXEC`` where the platform has it."""
    return os.O_RDONLY | getattr(os, "O_CLOEXEC", 0)
545
-
546
-
547
def _no_follow_flag(*, label: str = "private local cost storage") -> int:
    """Return ``os.O_NOFOLLOW``, failing when the platform does not provide it."""
    if NO_FOLLOW_SUPPORTED:
        return os.O_NOFOLLOW
    fail(f"{label} requires O_NOFOLLOW support")
551
-
552
-
553
def _directory_open_flags(*, follow_final: bool = False, label: str = "private local cost storage") -> int:
    """Return the flags used to open a directory read-only.

    ``O_CLOEXEC`` and ``O_DIRECTORY`` are added where available.
    ``O_NOFOLLOW`` is added unless *follow_final* is true, and its absence on
    the platform is then a failure.
    """
    flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_DIRECTORY", 0)
    if follow_final:
        return flags
    return flags | _no_follow_flag(label=label)
562
-
563
-
564
def dir_fd_open_supported() -> bool:
    """Report whether ``os.open`` accepts ``dir_fd`` (probed at import time)."""
    return DIR_FD_OPEN_SUPPORTED
566
-
567
-
568
def _private_leaf_name(path: Path, *, label: str) -> str:
    """Return the final component of *path*, failing if it does not name a file.

    An empty name, ``"."`` and ``".."`` are rejected.
    """
    leaf = path.name
    if leaf not in {"", ".", ".."}:
        return leaf
    fail(f"{label} must name a private file")
573
-
574
-
575
def _normalized_link_target(anchor: Path, raw_target: str) -> Path:
    """Resolve a symlink target string lexically.

    A relative *raw_target* is joined onto *anchor*; the result is normalized
    with ``os.path.normpath`` without touching the filesystem.
    """
    target = Path(raw_target)
    combined = target if target.is_absolute() else anchor / target
    return Path(os.path.normpath(str(combined)))
580
-
581
-
582
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Normalize macOS /tmp and /var first-component symlinks only.

    When *path* is absolute, its first component is listed in
    ``ALLOWED_FIRST_COMPONENT_SYMLINKS``, and that component is a symlink
    pointing at the expected target, the path is rewritten onto the target.
    In every other case, including any ``OSError`` while inspecting the
    link, *path* is returned unchanged so later checks can refuse it.
    """
    if not path.is_absolute():
        return path
    components = path.parts
    if len(components) < 2:
        return path
    first_name = components[1]
    allowed_target = ALLOWED_FIRST_COMPONENT_SYMLINKS.get(first_name)
    if allowed_target is None:
        return path
    root = Path(path.anchor)
    candidate = root / first_name
    try:
        if candidate.is_symlink() and _normalized_link_target(root, os.readlink(candidate)) == allowed_target:
            return allowed_target.joinpath(*components[2:])
    except OSError:
        return path
    return path
605
-
606
-
607
def reject_symlink_components(path: Path, *, label: str) -> Path:
    """Fail if any component of *path* is a symlink or a ``..`` traversal.

    The allowed first-component symlinks are normalized away first. Returns
    the (possibly normalized) path when every component passes.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    if path.is_absolute():
        walked = Path(path.anchor)
        components = path.parts[1:]
    else:
        walked = Path()
        components = path.parts
    for component in components:
        if component in {"", "."}:
            continue
        if component == "..":
            fail(f"{label} must not contain parent traversal")
        walked = walked / component
        try:
            is_link = walked.is_symlink()
        except OSError as exc:
            fail(f"could not inspect {label}: {os_error_detail(exc)}")
        if is_link:
            fail(f"{label} must not traverse symlinks")
    return path
623
-
624
-
625
def open_directory_no_follow(path: Path, *, label: str) -> int:
    """Open an existing directory without following symlink path components.

    The path is walked one component at a time, opening each component
    relative to the previously opened directory descriptor with
    ``O_NOFOLLOW``. Only the filesystem anchor of an absolute path is opened
    with the final component allowed to be followed.

    Returns an open directory descriptor that the caller must close. Raises
    ``CostGuardError`` when ``dir_fd`` is unsupported, when a component is a
    symlink, ``..`` or not a directory, or when any OS call fails.
    """

    if not dir_fd_open_supported():
        fail(f"{label} requires dir_fd support for symlink-safe directory traversal")
    path = reject_symlink_components(path, label=label)
    flags = _directory_open_flags(label=label)
    if path.is_absolute():
        anchor = path.anchor or os.sep
        parts = path.parts[1:]
        try:
            current_fd = os.open(anchor, _directory_open_flags(follow_final=True, label=label))
        except OSError as exc:
            fail(f"could not inspect {label}: {os_error_detail(exc)}")
    else:
        parts = path.parts
        try:
            current_fd = os.open(".", flags)
        except OSError as exc:
            fail(f"could not inspect {label}: {os_error_detail(exc)}")
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                fail(f"{label} must not contain parent traversal")
            next_fd = -1
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
                st = os.fstat(next_fd)
                if not stat.S_ISDIR(st.st_mode):
                    fail(f"{label} must not traverse non-directory components")
            except CostGuardError:
                if next_fd >= 0:
                    try:
                        os.close(next_fd)
                    except OSError:
                        pass
                raise
            except OSError as exc:
                if next_fd >= 0:
                    try:
                        os.close(next_fd)
                    except OSError:
                        pass
                fail(f"could not inspect {label}: {os_error_detail(exc)}")
            finally:
                try:
                    os.close(current_fd)
                except OSError:
                    pass
                # Mark the parent as released so the outer cleanup does not
                # close the same descriptor number a second time when an
                # error is propagating (the number may have been reused).
                current_fd = -1
            current_fd = next_fd
        owned_fd = current_fd
        # Ownership moves to the caller; the cleanup below must not close it.
        current_fd = -1
        return owned_fd
    finally:
        if current_fd >= 0:
            try:
                os.close(current_fd)
            except OSError:
                pass
686
-
687
-
688
def open_private_directory(path: Path, *, label: str) -> int:
    """Open an existing private-storage directory without following symlinks.

    Thin alias for ``open_directory_no_follow``; returns its descriptor.
    """
    directory_fd = open_directory_no_follow(path, label=label)
    return directory_fd
692
-
693
-
694
def fsync_directory_fd(fd: int) -> None:
    """Best-effort ``fsync`` of directory descriptor *fd* on POSIX.

    Does nothing on other platforms; an ``OSError`` from ``fsync`` is ignored.
    """
    if os.name == "posix":
        try:
            os.fsync(fd)
        except OSError:
            pass
701
-
702
-
703
def lock_guidance() -> str:
    """Return the placeholder path of the key lock, for use in messages."""
    lock_name = f"{KEY_NAME}.lock"
    return f"<store-dir>/{lock_name}"
705
-
706
-
707
def ensure_hmac_key_private_mode(key_path: Path, *, label: str = "local HMAC key file") -> None:
    """Set *key_path* to mode 0600 and, on POSIX, verify that it took effect.

    On POSIX a failed ``chmod`` or a resulting mode other than 0600 is
    fatal; elsewhere a failed ``chmod`` is tolerated.
    """
    on_posix = os.name == "posix"
    try:
        os.chmod(key_path, 0o600)
    except OSError as exc:
        if on_posix:
            fail(f"could not secure {label}: {os_error_detail(exc)}")
        return
    if not on_posix:
        return
    try:
        actual_mode = stat.S_IMODE(key_path.stat().st_mode)
    except OSError as exc:
        fail(f"could not verify {label} privacy: {os_error_detail(exc)}")
    if actual_mode != 0o600:
        fail(f"could not verify {label} privacy: expected mode 0600")
721
-
722
-
723
def open_private_regular_fd_for_read(path: Path, *, label: str) -> int:
    """Open a private regular file read-only and return its descriptor.

    The leaf must not be a symlink and must be a regular file. The function
    attempts ``fchmod`` to 0600 and, on POSIX, fails unless the resulting
    mode is exactly 0600. The caller owns the returned descriptor. Every
    failure is reported as ``CostGuardError``.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    leaf = _private_leaf_name(path, label=label)
    try:
        leaf_is_link = path.is_symlink()
    except OSError as exc:
        fail(f"could not inspect {label}: {os_error_detail(exc)}")
    if leaf_is_link:
        fail(f"{label} must not be a symlink")
    dir_fd = -1
    file_fd = -1
    try:
        dir_fd = open_private_directory(path.parent, label=f"{label} parent")
        file_fd = os.open(leaf, _base_open_flags() | _no_follow_flag(label=label), dir_fd=dir_fd)
        if not stat.S_ISREG(os.fstat(file_fd).st_mode):
            fail(f"{label} must be a regular file")
        try:
            os.fchmod(file_fd, 0o600)
        except (AttributeError, OSError):
            # Best effort here; the mode is verified on POSIX just below.
            pass
        current_mode = stat.S_IMODE(os.fstat(file_fd).st_mode)
        if os.name == "posix" and current_mode != 0o600:
            fail(f"could not verify {label} privacy: expected mode 0600")
        # Hand ownership to the caller so the cleanup below leaves it open.
        result_fd, file_fd = file_fd, -1
        return result_fd
    except CostGuardError:
        raise
    except OSError as exc:
        fail(f"could not read {label}: {os_error_detail(exc)}")
    finally:
        for handle in (file_fd, dir_fd):
            if handle >= 0:
                try:
                    os.close(handle)
                except OSError:
                    pass
764
-
765
-
766
def open_private_regular_file_for_read(path: Path, *, label: str):
    """Open a private regular file as a UTF-8 text handle.

    Wraps the descriptor from ``open_private_regular_fd_for_read``. If the
    wrapping itself raises, the descriptor is closed before the error
    propagates.
    """
    raw_fd = open_private_regular_fd_for_read(path, label=label)
    try:
        handle = os.fdopen(raw_fd, "r", encoding="utf-8")
    except BaseException:
        try:
            os.close(raw_fd)
        except OSError:
            pass
        raise
    return handle
778
-
779
-
780
def read_hmac_key(key_path: Path) -> bytes:
    """Read and strictly validate the local HMAC key file.

    The file content must be exactly the canonical URL-safe base64 text of a
    32-byte key: it must match ``HMAC_KEY_RE``, decode cleanly, and re-encode
    to the same text. Returns the decoded key. Every violation is reported as
    ``CostGuardError``.
    """
    try:
        with open_private_regular_file_for_read(key_path, label="local HMAC key file") as handle:
            text = handle.read()
    except UnicodeError:
        fail("invalid local HMAC key file: expected UTF-8 canonical URL-safe base64 text")
    try:
        ascii_text = text.encode("ascii")
    except UnicodeEncodeError:
        fail("invalid local HMAC key file: expected ASCII canonical URL-safe base64 text")
    if HMAC_KEY_RE.fullmatch(text) is None:
        fail("invalid local HMAC key file: expected canonical URL-safe 32-byte key")
    try:
        decoded = base64.b64decode(ascii_text, altchars=b"-_", validate=True)
    except (binascii.Error, ValueError):
        fail("invalid local HMAC key file: invalid canonical URL-safe base64")
    # The round trip must reproduce the stored text exactly.
    round_trip = base64.urlsafe_b64encode(decoded).decode("ascii")
    if round_trip != text:
        fail("invalid local HMAC key file: expected canonical URL-safe 32-byte key")
    if len(decoded) != 32:
        fail("invalid local HMAC key file: expected 32 decoded bytes")
    return decoded
801
-
802
-
803
def fsync_parent_dir(path: Path) -> None:
    """Best-effort ``fsync`` of the directory containing *path* on POSIX.

    Does nothing on other platforms. Failures to open, sync or close the
    directory are ignored.
    """
    if os.name != "posix":
        return
    try:
        dir_fd = os.open(path.parent, os.O_RDONLY)
    except OSError:
        return
    try:
        os.fsync(dir_fd)
    except OSError:
        pass
    finally:
        try:
            os.close(dir_fd)
        except OSError:
            pass
819
-
820
-
821
def write_all(fd: int, data: bytes) -> None:
    """Write every byte of ``data`` to ``fd``, continuing after partial writes.

    Raises:
        OSError: if ``os.write`` reports that zero or fewer bytes were
            written, or if ``os.write`` itself fails.
    """
    remaining = memoryview(data)
    while remaining:
        count = os.write(fd, remaining)
        if count <= 0:
            raise OSError("short write to local private file")
        # Slice the view instead of copying the unwritten tail.
        remaining = remaining[count:]
829
-
830
-
831
def lock_file_exclusive(fd: int, *, label: str) -> bool:
    """Take an exclusive ``flock`` on ``fd``.

    Args:
        fd: Open file descriptor to lock.
        label: Human-readable name used in failure messages.

    Returns:
        ``True`` once the lock is held. A missing ``fcntl`` module or a
        failing ``flock`` call is reported through ``fail``.
    """
    if fcntl is None:
        fail(f"could not lock {label}: platform file locking unavailable")
    try:
        fcntl.flock(fd, fcntl.LOCK_EX)
    except OSError as lock_error:
        fail(f"could not lock {label}: {os_error_detail(lock_error)}")
    return True
839
-
840
-
841
def unlock_file(fd: int) -> None:
    """Release a ``flock`` on ``fd``, ignoring failures.

    Does nothing when the ``fcntl`` module is unavailable.
    """
    if fcntl is not None:
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        except OSError:
            # Unlocking is best effort; closing the fd releases the lock anyway.
            pass
848
-
849
-
850
@dataclass(frozen=True)
class KeyLock:
    """Immutable record describing one acquisition of the local HMAC key lock."""

    # Random hex token written into the lock's owner metadata; compared later
    # to confirm the on-disk lock still belongs to this acquisition.
    nonce: str
    # True when the owner metadata file was written successfully.
    metadata_written: bool
854
-
855
-
856
def write_key_lock_metadata(lock_dir: Path) -> KeyLock:
    """Write the owner metadata file inside ``lock_dir`` and describe the result.

    The metadata is a single JSON line holding this process's pid, the
    current wall-clock time, and a fresh random nonce.

    Returns:
        A ``KeyLock`` carrying the nonce. ``metadata_written`` is ``True``
        when the file was written and synced, ``False`` when any ``OSError``
        occurred along the way. Validation failures go through ``fail``.
    """
    reject_symlink_components(lock_dir, label="local HMAC key lock directory")
    nonce = secrets.token_hex(8)
    metadata = {
        "pid": os.getpid(),
        "created_at_unix": time.time(),
        "nonce": nonce,
    }
    # -1 marks "not open" so the finally block knows what still needs closing.
    lock_fd = -1
    fd = -1
    try:
        lock_fd = open_private_directory(lock_dir, label="local HMAC key lock directory")
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | _no_follow_flag()
        if hasattr(os, "O_CLOEXEC"):
            flags |= os.O_CLOEXEC
        # Open relative to the directory descriptor rather than by full path.
        fd = os.open(LOCK_OWNER_NAME, flags, 0o600, dir_fd=lock_fd)
        st = os.fstat(fd)
        if not stat.S_ISREG(st.st_mode):
            fail("local HMAC key lock metadata must be a regular file")
        try:
            os.fchmod(fd, 0o600)
        except (AttributeError, OSError):
            # fchmod may be missing or refused; the mode is re-checked below.
            pass
        st = os.fstat(fd)
        if os.name == "posix" and stat.S_IMODE(st.st_mode) != 0o600:
            fail("could not verify local HMAC key lock metadata privacy: expected mode 0600")
        write_all(fd, json_bytes(metadata).encode("utf-8"))
        write_all(fd, b"\n")
        os.fsync(fd)
        os.close(fd)
        # Mark as closed so the finally block does not close it a second time.
        fd = -1
        fsync_directory_fd(lock_fd)
        return KeyLock(nonce=nonce, metadata_written=True)
    except OSError:
        # Any I/O failure is reported to the caller via metadata_written=False.
        return KeyLock(nonce=nonce, metadata_written=False)
    finally:
        if fd >= 0:
            try:
                os.close(fd)
            except OSError:
                pass
        if lock_fd >= 0:
            try:
                os.close(lock_fd)
            except OSError:
                pass
902
-
903
-
904
def key_lock_age_seconds(lock_dir: Path, now: float | None = None) -> float:
    """Estimate how many seconds ago the key lock at ``lock_dir`` was created.

    The ``created_at_unix`` value from the owner metadata file is preferred
    when it is a finite, non-negative ``int``/``float`` that is not further
    in the future than ``KEY_LOCK_METADATA_CLOCK_SKEW_SECONDS``. Otherwise
    the directory's mtime is used.

    Args:
        lock_dir: Lock directory to inspect.
        now: Reference time in Unix seconds; defaults to ``time.time()``.

    Returns:
        A non-negative age in seconds, or ``0.0`` when neither source is
        readable.
    """
    reference = time.time() if now is None else now
    owner_file = lock_dir / LOCK_OWNER_NAME
    try:
        payload = json.loads(owner_file.read_text(encoding="utf-8"))
        stamp = payload.get("created_at_unix") if isinstance(payload, dict) else None
        # Exact type check on purpose: bool is an int subclass and is rejected.
        if type(stamp) in (int, float):
            stamp_value = float(stamp)
            if math.isfinite(stamp_value) and 0 <= stamp_value <= reference + KEY_LOCK_METADATA_CLOCK_SKEW_SECONDS:
                return max(0.0, reference - stamp_value)
    except (OSError, UnicodeError, json.JSONDecodeError, TypeError, ValueError, OverflowError):
        pass
    # Metadata missing or unusable: fall back to the directory mtime.
    try:
        return max(0.0, reference - lock_dir.stat().st_mtime)
    except OSError:
        return 0.0
921
-
922
-
923
- def path_mtime_age_seconds(path: Path, now: float | None = None) -> float:
924
- current = time.time() if now is None else now
925
- try:
926
- return max(0.0, current - path.stat().st_mtime)
927
- except OSError:
928
- return 0.0
929
-
930
-
931
def reclaim_stale_key_lock(lock_dir: Path, key_path: Path) -> bool:
    """Move aside and delete a key lock that has outlived ``KEY_LOCK_STALE_SECONDS``.

    Nothing is reclaimed when the key file already exists or the lock is
    still fresh. The lock directory is first renamed to a unique
    ``.stale.<pid>.<token>`` sibling and then removed best-effort.

    Returns:
        ``True`` when the lock directory was renamed away, ``False`` otherwise.
    """
    keep_lock = (
        key_path.exists()
        or key_lock_age_seconds(lock_dir) < KEY_LOCK_STALE_SECONDS
        # Checked again after the age computation, just before renaming.
        or key_path.exists()
    )
    if keep_lock:
        return False
    quarantine = lock_dir.with_name(f"{lock_dir.name}.stale.{os.getpid()}.{secrets.token_hex(8)}")
    try:
        os.rename(lock_dir, quarantine)
    except OSError:
        return False
    try:
        shutil.rmtree(quarantine)
    except OSError:
        # Leftovers are acceptable; the rename already freed the lock name.
        pass
    return True
948
-
949
-
950
def key_lock_owner_matches(lock_dir: Path, lock: KeyLock) -> bool:
    """Report whether the owner metadata in ``lock_dir`` belongs to ``lock``.

    Returns:
        ``True`` only when ``lock.metadata_written`` is set and the metadata
        file parses to a JSON object whose ``nonce`` equals ``lock.nonce``
        and whose ``pid`` equals the current process id. Unreadable or
        malformed metadata yields ``False``.
    """
    if not lock.metadata_written:
        return False
    owner_file = lock_dir / LOCK_OWNER_NAME
    try:
        owner = json.loads(owner_file.read_text(encoding="utf-8"))
    except (OSError, UnicodeError, json.JSONDecodeError):
        return False
    if not isinstance(owner, dict):
        return False
    return owner.get("nonce") == lock.nonce and owner.get("pid") == os.getpid()
962
-
963
-
964
def cleanup_orphaned_stale_key_locks(store_dir: Path) -> None:
    """Best-effort removal of leftover renamed key-lock entries in ``store_dir``.

    Entries whose name starts with ``<KEY_NAME>.lock.stale.`` are removed
    unconditionally. Entries starting with ``<KEY_NAME>.lock.cleanup.`` are
    removed only once their mtime age reaches ``KEY_LOCK_STALE_SECONDS``.
    All filesystem errors are ignored.
    """
    stale_marker = f"{KEY_NAME}.lock.stale."
    cleanup_marker = f"{KEY_NAME}.lock.cleanup."
    try:
        entries = list(store_dir.iterdir())
    except OSError:
        return
    for entry in entries:
        if entry.name.startswith(cleanup_marker):
            # Removed only once old enough.
            removable = path_mtime_age_seconds(entry) >= KEY_LOCK_STALE_SECONDS
        else:
            removable = entry.name.startswith(stale_marker)
        if not removable:
            continue
        try:
            if entry.is_dir():
                shutil.rmtree(entry)
            else:
                entry.unlink()
        except OSError:
            pass
984
-
985
-
986
def cleanup_key_lock(lock_dir: Path, lock: KeyLock) -> None:
    """Release the key lock at ``lock_dir`` if it still belongs to ``lock``.

    The directory is renamed to a unique ``.cleanup.<pid>.<token>`` sibling
    and ownership is verified again on the renamed directory. If ownership
    no longer matches, the directory is renamed back (unless ``lock_dir``
    already exists again). Otherwise it is deleted best-effort.
    """
    if not key_lock_owner_matches(lock_dir, lock):
        return
    parked = lock_dir.with_name(f"{lock_dir.name}.cleanup.{os.getpid()}.{secrets.token_hex(8)}")
    try:
        os.rename(lock_dir, parked)
    except OSError:
        return
    if key_lock_owner_matches(parked, lock):
        try:
            shutil.rmtree(parked)
        except OSError:
            pass
        return
    # The renamed directory is not ours after all: try to restore it.
    try:
        if not lock_dir.exists():
            os.rename(parked, lock_dir)
    except OSError:
        pass
1005
-
1006
-
1007
def acquire_key_lock(lock_dir: Path, key_path: Path) -> KeyLock | None:
    """Try to take the directory-based lock that guards first-time key creation.

    Makes up to ``KEY_LOCK_WAIT_ATTEMPTS`` attempts. Creating ``lock_dir``
    with ``os.mkdir`` is the acquisition step.

    Returns:
        A ``KeyLock`` when this process created the lock and wrote its
        metadata, or ``None`` when ``key_path`` already exists (so no lock
        is needed). Other outcomes are reported through ``fail``.
    """
    for _ in range(KEY_LOCK_WAIT_ATTEMPTS):
        try:
            os.mkdir(lock_dir, 0o700)
            try:
                # Set the mode explicitly after mkdir; failures are ignored.
                os.chmod(lock_dir, 0o700)
            except OSError:
                pass
            lock = write_key_lock_metadata(lock_dir)
            if not lock.metadata_written:
                # Without owner metadata the lock cannot be verified later,
                # so remove the directory and report the failure.
                try:
                    shutil.rmtree(lock_dir)
                except OSError:
                    pass
                fail("could not write local HMAC key lock metadata; retry")
            return lock
        except FileExistsError:
            # The lock directory already exists.
            if key_path.exists():
                return None
            if reclaim_stale_key_lock(lock_dir, key_path):
                # Stale lock was moved away: retry mkdir without sleeping.
                continue
            if key_path.exists():
                return None
            time.sleep(KEY_LOCK_POLL_SECONDS)
        except OSError as exc:
            fail(f"could not create local HMAC key lock at {lock_guidance()}: {os_error_detail(exc)}")
    # Attempts exhausted: accept a key that appeared meanwhile, else give up.
    if key_path.exists():
        return None
    fail(f"timed out waiting for local HMAC key lock; remove stale {lock_guidance()}")
1036
-
1037
-
1038
def load_or_create_hmac_key(store_dir: Path) -> bytes:
    """Return the local HMAC key for ``store_dir``, creating it on first use.

    An existing key file is read and validated via ``read_hmac_key``.
    Otherwise a key lock is acquired, 32 random bytes are written to a
    temporary file in ``store_dir``, and that file is moved onto
    ``KEY_NAME`` with ``os.replace``. The persisted file is then re-read.

    Returns:
        The key bytes as returned by ``read_hmac_key``.
    """
    store_dir = normalize_allowed_first_absolute_symlink(store_dir)
    ensure_private_dir(store_dir)
    cleanup_orphaned_stale_key_locks(store_dir)
    key_path = store_dir / KEY_NAME
    if key_path.exists():
        return read_hmac_key(key_path)

    lock_dir = store_dir / f"{KEY_NAME}.lock"
    locked = acquire_key_lock(lock_dir, key_path)
    if locked is None:
        # acquire_key_lock returns None when the key file already exists.
        return read_hmac_key(key_path)

    # -1 / None mark "nothing to clean up yet" for the outer finally block.
    store_fd = -1
    tmp_leaf: str | None = None
    try:
        # Re-check now that the lock is held.
        if key_path.exists():
            return read_hmac_key(key_path)
        key = secrets.token_bytes(32)
        encoded = base64.urlsafe_b64encode(key)
        store_fd = open_private_directory(store_dir, label="local HMAC ledger directory")
        tmp_leaf = f"{KEY_NAME}.{os.getpid()}.{secrets.token_hex(8)}.tmp"
        try:
            # O_EXCL: the temporary file must not already exist.
            flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | _no_follow_flag()
            if hasattr(os, "O_CLOEXEC"):
                flags |= os.O_CLOEXEC
            fd = os.open(tmp_leaf, flags, 0o600, dir_fd=store_fd)
        except OSError as exc:
            fail(f"could not create local HMAC key file: {os_error_detail(exc)}")
        close_error: OSError | None = None
        try:
            st = os.fstat(fd)
            if not stat.S_ISREG(st.st_mode):
                fail("local HMAC key file must be a regular file")
            try:
                os.fchmod(fd, 0o600)
            except (AttributeError, OSError):
                # fchmod may be missing or refused; the mode is re-checked below.
                pass
            st = os.fstat(fd)
            if os.name == "posix" and stat.S_IMODE(st.st_mode) != 0o600:
                fail("could not verify local HMAC key file privacy: expected mode 0600")
            write_all(fd, encoded)
            os.fsync(fd)
        except CostGuardError:
            raise
        except OSError as exc:
            fail(f"could not write local HMAC key file: {os_error_detail(exc)}")
        finally:
            # Remember a close failure instead of raising inside finally.
            try:
                os.close(fd)
            except OSError as exc:
                close_error = exc
        if close_error is not None:
            fail(f"could not write local HMAC key file: {os_error_detail(close_error)}")
        # Do not publish the key if the lock no longer belongs to this process.
        if locked.metadata_written and not key_lock_owner_matches(lock_dir, locked):
            if key_path.exists():
                return read_hmac_key(key_path)
            fail("lost local HMAC key lock; retry")
        try:
            os.replace(tmp_leaf, KEY_NAME, src_dir_fd=store_fd, dst_dir_fd=store_fd)
        except TypeError:
            fail("could not persist local HMAC key file: platform dir_fd replace unavailable")
        except OSError as exc:
            fail(f"could not persist local HMAC key file: {os_error_detail(exc)}")
        # The temporary name is gone after the replace; skip unlinking it below.
        tmp_leaf = None
        fsync_directory_fd(store_fd)
        # Re-read the persisted file so callers always use the same bytes future
        # ledger lookups will use. The lock prevents first-use races without
        # relying on hard links or replacing another process's winner key.
        return read_hmac_key(key_path)
    finally:
        if tmp_leaf is not None:
            # A temporary file name is still pending: remove it best-effort.
            try:
                if store_fd >= 0:
                    os.unlink(tmp_leaf, dir_fd=store_fd)
                else:
                    (store_dir / tmp_leaf).unlink()
            except OSError:
                pass
        if store_fd >= 0:
            try:
                os.close(store_fd)
            except OSError:
                pass
        cleanup_key_lock(lock_dir, locked)
1123
-
1124
-
1125
def keyed_hmac(key: bytes, text: str) -> str:
    """Return the hex HMAC-SHA256 of ``text`` under ``key``.

    ``text`` is encoded as UTF-8 with ``errors="replace"``, so characters
    that cannot be encoded are substituted rather than raising.
    """
    payload = text.encode("utf-8", errors="replace")
    mac = hmac.new(key, payload, hashlib.sha256)
    return mac.hexdigest()
1127
-
1128
-
1129
def ledger_path(store_dir: Path) -> Path:
    """Return the path of the ledger file (``LEDGER_NAME``) inside ``store_dir``."""
    return store_dir.joinpath(LEDGER_NAME)
1131
-
1132
-
1133
- def parse_ledger_line(raw_line: bytes) -> dict[str, Any] | None:
1134
- try:
1135
- line = raw_line.decode("utf-8").strip()
1136
- except UnicodeDecodeError:
1137
- return None
1138
- if not line:
1139
- return None
1140
- try:
1141
- row = json.loads(line, parse_constant=reject_json_constant)
1142
- except (json.JSONDecodeError, ValueError):
1143
- return None
1144
- if isinstance(row, dict):
1145
- return row
1146
- return None
1147
-
1148
-
1149
def parse_ledger_lines(raw_lines: list[bytes]) -> list[dict[str, Any]]:
    """Parse raw ledger lines in order, dropping every line that yields ``None``."""
    candidates = (parse_ledger_line(raw) for raw in raw_lines)
    return [row for row in candidates if row is not None]
1156
-
1157
-
1158
def tail_recent_ledger_rows(handle, *, initial_bytes: int, max_rows: int) -> list[dict[str, Any]]:
    """Read the last ``max_rows`` parseable ledger rows from a binary stream.

    Reads a window at the end of the file and doubles it until it either
    contains at least ``max_rows`` rows or reaches the start of the file.

    Args:
        handle: Seekable binary file object.
        initial_bytes: Size of the first tail window (at least 1 is used).
        max_rows: Maximum number of rows to return.

    Returns:
        Up to ``max_rows`` rows, oldest first; ``[]`` for an empty file or
        a non-positive ``max_rows``.
    """
    if max_rows <= 0:
        return []
    handle.seek(0, os.SEEK_END)
    total = handle.tell()
    if total <= 0:
        return []
    span = max(1, int(initial_bytes))
    while True:
        offset = max(0, total - span)
        handle.seek(offset)
        chunk = handle.read(total - offset)
        at_file_start = offset == 0
        if at_file_start:
            pieces = chunk.split(b"\n")
        else:
            # The window may begin mid-line: discard everything up to and
            # including the first newline.
            _partial, newline, rest = chunk.partition(b"\n")
            pieces = rest.split(b"\n") if newline else []
        parsed = parse_ledger_lines(pieces)
        if at_file_start or len(parsed) >= max_rows:
            return parsed[-max_rows:]
        span = min(total, span * 2)
1184
-
1185
-
1186
def open_private_regular_file_for_append(path: Path, *, label: str) -> int:
    """Open (creating if needed) ``path`` for appending and return its descriptor.

    The file is opened relative to a descriptor of its parent directory,
    must be a regular file, and on POSIX must end up with mode 0600.
    An ``ENOENT`` from the open sequence is retried up to
    ``LEDGER_OPEN_RETRY_ATTEMPTS`` times.

    Args:
        path: Target file.
        label: Human-readable name used in failure messages.

    Returns:
        An open file descriptor owned by the caller.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    leaf_name = _private_leaf_name(path, label=label)
    try:
        if path.is_symlink():
            fail(f"{label} must not be a symlink")
    except OSError as exc:
        fail(f"could not inspect {label}: {os_error_detail(exc)}")
    flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND | _no_follow_flag()
    if hasattr(os, "O_CLOEXEC"):
        flags |= os.O_CLOEXEC
    for attempt in range(LEDGER_OPEN_RETRY_ATTEMPTS):
        # -1 marks "not open" so the finally block knows what to close.
        parent_fd = -1
        fd = -1
        try:
            parent_fd = open_private_directory(path.parent, label=f"{label} parent")
            fd = os.open(leaf_name, flags, 0o600, dir_fd=parent_fd)
            st = os.fstat(fd)
            if not stat.S_ISREG(st.st_mode):
                fail(f"{label} must be a regular file")
            try:
                os.fchmod(fd, 0o600)
            except (AttributeError, OSError):
                # fchmod may be missing or refused; the mode is re-checked below.
                pass
            st = os.fstat(fd)
            if os.name == "posix" and stat.S_IMODE(st.st_mode) != 0o600:
                fail(f"could not verify {label} privacy: expected mode 0600")
            # Hand the descriptor over and clear fd so finally leaves it open.
            owned_fd = fd
            fd = -1
            return owned_fd
        except CostGuardError:
            raise
        except OSError as exc:
            # Retry only ENOENT, and only while attempts remain.
            if exc.errno == errno.ENOENT and attempt + 1 < LEDGER_OPEN_RETRY_ATTEMPTS:
                time.sleep(LEDGER_OPEN_RETRY_SECONDS)
                continue
            fail(f"could not open {label}: {os_error_detail(exc)}")
        finally:
            if fd >= 0:
                # Ownership transfers to the caller only on the successful
                # return above. On errors, close before surfacing a
                # deterministic message.
                try:
                    os.close(fd)
                except OSError:
                    pass
            if parent_fd >= 0:
                try:
                    os.close(parent_fd)
                except OSError:
                    pass
    raise AssertionError("unreachable: append retry loop exits via return or fail")
1238
-
1239
-
1240
def load_ledger(store_dir: Path) -> list[dict[str, Any]]:
    """Load the most recent ledger rows stored under ``store_dir``.

    Returns:
        At most ``MAX_LEDGER_ROWS`` rows, oldest first; ``[]`` when the
        ledger file does not exist.
    """
    store_dir = normalize_allowed_first_absolute_symlink(store_dir)
    ledger_file = ledger_path(store_dir)
    if not ledger_file.exists():
        return []
    recent: list[dict[str, Any]] = []
    raw_fd = open_private_regular_fd_for_read(ledger_file, label="local HMAC ledger file")
    try:
        with os.fdopen(raw_fd, "rb") as stream:
            # The file object owns the descriptor from here on.
            raw_fd = -1
            recent = tail_recent_ledger_rows(
                stream,
                initial_bytes=LEDGER_TAIL_INITIAL_BYTES,
                max_rows=MAX_LEDGER_ROWS,
            )
    finally:
        if raw_fd >= 0:
            try:
                os.close(raw_fd)
            except OSError:
                pass
    return recent[-MAX_LEDGER_ROWS:]
1258
-
1259
-
1260
def append_ledger(store_dir: Path, entry: dict[str, Any]) -> None:
    """Append ``entry`` as one JSON line to the ledger under ``store_dir``.

    The write runs under an exclusive advisory file lock and is followed
    by ``os.fsync``. The descriptor is always closed on exit.
    """
    store_dir = normalize_allowed_first_absolute_symlink(store_dir)
    ensure_private_dir(store_dir)
    ledger_file = ledger_path(store_dir)
    # The ledger is append-only JSONL. The advisory lock keeps the os.write
    # loop from interleaving with other cooperating writers; load_ledger
    # skips any malformed or partial line that is already present.
    ledger_fd = open_private_regular_file_for_append(ledger_file, label="local HMAC ledger file")
    holds_lock = False
    try:
        holds_lock = lock_file_exclusive(ledger_fd, label="local HMAC ledger file")
        line = json_bytes(entry) + "\n"
        write_all(ledger_fd, line.encode("utf-8"))
        os.fsync(ledger_fd)
    finally:
        if holds_lock:
            unlock_file(ledger_fd)
        os.close(ledger_fd)
1278
-
1279
-
1280
def latest_fingerprint_rows(rows: list[dict[str, Any]]) -> dict[tuple[str, str], dict[str, Any]]:
    """Index the newest observed fingerprint per ``(model, hmac)`` pair.

    Only rows whose ``kind`` is ``"observe"`` are considered. Each stored
    value is a copy of the fingerprint dict with the row's
    ``created_at_unix`` and ``model`` added. When timestamps tie, the later
    row in ``rows`` wins.
    """
    newest: dict[tuple[str, str], dict[str, Any]] = {}
    for row in rows:
        if row.get("kind") != "observe":
            continue
        model = str(row.get("model") or "unknown")
        created = safe_int(row.get("created_at_unix") or 0, 0)
        entries = row.get("fingerprints")
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            digest = entry.get("hmac")
            if not isinstance(digest, str):
                continue
            slot = (model, digest)
            current = newest.get(slot)
            if current is not None and created < safe_int(current.get("created_at_unix") or 0, 0):
                # An entry with a newer timestamp is already recorded.
                continue
            newest[slot] = {**entry, "created_at_unix": created, "model": model}
    return newest
1301
-
1302
-
1303
def default_pricing_profile() -> dict[str, Any]:
    """Build a fresh copy of the bundled default pricing profile.

    Returns:
        A new dict with profile metadata, cache multipliers, default
        input/output rates, and a ``models`` map of per-model USD-per-MTok
        rates. Every nested dict is newly created on each call.
    """
    opus_recent = (5.0, 25.0)
    opus_legacy = (15.0, 75.0)
    sonnet_rates = (3.0, 15.0)
    haiku_recent = (1.0, 5.0)
    haiku_legacy = (0.80, 4.0)
    # Entry order is kept exactly as in the original literal: dicts preserve
    # insertion order, and rates_for_model relies on a stable sort.
    rate_table = (
        ("opus 4.8", opus_recent),
        ("opus-4-8", opus_recent),
        ("opus 4.7", opus_recent),
        ("opus-4-7", opus_recent),
        ("opus 4.6", opus_recent),
        ("opus-4-6", opus_recent),
        ("opus 4.5", opus_recent),
        ("opus-4-5", opus_recent),
        ("opus 4.1", opus_legacy),
        ("opus-4-1", opus_legacy),
        ("opus 4", opus_legacy),
        ("opus-4", opus_legacy),
        ("sonnet 4.6", sonnet_rates),
        ("sonnet-4-6", sonnet_rates),
        ("sonnet 4.5", sonnet_rates),
        ("sonnet-4-5", sonnet_rates),
        ("sonnet 4", sonnet_rates),
        ("sonnet-4", sonnet_rates),
        ("haiku 4.5", haiku_recent),
        ("haiku-4-5", haiku_recent),
        ("haiku 3.5", haiku_legacy),
        ("haiku-3-5", haiku_legacy),
        ("sonnet", sonnet_rates),
        ("haiku", haiku_recent),
        ("opus", opus_recent),
    )
    models = {
        name: {"input_usd_per_mtok": input_rate, "output_usd_per_mtok": output_rate}
        for name, (input_rate, output_rate) in rate_table
    }
    return {
        "name": "anthropic-default-2026-06",
        "source": "Anthropic pricing docs retrieved 2026-06-05; recheck before release or billing assertions.",
        "source_urls": [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL],
        "checked_at": "2026-06-05",
        "release_recheck_required": True,
        "usd_to_krw": DEFAULT_USD_TO_KRW,
        "cache_write_multipliers": {"5m": 1.25, "1h": 2.0},
        "cache_read_multiplier": 0.10,
        "default_input_usd_per_mtok": 3.0,
        "default_output_usd_per_mtok": 15.0,
        "models": models,
    }
1343
-
1344
-
1345
def load_pricing_profile(raw: str | None, *, max_bytes: int = DEFAULT_MAX_BYTES) -> dict[str, Any]:
    """Return the default pricing profile, optionally merged with an override.

    Args:
        raw: ``None``/empty for defaults only; inline JSON when the text
            starts with ``{`` after leading whitespace; otherwise a path
            read through ``read_bounded_regular_path``.
        max_bytes: Size limit applied when reading an override file.

    Returns:
        The merged profile. When the override has a ``models`` key, that
        value replaces the bundled model map entirely.
    """
    profile = default_pricing_profile()
    if not raw:
        return profile
    try:
        if raw.lstrip().startswith("{"):
            document = raw
        else:
            document, truncated = read_bounded_regular_path(raw, max_bytes=max_bytes, label="pricing profile")
            if truncated:
                fail("pricing profile exceeded max bytes")
        override = json.loads(document, parse_constant=reject_json_constant)
    except CostGuardError:
        raise
    except (OSError, json.JSONDecodeError, ValueError) as exc:
        fail(f"could not load pricing profile: {exc}")
    if not isinstance(override, dict):
        fail("pricing profile must be a JSON object")
    merged = merge_dict(profile, override)
    if "models" in override:
        # A user-supplied model map is the explicit pricing contract for this
        # run. Bundled defaults must not shadow a generic custom key such as
        # "sonnet" with a more specific built-in key.
        merged["models"] = override["models"]
    return merged
1370
-
1371
-
1372
def merge_dict(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict with ``override`` recursively merged over ``base``.

    When both sides hold a dict under the same key, the two are merged
    recursively. Otherwise the override value replaces the base value.
    ``base`` itself is not modified.
    """
    merged = dict(base)
    for key, incoming in override.items():
        existing = merged.get(key)
        both_mappings = isinstance(incoming, dict) and isinstance(existing, dict)
        merged[key] = merge_dict(existing, incoming) if both_mappings else incoming  # type: ignore[arg-type]
    return merged
1380
-
1381
-
1382
def float_field(data: dict[str, Any], key: str, default: float) -> float:
    """Read ``data[key]`` as a finite, non-negative float.

    Returns:
        The converted value, or ``default`` when the key is missing or the
        value cannot be converted, is not finite, or is negative.
    """
    try:
        number = float(data.get(key, default))
    except (TypeError, ValueError, OverflowError):
        return default
    usable = math.isfinite(number) and number >= 0
    return number if usable else default
1390
-
1391
-
1392
def rates_for_model(profile: dict[str, Any], model: str) -> tuple[float, float, str]:
    """Resolve input/output USD-per-MTok rates for ``model`` from ``profile``.

    Entries of ``profile["models"]`` are tried from most to least specific
    (more name tokens first, then longer normalized name). An entry matches
    when its lowercased key is a substring of the model name, its
    normalized key is a substring of the normalized model name, or all of
    its tokens occur among the model's tokens.

    Returns:
        ``(input_rate, output_rate, matched_key)``. ``matched_key`` is
        ``"default"`` when no entry matched, in which case the profile-level
        default rates are returned.
    """

    def normalize(name: str) -> str:
        # Collapse each run of non-alphanumerics into one dash.
        return re.sub(r"[^a-z0-9]+", "-", name).strip("-")

    def split_tokens(normalized: str) -> list[str]:
        return [part for part in normalized.split("-") if part]

    def specificity(entry: tuple[Any, Any]) -> tuple[int, int]:
        normalized = normalize(str(entry[0]).lower())
        return (len(split_tokens(normalized)), len(normalized))

    wanted_lower = model.lower()
    wanted_norm = normalize(wanted_lower)
    wanted_tokens = set(split_tokens(wanted_norm))

    fallback_input = float_field(profile, "default_input_usd_per_mtok", 3.0)
    fallback_output = float_field(profile, "default_output_usd_per_mtok", 15.0)

    table = profile.get("models")
    if isinstance(table, dict):
        for name, rates in sorted(table.items(), key=specificity, reverse=True):
            if not isinstance(rates, dict):
                continue
            name_lower = str(name).lower()
            name_norm = normalize(name_lower)
            name_tokens = split_tokens(name_norm)
            matches = (
                name_lower in wanted_lower
                or name_norm in wanted_norm
                or (bool(name_tokens) and all(tok in wanted_tokens for tok in name_tokens))
            )
            if matches:
                return (
                    float_field(rates, "input_usd_per_mtok", fallback_input),
                    float_field(rates, "output_usd_per_mtok", fallback_output),
                    str(name),
                )
    return (fallback_input, fallback_output, "default")
1418
-
1419
-
1420
def pricing_multipliers(profile: dict[str, Any]) -> tuple[dict[str, float], float]:
    """Extract cache write/read price multipliers from ``profile``.

    Returns:
        ``(write, read)``. ``write`` maps the TTL labels ``"5m"`` and
        ``"1h"`` to multipliers (defaults 1.25 and 2.0); ``read`` is the
        cache-read multiplier (default 0.10). Missing, non-numeric,
        non-finite, or negative values fall back to the defaults.
    """
    write = {"5m": 1.25, "1h": 2.0}
    configured = profile.get("cache_write_multipliers")
    if isinstance(configured, dict):
        for ttl in ("5m", "1h"):
            # float_field applies the same finite/non-negative validation.
            write[ttl] = float_field(configured, ttl, write[ttl])
    return write, float_field(profile, "cache_read_multiplier", 0.10)
1433
-
1434
-
1435
def usd_to_krw(profile: dict[str, Any], override: float | None = None) -> float:
    """Return the USD→KRW exchange rate to use.

    Args:
        profile: Pricing profile whose ``usd_to_krw`` field supplies the
            rate when no override is given.
        override: Explicit rate (the ``--usd-to-krw`` option); validated
            via ``finite_float_arg`` with zero disallowed.

    Returns:
        The chosen rate. A profile rate that is not greater than zero is
        reported through ``fail``.
    """
    if override is None:
        rate = float_field(profile, "usd_to_krw", DEFAULT_USD_TO_KRW)
        if rate <= 0:
            fail("pricing profile usd_to_krw must be > 0")
        return rate
    return finite_float_arg(override, "--usd-to-krw", minimum=0.0, allow_zero=False)
1442
-
1443
-
1444
def money(tokens: int, usd_per_mtok: float, multiplier: float = 1.0) -> float:
    """Return the USD cost of ``tokens`` at ``usd_per_mtok`` per million tokens.

    Negative token counts are treated as zero; the result is scaled by
    ``multiplier``.
    """
    millions = max(0, tokens) / 1_000_000.0
    return millions * usd_per_mtok * multiplier
1446
-
1447
-
1448
def krw(usd: float, rate: float) -> float:
    """Convert a USD amount to KRW by multiplying with the exchange ``rate``."""
    converted = usd * rate
    return converted
1450
-
1451
-
1452
def uncertainty(mid_tokens: int, safety_factor: float) -> dict[str, int]:
    """Return a low/mid/high token band around ``mid_tokens``.

    ``high`` scales the midpoint by ``safety_factor`` (never below 1.0),
    rounded up. ``low`` is 75% of the midpoint, rounded down. Neither bound
    crosses the midpoint.
    """
    factor = max(1.0, safety_factor)
    upper = max(mid_tokens, math.ceil(mid_tokens * factor))
    lower = min(mid_tokens, math.floor(mid_tokens * 0.75))
    return {"low": lower, "mid": mid_tokens, "high": upper}
1456
-
1457
-
1458
def cost_range(mid_usd: float, safety_factor: float) -> dict[str, float]:
    """Return a low/mid/high USD band around ``mid_usd``, rounded to 8 decimals.

    ``low`` is 75% of the midpoint; ``high`` scales the midpoint by
    ``safety_factor``, never by less than 1.0.
    """
    factor = max(1.0, safety_factor)
    band = {
        "low": mid_usd * 0.75,
        "mid": mid_usd,
        "high": mid_usd * factor,
    }
    return {name: round(amount, 8) for name, amount in band.items()}
1464
-
1465
-
1466
def budget_state(cost_usd_range: dict[str, float], args: argparse.Namespace, profile: dict[str, Any]) -> dict[str, Any]:
    """Compare an estimated USD cost band with the configured budgets.

    Args:
        cost_usd_range: Mapping with ``low``/``mid``/``high`` USD estimates
            (missing entries count as 0.0).
        args: Parsed options; ``budget_usd``, ``budget_krw`` and
            ``usd_to_krw`` are read when present.
        profile: Pricing profile used for the KRW→USD conversion.

    Returns:
        ``{"configured": False, ...}`` when no budget is set. Otherwise a
        dict with aggregate ``near_threshold``/``over_budget`` flags and one
        ``checks`` entry per configured budget.
    """
    limits: list[tuple[str, float, float]] = []
    usd_limit = getattr(args, "budget_usd", None)
    if usd_limit is not None:
        usd_limit = finite_float_arg(usd_limit, "--budget-usd", minimum=0.0, allow_zero=True)
        limits.append(("USD", usd_limit, usd_limit))
    krw_limit = getattr(args, "budget_krw", None)
    if krw_limit is not None:
        krw_limit = finite_float_arg(krw_limit, "--budget-krw", minimum=0.0, allow_zero=True)
        rate = usd_to_krw(profile, getattr(args, "usd_to_krw", None))
        limits.append(("KRW", krw_limit, krw_limit / rate))
    if not limits:
        return {"configured": False, "near_threshold": False, "over_budget": False}

    high = float(cost_usd_range.get("high", 0.0))
    mid = float(cost_usd_range.get("mid", 0.0))
    low = float(cost_usd_range.get("low", 0.0))
    checks = []
    for currency, shown_budget, limit_usd in limits:
        exceeds = high > limit_usd
        # "Near": the budget lies at or above low/mid but below the high estimate.
        close = low <= limit_usd < high or mid <= limit_usd < high
        checks.append(
            {
                "currency": currency,
                "budget": shown_budget,
                "budget_usd": round(limit_usd, 8),
                "over_high_estimate": exceeds,
                "near_threshold": close,
            }
        )
    return {
        "configured": True,
        "near_threshold": any(check["near_threshold"] for check in checks),
        "over_budget": any(check["over_high_estimate"] for check in checks),
        "checks": checks,
    }
1490
-
1491
-
1492
def model_from_request(request: Any) -> str:
    """Return the request's ``model`` string, or ``"unknown"`` when absent.

    ``"unknown"`` is also returned when ``request`` is not a dict or its
    ``model`` value is not a string.
    """
    if isinstance(request, dict):
        candidate = request.get("model")
        if isinstance(candidate, str):
            return str(candidate)
    return "unknown"
1496
-
1497
-
1498
def build_fingerprints(breakpoints: list[CacheBreakpoint], key: bytes) -> tuple[list[dict[str, Any]], int]:
    """Compute a keyed fingerprint record for every cache breakpoint.

    Args:
        breakpoints: Breakpoints in request order; each exposes ``prefix``,
            ``section``, ``breakpoint_id``, ``kind`` and ``ttl``.
        key: HMAC key used to fingerprint each canonical prefix.

    Returns:
        ``(fingerprints, redactions)``: one dict per breakpoint with
        byte/token sizes and HMAC digests, plus the total of
        ``secret_count_in_text`` over all prefixes.
    """
    fingerprints: list[dict[str, Any]] = []
    redactions = 0
    # Running maxima of earlier prefix sizes, so each breakpoint is charged
    # only for what it adds beyond the largest prefix seen so far.
    previous_prefix_tokens = 0
    previous_prefix_bytes = 0
    for bp in breakpoints:
        canonical = json_bytes(bp.prefix)
        section_canonical = json_bytes(bp.section)
        bp_redactions = secret_count_in_text(canonical)
        redactions += bp_redactions
        prefix_tokens = token_proxy_text(canonical)
        prefix_bytes = byte_len_text(canonical)
        prefix_delta_tokens = max(0, prefix_tokens - previous_prefix_tokens)
        prefix_delta_bytes = max(0, prefix_bytes - previous_prefix_bytes)
        previous_prefix_tokens = max(previous_prefix_tokens, prefix_tokens)
        previous_prefix_bytes = max(previous_prefix_bytes, prefix_bytes)
        # Compute the HMAC once; the display form is a prefix of the same digest.
        digest = keyed_hmac(key, canonical)
        fingerprints.append(
            {
                "breakpoint_id": bp.breakpoint_id,
                "kind": bp.kind,
                "ttl": bp.ttl,
                "hmac": digest,
                "display_hmac": "hmac-sha256:" + digest[:16],
                "prefix_bytes": prefix_bytes,
                "prefix_delta_bytes": prefix_delta_bytes,
                "section_bytes": byte_len_text(section_canonical),
                "tokens_estimated": prefix_tokens,
                "prefix_delta_tokens_estimated": prefix_delta_tokens,
                "section_tokens_estimated": token_proxy_text(section_canonical),
                "redactions_detected": bp_redactions,
            }
        )
    return fingerprints, redactions
1531
-
1532
-
1533
def annotate_cache_state(
    fingerprints: list[dict[str, Any]],
    rows: list[dict[str, Any]],
    now: int,
    *,
    model: str,
    input_rate: float,
    write_mult: dict[str, float],
    read_mult: float,
    exchange_rate: float,
) -> list[dict[str, Any]]:
    """Predict hit/miss/expired for each fingerprint against the ledger rows.

    Args:
        fingerprints: Private fingerprint dicts; each must contain ``hmac``.
        rows: Ledger rows, indexed through ``latest_fingerprint_rows``.
        now: Current time in Unix seconds.
        model: Model name used as part of the lookup key.
        input_rate: Input price in USD per million tokens.
        write_mult: Cache-write multipliers keyed by TTL label.
        read_mult: Cache-read multiplier.
        exchange_rate: Rate passed to ``krw`` for the non-USD cost delta.

    Returns:
        One dict per fingerprint: a copy without the raw ``hmac`` plus
        prediction, risk, reasons and cost-delta fields.
    """
    latest = latest_fingerprint_rows(rows)
    # True when the ledger holds any observed fingerprint, for any model.
    has_prior = bool(latest)
    out: list[dict[str, Any]] = []
    for fp in fingerprints:
        digest = str(fp["hmac"])
        ttl = str(fp.get("ttl") or "5m")
        prev = latest.get((model, digest))
        # Defaults describe "no matching previous entry".
        status = "miss"
        age_seconds: int | None = None
        expires_at_unix = 0
        ttl_remaining_seconds = 0
        reasons: list[str] = []
        if prev:
            created = int(prev.get("created_at_unix") or 0)
            age_seconds = max(0, now - created)
            previous_ttl = str(prev.get("ttl") or "5m")
            # Unknown TTL labels fall back to the "5m" duration.
            expires_at_unix = created + TTL_SECONDS.get(previous_ttl, TTL_SECONDS["5m"])
            ttl_remaining_seconds = max(0, expires_at_unix - now)
            if previous_ttl != ttl:
                status = "miss"
                reasons.append("ttl_mismatch")
            else:
                status = "hit" if ttl_remaining_seconds > 0 else "expired"
        if status == "hit":
            matched = True
            risk = "low"
        elif status == "expired":
            matched = False
            risk = "medium"
            reasons.append("ttl_expired")
        else:
            matched = False
            risk = "high"
            # NOTE(review): this branch also runs after "ttl_mismatch", where
            # the prefix hash did match; confirm "prefix_hash_changed" is
            # intended in that case.
            reasons.append("prefix_hash_changed" if has_prior else "no_previous_cache_entry")
            if has_prior and str(fp.get("kind")) == "tool":
                reasons.append("tool_schema_changed")
        # NOTE(review): the source this was reconstructed from had lost its
        # indentation; this check is assumed to apply to every status, not
        # only the miss branch above. Confirm against the original file.
        if int(fp.get("redactions_detected") or 0) > 0:
            reasons.append("redaction_changed_cacheable_material")
        # Only the tokens this breakpoint adds beyond earlier prefixes are priced.
        tokens = int(fp.get("prefix_delta_tokens_estimated") or 0)
        miss_usd = money(tokens, input_rate, write_mult.get(ttl, write_mult["5m"]))
        hit_usd = money(tokens, input_rate, read_mult)
        confidence = "medium" if int(fp.get("redactions_detected") or 0) > 0 else "high"
        # The raw HMAC stays private; only the display form is exposed.
        visible = {k: v for k, v in fp.items() if k != "hmac"}
        visible.update(
            {
                "id": fp.get("breakpoint_id"),
                "fingerprint": fp.get("display_hmac"),
                "matched": matched,
                "risk": risk,
                "confidence": confidence,
                "projected_tokens": tokens,
                "cost_delta_if_miss": round(krw(max(0.0, miss_usd - hit_usd), exchange_rate), 2),
                "cost_delta_if_miss_usd": round(max(0.0, miss_usd - hit_usd), 8),
                "expires_at_unix": expires_at_unix,
                "ttl_remaining_seconds": ttl_remaining_seconds,
                "reasons": reasons,
                "predicted_cache_state": status,
            }
        )
        # age_seconds is only reported when a previous entry was found.
        if age_seconds is not None:
            visible["age_seconds"] = age_seconds
        out.append(visible)
    return out
1607
-
1608
-
1609
def preflight_command(args: argparse.Namespace) -> int:
    """Estimate cache risk and cost for a request before it is sent.

    Loads the request JSON and the pricing profile, fingerprints the
    request's cache breakpoints with the store's HMAC key, annotates each
    breakpoint against the local ledger, and assembles a report containing
    token/cost ranges, budget state, cache-risk summary, findings and
    recommendations.  Unless ``--no-ledger-write`` is set, a summary row
    (no fingerprints, no prompt text) is appended to the ledger.

    Args:
        args: Parsed CLI namespace for the ``preflight`` subcommand.

    Returns:
        ``3`` when ``--enforce`` is set and at least one warn-level finding
        exists, otherwise ``0``.
    """
    request_raw, _truncated = load_json_input(args.request, max_bytes=args.max_bytes)
    request = require_json_object(request_raw, "request")
    profile = load_pricing_profile(args.pricing_profile, max_bytes=args.max_bytes)
    # A CLI exchange-rate override is written into the profile so later
    # consumers of ``profile`` (e.g. budget_state) see the same value.
    if args.usd_to_krw is not None:
        profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
    if args.budget_usd is not None:
        args.budget_usd = finite_float_arg(args.budget_usd, "--budget-usd", minimum=0.0, allow_zero=True)
    if args.budget_krw is not None:
        args.budget_krw = finite_float_arg(args.budget_krw, "--budget-krw", minimum=0.0, allow_zero=True)
    safety = float(args.safety_factor)
    if not math.isfinite(safety) or safety < 1.0:
        fail("--safety-factor must be >= 1.0")

    store_dir = Path(args.store_dir)
    key = load_or_create_hmac_key(store_dir)
    rows = load_ledger(store_dir)
    now = int(time.time())
    breakpoints, parse_meta = extract_cache_breakpoints(request)
    fingerprints_private, redactions = build_fingerprints(breakpoints, key)

    model = model_from_request(request)
    input_rate, output_rate, model_rate_key = rates_for_model(profile, model)
    write_mult, read_mult = pricing_multipliers(profile)
    exchange = usd_to_krw(profile, args.usd_to_krw)
    cache_breakdowns = annotate_cache_state(
        fingerprints_private,
        rows,
        now,
        model=model,
        input_rate=input_rate,
        write_mult=write_mult,
        read_mult=read_mult,
        exchange_rate=exchange,
    )
    full_prompt_tokens_mid = token_proxy_obj(strip_known_cache_controls(request))
    # The largest per-fingerprint ``tokens_estimated`` is taken as the
    # cacheable token count; the remainder of the prompt is priced as
    # plain (non-cached) input.
    cacheable_tokens_mid = max((int(fp.get("tokens_estimated") or 0) for fp in fingerprints_private), default=0)
    noncacheable_tokens_mid = max(0, full_prompt_tokens_mid - cacheable_tokens_mid)
    output_tokens_max = usage_int(request, "max_tokens")
    output_usd_mid = money(output_tokens_max, output_rate)
    # All three scenarios start from the same base: non-cacheable input plus
    # the full output-token budget.
    predicted_mid_usd = money(noncacheable_tokens_mid, input_rate) + output_usd_mid
    all_miss_mid_usd = predicted_mid_usd
    all_hit_mid_usd = predicted_mid_usd
    for public, private in zip(cache_breakdowns, fingerprints_private):
        tokens = int(private.get("prefix_delta_tokens_estimated") or 0)
        ttl = str(private.get("ttl") or "5m")
        # Predicted hits are priced at the read multiplier; every other state
        # at the write multiplier for the breakpoint's TTL (5m as fallback).
        if public.get("predicted_cache_state") == "hit":
            predicted_mid_usd += money(tokens, input_rate, read_mult)
        else:
            predicted_mid_usd += money(tokens, input_rate, write_mult.get(ttl, write_mult["5m"]))
        all_miss_mid_usd += money(tokens, input_rate, write_mult.get(ttl, write_mult["5m"]))
        all_hit_mid_usd += money(tokens, input_rate, read_mult)

    token_estimate = uncertainty(full_prompt_tokens_mid, safety)
    cost_usd = cost_range(predicted_mid_usd, safety)
    budget = budget_state(cost_usd, args, profile)
    hit_count = sum(1 for bp in cache_breakdowns if bp.get("predicted_cache_state") == "hit")
    miss_count = sum(1 for bp in cache_breakdowns if bp.get("predicted_cache_state") == "miss")
    expired_count = sum(1 for bp in cache_breakdowns if bp.get("predicted_cache_state") == "expired")
    # De-duplicated, sorted union of the string reasons on every breakpoint.
    aggregate_reasons = sorted(
        {
            reason
            for bp in cache_breakdowns
            for reason in bp.get("reasons", [])
            if isinstance(reason, str)
        }
    )
    # Risk level: any miss -> high, else any expired -> medium, else low;
    # "unknown" when there are no breakpoints at all.
    if not cache_breakdowns:
        cache_level = "unknown"
    elif miss_count > 0:
        cache_level = "high"
    elif expired_count > 0:
        cache_level = "medium"
    else:
        cache_level = "low"
    matched_previous_entry = bool(cache_breakdowns) and all(bool(bp.get("matched")) for bp in cache_breakdowns)
    # Only positive remaining TTLs are considered; the aggregate is the
    # smallest of them, or 0 when none is positive.
    ttl_remaining_values = [
        int(bp.get("ttl_remaining_seconds") or 0)
        for bp in cache_breakdowns
        if int(bp.get("ttl_remaining_seconds") or 0) > 0
    ]
    aggregate_ttl_remaining = min(ttl_remaining_values) if ttl_remaining_values else 0
    # The last breakpoint's fingerprint is reported as the aggregate one.
    aggregate_fingerprint = cache_breakdowns[-1].get("fingerprint") if cache_breakdowns else None

    # Confidence starts high and is only ever lowered below.
    confidence = "high"
    reasons: list[str] = []
    if redactions:
        confidence = "medium"
        reasons.append("redaction_changed_cacheable_material")
    if int(parse_meta.get("unsupported_cache_controls") or 0) > 0:
        confidence = "medium" if confidence == "high" else confidence
        reasons.append("unsupported_cache_control_layout")
    if not breakpoints:
        confidence = "low"
        reasons.append("no_cache_control")
        # NOTE(review): this check is assumed to sit under the
        # no-breakpoints branch (as the reason name suggests) — confirm.
        if full_prompt_tokens_mid >= int(args.large_context_tokens):
            reasons.append("no_cache_control_large_context")
    # Confidence reasons are also surfaced in the cache-risk reasons,
    # appended after the sorted per-breakpoint reasons.
    for reason in reasons:
        if reason not in aggregate_reasons:
            aggregate_reasons.append(reason)

    findings: list[dict[str, Any]] = []
    if budget.get("over_budget"):
        findings.append({"severity": "warn", "code": "cost_budget_risk", "message": "high estimate exceeds configured budget"})
    elif budget.get("near_threshold"):
        findings.append({"severity": "info", "code": "near_cost_budget", "message": "uncertainty range crosses configured budget"})
    # A max_input_tokens of 0 (the CLI default) disables this check.
    if args.max_input_tokens and token_estimate["high"] > int(args.max_input_tokens):
        findings.append({"severity": "warn", "code": "input_token_limit_risk", "message": "high estimate exceeds configured input-token threshold"})
    if len(breakpoints) > 4:
        findings.append({"severity": "warn", "code": "too_many_cache_breakpoints", "message": "Anthropic prompt caching supports up to four cache breakpoints; reduce or compile layout"})

    # Blocking requires both --enforce and at least one warn-level finding;
    # info-level findings alone still yield a "warn" decision.
    block = bool(args.enforce and any(f.get("severity") == "warn" for f in findings))
    decision = "block_if_enforced" if block else "warn" if findings else "allow"
    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "preflight",
        "decision": decision,
        "enforcement": "enforced" if args.enforce else "passive",
        "policy": {"action": decision, "passive": not args.enforce, "enforced": bool(args.enforce)},
        "model": model,
        "confidence": {"level": confidence, "reasons": reasons},
        "request": {"model": model, "model_rate_key": model_rate_key, "source_omitted": True},
        "token_estimate": {
            "measurement": "estimated",
            "method": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}",
            "estimator": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}",
            "safety_factor": safety,
            "near_threshold": bool(budget.get("near_threshold")),
            "input_tokens_low": token_estimate["low"],
            "input_tokens_mid": token_estimate["mid"],
            "input_tokens_high": token_estimate["high"],
            "cacheable_tokens_mid": cacheable_tokens_mid,
            "volatile_tokens_mid": noncacheable_tokens_mid,
            "output_tokens_max": output_tokens_max,
            # Spread last: keys returned by uncertainty() override any
            # same-named key written above.
            **token_estimate,
        },
        "pricing": {
            "profile": str(profile.get("name") or "custom"),
            "release_recheck_required": bool(profile.get("release_recheck_required", True)),
            "source_urls": profile.get("source_urls", [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL]),
            "input_usd_per_mtok": input_rate,
            "output_usd_per_mtok": output_rate,
            "usd_to_krw": exchange,
            "cache_write_multipliers": write_mult,
            "cache_read_multiplier": read_mult,
        },
        "cost_estimate": {
            "measurement": "estimated",
            "currency": "USD",
            **cost_usd,
            "krw": {k: round(krw(v, exchange), 2) for k, v in cost_usd.items()},
            "if_cache_hit": cost_range(all_hit_mid_usd, safety),
            # The two miss scenarios below ignore each breakpoint's own TTL
            # and price every prefix delta at a single write multiplier.
            "if_cache_miss_5m_write": cost_range(
                money(noncacheable_tokens_mid, input_rate)
                + output_usd_mid
                + sum(
                    money(int(fp.get("prefix_delta_tokens_estimated") or 0), input_rate, write_mult["5m"])
                    for fp in fingerprints_private
                ),
                safety,
            ),
            "if_cache_miss_1h_write": cost_range(
                money(noncacheable_tokens_mid, input_rate)
                + output_usd_mid
                + sum(
                    money(int(fp.get("prefix_delta_tokens_estimated") or 0), input_rate, write_mult["1h"])
                    for fp in fingerprints_private
                ),
                safety,
            ),
            "worst_case": cost_usd["high"],
            "pricing_profile_id": str(profile.get("name") or "custom"),
            "if_all_cache_miss_usd_mid": round(all_miss_mid_usd, 8),
            "if_all_cache_hit_usd_mid": round(all_hit_mid_usd, 8),
            "estimated_cache_delta_usd_mid": round(max(0.0, all_miss_mid_usd - all_hit_mid_usd), 8),
            "output_usd_mid": round(output_usd_mid, 8),
            "includes_output_token_budget": output_tokens_max > 0,
        },
        "budget": budget,
        "cache_risk": {
            "level": cache_level,
            "confidence": confidence,
            "reasons": aggregate_reasons,
            "aggregate_fingerprint": aggregate_fingerprint,
            "matched_previous_entry": matched_previous_entry,
            "ttl_remaining_seconds": aggregate_ttl_remaining,
            "breakpoints": cache_breakdowns,
            "summary": {"total": len(cache_breakdowns), "predicted_hit": hit_count, "predicted_miss": miss_count, "expired": expired_count},
            "ledger": {
                "uses_keyed_hmac": True,
                "raw_prompt_stored": False,
                "path_omitted": True,
                "append_mode": "o_append_single_write_fsync",
                "malformed_rows_skipped": True,
            },
        },
        "redaction": {"secret_like_values_detected": redactions, "redacted_before_output_or_storage": True},
        "privacy": {
            "raw_prompt_emitted": False,
            "raw_prompt_stored": False,
            "raw_paths_emitted": False,
            "hmac_key_emitted": False,
            "redacted_values": redactions,
        },
        "parse": parse_meta,
        "findings": findings,
        "recommendations": recommendations_for_findings(
            findings,
            cache_level=cache_level,
            confidence=confidence,
            breakpoints=cache_breakdowns,
        ),
        "local_artifact_retrieval": {
            "helps_reduce_sent_context": True,
            "replaces_provider_prompt_cache": False,
            "recommended_helper": "context-guard-artifact/context-guard-pack for large local evidence",
        },
    }

    if not args.no_ledger_write:
        # The preflight row records only counts and flags; it carries no
        # fingerprints and is explicitly marked as not seeding the cache.
        entry: dict[str, Any] = {
            "schema_version": SCHEMA_VERSION,
            "kind": "preflight_blocked" if block else "preflight",
            "created_at_unix": now,
            "model": model,
            "summary": {
                "breakpoints": len(fingerprints_private),
                "secret_like_values_detected": redactions,
                "raw_prompt_stored": False,
                "cache_seeded": False,
            },
        }
        append_ledger(store_dir, entry)

    emit(report, json_mode=args.json)
    return 3 if block else 0
1846
-
1847
-
1848
def usage_int(data: dict[str, Any], key: str) -> int:
    """Read ``data[key]`` as a non-negative integer.

    A missing key counts as 0.  Values that ``int()`` cannot convert
    (``None``, non-numeric strings, NaN, infinities) also yield 0, and
    negative results are clamped to 0.
    """
    candidate = data.get(key, 0)
    try:
        parsed = int(candidate)
    except (TypeError, ValueError, OverflowError):
        parsed = 0
    return parsed if parsed > 0 else 0
1855
-
1856
-
1857
def cache_creation_buckets(usage: dict[str, Any]) -> tuple[int, int]:
    """Return cache-write token counts as a ``(5m, 1h)`` pair.

    Lookup order:
      1. a nested ``cache_creation`` object with ``ephemeral_*`` fields;
      2. the flat ``cache_creation_input_tokens_5m`` / ``_1h`` fields;
      3. the single ``cache_creation_input_tokens`` total, reported
         entirely in the 5m slot.
    """
    nested = usage.get("cache_creation")
    if isinstance(nested, dict):
        five_minute = usage_int(nested, "ephemeral_5m_input_tokens")
        one_hour = usage_int(nested, "ephemeral_1h_input_tokens")
        return five_minute, one_hour

    five_minute = usage_int(usage, "cache_creation_input_tokens_5m")
    one_hour = usage_int(usage, "cache_creation_input_tokens_1h")
    if five_minute == 0 and one_hour == 0:
        # Neither flat bucket is populated: fall back to the undivided total.
        return usage_int(usage, "cache_creation_input_tokens"), 0
    return five_minute, one_hour
1869
-
1870
-
1871
def observe_command(args: argparse.Namespace) -> int:
    """Price provider-reported usage and optionally record cache fingerprints.

    The usage JSON may be the usage object itself or a wrapper that holds it
    under ``"usage"``.  A cost estimate is computed from the token counters
    and emitted.  When ``--request`` is supplied and the provider reported
    cache read/write tokens, the request's breakpoints whose estimated
    token size fits within those tokens are appended to the ledger as an
    ``observe`` row.

    Returns:
        Always ``0``; invalid input is reported through ``fail``.
    """
    payload, _ = load_json_input(args.usage, max_bytes=args.max_bytes)
    wrapped = payload.get("usage") if isinstance(payload, dict) else None
    usage = wrapped if isinstance(wrapped, dict) else payload
    if not isinstance(usage, dict):
        fail("usage must be a JSON object or an object containing a usage object")

    profile = load_pricing_profile(args.pricing_profile, max_bytes=args.max_bytes)
    if args.usd_to_krw is not None:
        profile["usd_to_krw"] = usd_to_krw(profile, args.usd_to_krw)
    payload_model = payload.get("model") if isinstance(payload, dict) else ""
    model = str(args.model or payload_model or "unknown")
    input_rate, output_rate, model_rate_key = rates_for_model(profile, model)
    write_mult, read_mult = pricing_multipliers(profile)
    exchange = usd_to_krw(profile, args.usd_to_krw)

    input_tokens = usage_int(usage, "input_tokens")
    output_tokens = usage_int(usage, "output_tokens")
    write_5m, write_1h = cache_creation_buckets(usage)
    cache_read = usage_int(usage, "cache_read_input_tokens")

    input_cost = money(input_tokens, input_rate)
    output_cost = money(output_tokens, output_rate)
    write_5m_cost = money(write_5m, input_rate, write_mult["5m"])
    write_1h_cost = money(write_1h, input_rate, write_mult["1h"])
    read_cost = money(cache_read, input_rate, read_mult)
    # Summed left to right with ``+`` so float rounding is unchanged.
    cost_usd_mid = input_cost + output_cost + write_5m_cost + write_1h_cost + read_cost

    usage_view = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_creation_input_tokens_5m": write_5m,
        "cache_creation_input_tokens_1h": write_1h,
        "cache_read_input_tokens": cache_read,
    }
    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "observe",
        "measurement": "from_usage",
        "usage_source": "provider_usage_fields",
        "request": {"model": model, "model_rate_key": model_rate_key, "source_omitted": True},
        "usage": usage_view,
        "cost_estimate": {
            "currency": "USD",
            "mid": round(cost_usd_mid, 8),
            "krw_mid": round(krw(cost_usd_mid, exchange), 2),
            "pricing_profile": str(profile.get("name") or "custom"),
            "release_recheck_required": bool(profile.get("release_recheck_required", True)),
            "source_urls": profile.get("source_urls", [ANTHROPIC_DOCS_URL, ANTHROPIC_PRICING_URL]),
        },
        "cache_effect": {
            "observed_cache_read_tokens": cache_read,
            "observed_cache_write_tokens": write_5m + write_1h,
            "provider_measured": True,
        },
        "privacy": {"raw_request_stored": False, "raw_usage_stored": False, "path_omitted": True},
    }

    # Without a request there is nothing to fingerprint; report cost only.
    if not args.request:
        emit(report, json_mode=args.json)
        return 0

    ledger_flags = {"uses_keyed_hmac": True, "raw_prompt_stored": False, "path_omitted": True}
    confirmed_cache_tokens = write_5m + write_1h + cache_read
    if confirmed_cache_tokens <= 0:
        report["ledger"] = {"updated": False, "reason": "no_provider_cache_tokens", **ledger_flags}
        emit(report, json_mode=args.json)
        return 0

    request_raw, _ = load_json_input(args.request, max_bytes=args.max_bytes)
    request = require_json_object(request_raw, "request")
    store_dir = Path(args.store_dir)
    key = load_or_create_hmac_key(store_dir)
    breakpoints, _meta = extract_cache_breakpoints(request)
    fingerprints_private, redactions = build_fingerprints(breakpoints, key)

    # Keep only breakpoints whose estimated size fits within the
    # provider-reported cache tokens.
    confirmed = []
    for fingerprint in fingerprints_private:
        if int(fingerprint.get("tokens_estimated") or 0) <= confirmed_cache_tokens:
            confirmed.append(fingerprint)
    if not confirmed:
        report["ledger"] = {"updated": False, "reason": "insufficient_provider_cache_tokens", **ledger_flags}
        emit(report, json_mode=args.json)
        return 0

    persisted_fields = {
        "breakpoint_id",
        "kind",
        "ttl",
        "hmac",
        "prefix_bytes",
        "section_bytes",
        "tokens_estimated",
        "section_tokens_estimated",
        "redactions_detected",
    }
    ledger_row = {
        "schema_version": SCHEMA_VERSION,
        "kind": "observe",
        "created_at_unix": int(time.time()),
        "model": model,
        "fingerprints": [
            {name: value for name, value in fingerprint.items() if name in persisted_fields}
            for fingerprint in confirmed
        ],
        "usage": report["usage"],
        "summary": {"breakpoints": len(confirmed), "secret_like_values_detected": redactions, "raw_prompt_stored": False},
    }
    append_ledger(store_dir, ledger_row)
    report["ledger"] = {"updated": True, "confirmed_fingerprints": len(confirmed), **ledger_flags}
    emit(report, json_mode=args.json)
    return 0
1976
-
1977
-
1978
def ledger_command(args: argparse.Namespace) -> int:
    """Summarize the local ledger without exposing prompt material.

    Emits the total row count, a per-``kind`` tally, the newest row's
    timestamp, and up to ``--limit`` most recent rows reduced to kind,
    timestamp, model and fingerprint count.  Always returns ``0``.
    """
    rows = load_ledger(Path(args.store_dir))
    newest = rows[-1] if rows else None

    kind_totals: dict[str, int] = {}
    for row in rows:
        label = str(row.get("kind") or "unknown")
        kind_totals[label] = kind_totals.get(label, 0) + 1

    def fingerprint_count(row: dict[str, Any]) -> int:
        # Only a real list counts; any other shape is reported as 0.
        stored = row.get("fingerprints")
        return len(row.get("fingerprints", [])) if isinstance(stored, list) else 0

    limit = int(args.limit)
    # ``rows[-0:]`` would return every row, so a zero limit is handled apart.
    tail = rows[-limit:] if limit != 0 else []
    visible_rows = [
        {
            "kind": row.get("kind"),
            "created_at_unix": row.get("created_at_unix"),
            "model": row.get("model"),
            "fingerprint_count": fingerprint_count(row),
            "raw_prompt_stored": False,
        }
        for row in tail
    ]

    latest_created = newest.get("created_at_unix") if isinstance(newest, dict) else None
    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "ledger",
        "summary": {"entries": len(rows), "counts": kind_totals, "latest_created_at_unix": latest_created},
        "ledger": {"uses_keyed_hmac": True, "raw_prompt_stored": False, "path_omitted": True},
        "entries": visible_rows,
    }
    emit(report, json_mode=args.json)
    return 0
2008
-
2009
-
2010
def safe_section_id(section: dict[str, Any], index: int) -> str:
    """Return a sanitized, length-limited identifier for a manifest section.

    The section's ``id`` (or, failing that, ``name``) is used.  Each run of
    characters outside ``[A-Za-z0-9_.:-]`` becomes a single ``-``, leading
    and trailing dashes are removed, and the result is capped at 80
    characters.  When nothing usable remains, ``section-<index + 1>`` is
    returned.

    Args:
        section: Raw manifest section.
        index: Zero-based position of the section in the manifest.
    """
    fallback = f"section-{index + 1}"
    raw = section.get("id") or section.get("name") or fallback
    text = re.sub(r"[^A-Za-z0-9_.:-]+", "-", str(raw)).strip("-")[:80]
    # Truncation can cut right after a separator; strip again so the id
    # never ends in a dangling dash.
    text = text.rstrip("-")
    return text or fallback
2014
-
2015
-
2016
def section_ttl(section: dict[str, Any]) -> str:
    """Normalize a section's cache TTL to ``"1h"`` or ``"5m"``.

    Reads ``ttl`` (falling back to ``cache_ttl``).  The value is trimmed and
    lower-cased before matching, so ``" 1H "`` is treated like ``"1h"``.
    ``"1h"``, ``"60m"`` and ``"hour"`` map to ``"1h"``; anything else,
    including a missing value, maps to ``"5m"``.
    """
    ttl = str(section.get("ttl") or section.get("cache_ttl") or "5m").strip().lower()
    return "1h" if ttl in {"1h", "60m", "hour"} else "5m"
2019
-
2020
-
2021
# Transforms that may be applied to protected sections.
PROTECTED_ALLOWED_TRANSFORMS = [
    "exact_dedupe",
    "structural_window",
    "line_truncate",
    "whitespace_normalize",
    "json_compact",
    "artifact_retrieval",
]
# Transforms that are denied for protected sections.
PROTECTED_DENIED_TRANSFORMS = [
    "semantic_compress",
    "paraphrase",
    "identifier_rewrite",
    "numeric_rewrite",
    "hash_rewrite",
    "path_rewrite",
    "quoted_literal_rewrite",
]
# Matches each run of characters that is not a lowercase letter or digit.
PROTECTED_ZONE_CLASS_RE = re.compile(r"[^a-z0-9]+")
# Content-type labels that may be reported for a section.
KNOWN_PROTECTED_CONTENT_TYPES = {
    "json",
    "diff",
    "log",
    "search",
    "code",
    "prose",
    "unknown",
}
# Zone-class labels recognized in manifests.
KNOWN_PROTECTED_ZONE_CLASSES = {
    "code_fence",
    "diff",
    "identifier",
    "numeric_constant",
    "hash",
    "path",
    "stack_frame",
    "quoted_string",
    "json_key",
}
2036
-
2037
-
2038
def manifest_bool(value: Any) -> bool:
    """Interpret a manifest flag as a boolean.

    Strings are true only when, trimmed and lower-cased, they equal one of
    ``1``, ``true``, ``yes``, ``y`` or ``on``.  Every other value follows
    ordinary Python truthiness.
    """
    if isinstance(value, str):
        truthy_words = {"1", "true", "yes", "y", "on"}
        return value.strip().lower() in truthy_words
    # For real booleans ``bool(value)`` is the value itself.
    return bool(value)
2044
-
2045
-
2046
def protected_zone_classes(raw: dict[str, Any]) -> list[str]:
    """Return the sorted, de-duplicated known zone classes of a section.

    Reads ``protected_zone_classes`` (falling back to ``zone_classes``),
    given either as a comma-separated string or as a list.  Each entry is
    trimmed, lower-cased and has runs of non-alphanumerics collapsed to
    ``_``; entries not in ``KNOWN_PROTECTED_ZONE_CLASSES`` are dropped.
    """
    declared = raw.get("protected_zone_classes") or raw.get("zone_classes") or []
    if isinstance(declared, str):
        candidates = [part.strip() for part in declared.split(",")]
    elif isinstance(declared, list):
        candidates = [str(part).strip() for part in declared]
    else:
        candidates = []

    normalized = set()
    for candidate in candidates:
        if not candidate:
            continue
        normalized.add(PROTECTED_ZONE_CLASS_RE.sub("_", candidate.lower()).strip("_"))
    return sorted(label for label in normalized if label in KNOWN_PROTECTED_ZONE_CLASSES)
2056
-
2057
-
2058
def protected_content_type(raw: dict[str, Any]) -> str:
    """Map a section's declared content type onto a known label.

    Reads ``content_type`` (falling back to ``type``), trims and lower-cases
    it, and returns it only if it is in ``KNOWN_PROTECTED_CONTENT_TYPES``.
    Any other value yields ``"unknown"``, so raw manifest strings are never
    echoed back.
    """
    declared = raw.get("content_type") or raw.get("type") or "unknown"
    label = str(declared).strip().lower()
    if label in KNOWN_PROTECTED_CONTENT_TYPES:
        return label
    return "unknown"
2062
-
2063
-
2064
def section_is_protected(raw: dict[str, Any], zone_classes: list[str]) -> bool:
    """Decide whether a manifest section is treated as protected.

    A section is protected when its ``protected`` or ``semantic_sensitive``
    flag is truthy (per ``manifest_bool``) or when it declares at least one
    recognized zone class.
    """
    if manifest_bool(raw.get("protected")):
        return True
    if manifest_bool(raw.get("semantic_sensitive")):
        return True
    return bool(zone_classes)
2070
-
2071
-
2072
def compile_command(args: argparse.Namespace) -> int:
    """Produce a cache-friendly layout advisory from a section manifest.

    The manifest is either a list of sections or an object holding them
    under ``sections`` / ``cache_breakpoints``.  Entries that are not
    objects are ignored.  The report contains a recommended ordering
    (stable before volatile, 1h before 5m, larger first), ordering and size
    findings, and the transform policy for protected sections.

    Returns:
        Always ``0``; a malformed manifest is reported through ``fail``.
    """
    manifest, _ = load_json_input(args.manifest, max_bytes=args.max_bytes)
    if isinstance(manifest, list):
        raw_sections = manifest
    elif isinstance(manifest, dict):
        raw_sections = manifest.get("sections") or manifest.get("cache_breakpoints") or []
    else:
        raw_sections = []
    if not isinstance(raw_sections, list):
        fail("manifest sections must be a list")

    def is_large(sec: dict[str, Any]) -> bool:
        # True when the section's byte size exceeds --large-section-bytes.
        return int(sec["bytes"] or 0) > int(args.large_section_bytes)

    def info(code: str, sec: dict[str, Any], message: str) -> dict[str, Any]:
        # Build an info-level finding attached to one section.
        return {"severity": "info", "code": code, "section_id": sec["id"], "message": message}

    sections: list[dict[str, Any]] = []
    for position, raw in enumerate(raw_sections):
        if not isinstance(raw, dict):
            continue
        zone_classes = protected_zone_classes(raw)
        protected = section_is_protected(raw, zone_classes)
        # Protected sections get a positional id instead of the manifest one.
        if protected:
            section_id = f"protected-section-{position + 1}"
        else:
            section_id = safe_section_id(raw, position)
        sections.append(
            {
                "id": section_id,
                "source_id_omitted": protected,
                "ttl": section_ttl(raw),
                "volatile": manifest_bool(raw.get("volatile")) or manifest_bool(raw.get("changes_often")),
                "bytes": safe_int(raw.get("bytes") or raw.get("estimated_bytes") or 0),
                "tokens_estimated": safe_int(raw.get("tokens") or raw.get("estimated_tokens") or 0),
                "has_path": "path" in raw or "file" in raw,
                "protected": protected,
                "content_type": protected_content_type(raw),
                "protected_zone_classes": zone_classes,
            }
        )

    def order_key(sec: dict[str, Any]) -> tuple[bool, int, int, str]:
        # Stable first, then 1h before 5m, then larger first, then by id.
        ttl_rank = 0 if sec["ttl"] == "1h" else 1
        return (bool(sec["volatile"]), ttl_rank, -int(sec["bytes"] or 0), str(sec["id"]))

    recommended = sorted(sections, key=order_key)

    findings: list[dict[str, Any]] = []
    # Only the first offender of each ordering rule is reported.
    ttl_offender = next(
        (
            sec
            for position, sec in enumerate(sections)
            if sec["ttl"] == "5m" and any(later["ttl"] == "1h" for later in sections[position + 1 :])
        ),
        None,
    )
    if ttl_offender is not None:
        findings.append({"severity": "warn", "code": "ttl_order_violation", "section_id": ttl_offender["id"], "message": "place 1h cacheable stable sections before 5m sections"})
    volatile_offender = next(
        (
            sec
            for position, sec in enumerate(sections)
            if sec["volatile"] and any(not later["volatile"] for later in sections[position + 1 :])
        ),
        None,
    )
    if volatile_offender is not None:
        findings.append({"severity": "warn", "code": "volatile_prefix_before_stable_context", "section_id": volatile_offender["id"], "message": "move volatile context toward the tail so stable prefixes can be reused"})
    if len(sections) > 4:
        findings.append({"severity": "warn", "code": "too_many_cache_breakpoints", "message": "reduce to four or fewer provider cache breakpoints"})

    for sec in sections:
        large = is_large(sec)
        guarded = sec.get("protected")
        if large:
            findings.append(
                info(
                    "use_local_artifact_retrieval",
                    sec,
                    "store/query large local evidence with context-guard-artifact or context-guard-pack; RAM/disk can reduce sent context but does not replace provider prompt cache",
                )
            )
        if guarded:
            findings.append(
                info(
                    "protected_zone_structural_only",
                    sec,
                    "protected sections deny semantic/paraphrase compression; use structural transforms and exact retrieval",
                )
            )
        if guarded and sec.get("volatile"):
            findings.append(
                info(
                    "protected_volatile_tail",
                    sec,
                    "volatile controls cache ordering toward the tail; protection controls transforms and retrieval",
                )
            )
        if guarded and large:
            findings.append(
                info(
                    "protected_zone_artifact_retrieval",
                    sec,
                    "large protected evidence should be stored locally and sent as exact retrieved slices, not semantically compressed",
                )
            )

    protected_sections = [sec for sec in sections if sec.get("protected")]
    has_protected = bool(protected_sections)
    protected_policy_sections = []
    for sec in protected_sections:
        large = is_large(sec)
        protected_policy_sections.append(
            {
                "section_id": sec["id"],
                "content_type": sec["content_type"],
                "volatile": sec["volatile"],
                "ttl": sec["ttl"],
                "large": large,
                "zone_classes": sec["protected_zone_classes"],
                "semantic_compress": False,
                "retrieval_required": large,
                "cache_ordering": "volatile_tail" if sec["volatile"] else "stable_prefix_eligible",
                "source_id_omitted": bool(sec["source_id_omitted"]),
            }
        )

    recommended_order = []
    for sec in recommended:
        recommended_order.append(
            {
                "section_id": sec["id"],
                "ttl": sec["ttl"],
                "volatile": sec["volatile"],
                "protected": sec["protected"],
                "content_type": sec["content_type"],
                "path_omitted": bool(sec["has_path"]),
                "source_id_omitted": bool(sec["source_id_omitted"]),
                "transform_policy": "structural_only" if sec["protected"] else "default",
            }
        )

    report = {
        "schema_version": SCHEMA_VERSION,
        "tool": TOOL_NAME,
        "mode": "compile",
        "provider_cache": {"replaced_by_local_ram_or_disk": False, "stable_prefix_required": True, "max_breakpoints_advisory": 4},
        "recommended_order": recommended_order,
        "findings": findings,
        "protected_zone_policy": {
            "enabled": has_protected,
            "section_count": len(protected_sections),
            "semantic_compress": False,
            "allowed_transforms": PROTECTED_ALLOWED_TRANSFORMS,
            "denied_transforms": PROTECTED_DENIED_TRANSFORMS,
            "raw_spans_stored": False,
            "protected_volatile_precedence": "volatile controls cache ordering; protection controls transforms and retrieval",
            "sections": protected_policy_sections,
        },
        "transform_policy": {
            "scope": "protected_sections" if has_protected else "none",
            "protected_sections_only": True,
            "semantic_transforms_allowed": False if has_protected else None,
            "semantic_compress": False if has_protected else None,
            "allowed": PROTECTED_ALLOWED_TRANSFORMS if has_protected else [],
            "denied": PROTECTED_DENIED_TRANSFORMS if has_protected else [],
            "large_protected_sections_use": "local_artifact_retrieval",
        },
        "local_artifact_retrieval": {
            "recommended_for_large_sections": True,
            "helpers": ["context-guard-artifact", "context-guard-pack"],
            "replaces_provider_prompt_cache": False,
        },
    }
    emit(report, json_mode=args.json)
    return 0
2213
-
2214
-
2215
def recommendations_for_findings(
    findings: list[dict[str, Any]],
    *,
    cache_level: str,
    confidence: str,
    breakpoints: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Turn preflight results into an ordered list of recommendations.

    Up to four recommendations are produced, in this order:
      * a ``high`` or ``medium`` cache-risk level;
      * a confidence other than ``high``;
      * a ``cost_budget_risk`` finding;
      * any breakpoint whose ``prefix_delta_bytes`` exceeds
        ``DEFAULT_LARGE_SECTION_BYTES``.

    Each entry carries an ``id``, a ``priority`` and an ``action`` text.
    """
    recs: list[dict[str, Any]] = []

    def recommend(rec_id: str, priority: str, action: str) -> None:
        recs.append({"id": rec_id, "priority": priority, "action": action})

    finding_codes = {str(finding.get("code")) for finding in findings}

    if cache_level in {"high", "medium"}:
        recommend(
            "stabilize-cache-prefix",
            "P1",
            "Move stable tools/system/context before volatile questions, timestamps, logs, and task-specific output.",
        )
    if confidence != "high":
        recommend(
            "verify-cacheable-material",
            "P1",
            "Redaction or unsupported cacheable material lowered confidence; compare exact request construction before relying on cache-risk predictions.",
        )
    if "cost_budget_risk" in finding_codes:
        recommend(
            "reduce-or-confirm-budget",
            "P1",
            "Use context-guard-pack/artifact slices, clear stale context, or explicit approval before sending an over-budget request.",
        )

    has_oversized_prefix = any(
        int(bp.get("prefix_delta_bytes") or 0) > DEFAULT_LARGE_SECTION_BYTES
        for bp in breakpoints
    )
    if has_oversized_prefix:
        recommend(
            "use-local-artifact-retrieval",
            "P2",
            "Store large local evidence as artifacts or packs and send exact slices instead of full logs/files; this does not replace provider prompt cache.",
        )
    return recs
2257
-
2258
-
2259
def emit(data: dict[str, Any], *, json_mode: bool) -> None:
    """Print a report as JSON or as a one-line text summary.

    In JSON mode the report is dumped with sorted keys and two-space
    indentation; a ``ValueError`` raised while doing so (``allow_nan`` is
    off) is reported through ``fail``.  In text mode the line printed
    depends on ``data["mode"]``; any mode other than ``preflight``,
    ``observe`` or ``compile`` gets the ledger summary line.
    """
    if json_mode:
        try:
            rendered = json.dumps(data, ensure_ascii=False, sort_keys=True, indent=2, allow_nan=False)
            print(rendered)
        except ValueError as exc:
            fail(f"JSON output contained a non-finite number: {exc}")
        return

    def dict_field(name: str) -> dict[str, Any]:
        # Return data[name] when it is a dict, otherwise an empty dict.
        value = data.get(name, {})
        return value if isinstance(data.get(name), dict) else {}

    mode = data.get("mode")
    if mode == "preflight":
        decision = str(data.get("decision", "allow"))
        cache_risk = data.get("cache_risk")
        summary = data.get("cache_risk", {}).get("summary", {}) if isinstance(cache_risk, dict) else {}
        cost = dict_field("cost_estimate")
        print(f"{TOOL_NAME}: {decision} · cache {summary.get('predicted_hit', 0)} hit/{summary.get('predicted_miss', 0)} miss · est ${cost.get('mid', 0)}")
        return
    if mode == "observe":
        usage = dict_field("usage")
        cost = dict_field("cost_estimate")
        print(f"{TOOL_NAME}: observed cache_read={usage.get('cache_read_input_tokens', 0)} tokens · est ${cost.get('mid', 0)}")
        return
    if mode == "compile":
        findings = data.get("findings", []) if isinstance(data.get("findings"), list) else []
        print(f"{TOOL_NAME}: compile findings={len(findings)}")
        return
    summary = dict_field("summary")
    print(f"{TOOL_NAME}: ledger entries={summary.get('entries', 0)}")
2282
-
2283
-
2284
def add_common_cost_args(parser: argparse.ArgumentParser) -> None:
    """Register the options shared by the cost-estimating subcommands.

    Adds ``--pricing-profile``, ``--usd-to-krw``, ``--max-bytes`` and
    ``--json`` to ``parser``, in that order.
    """
    shared_options: tuple[tuple[str, dict[str, Any]], ...] = (
        (
            "--pricing-profile",
            {"help": "JSON string or file with input/output rates, cache multipliers, and usd_to_krw"},
        ),
        (
            "--usd-to-krw",
            {"type": float, "help": "override USD→KRW exchange rate used for estimates"},
        ),
        (
            "--max-bytes",
            {
                "type": int,
                "default": DEFAULT_MAX_BYTES,
                "help": f"maximum JSON input and pricing profile file bytes (default: {DEFAULT_MAX_BYTES})",
            },
        ),
        (
            "--json",
            {"action": "store_true", "help": "emit machine-readable JSON"},
        ),
    )
    for flag, options in shared_options:
        parser.add_argument(flag, **options)
2289
-
2290
-
2291
def _add_preflight_subcommand(sub: Any) -> None:
    """Register the ``preflight`` subcommand and its options."""
    preflight = sub.add_parser(
        "preflight",
        help="estimate cache miss risk and request cost before an API call",
    )
    preflight.add_argument("--request", default="-", help="Anthropic-like request JSON path, or '-' for stdin")
    preflight.add_argument("--store-dir", default=DEFAULT_STORE_DIR, help="local HMAC ledger directory (path is never emitted in JSON)")
    preflight.add_argument("--budget-usd", type=float, help="warn/block when high estimate exceeds this USD budget")
    preflight.add_argument("--budget-krw", type=float, help="warn/block when high estimate exceeds this KRW budget")
    preflight.add_argument("--max-input-tokens", type=int, default=0, help="warn/block when high estimated input tokens exceed this threshold")
    preflight.add_argument("--large-context-tokens", type=int, default=200_000, help="threshold for no-cache-control large-context risk")
    preflight.add_argument("--safety-factor", type=float, default=DEFAULT_SAFETY_FACTOR, help="high estimate multiplier (default: 1.25)")
    preflight.add_argument("--enforce", action="store_true", help="return nonzero on warn-level findings; default is passive exit 0")
    preflight.add_argument("--no-ledger-write", action="store_true", help="do not append this preflight to the local HMAC ledger")
    add_common_cost_args(preflight)
    preflight.set_defaults(func=preflight_command)


def _add_observe_subcommand(sub: Any) -> None:
    """Register the ``observe`` subcommand and its options."""
    observe = sub.add_parser(
        "observe",
        help="estimate observed cost from Anthropic usage fields",
    )
    observe.add_argument("--usage", default="-", help="usage JSON path, or '-' for stdin")
    observe.add_argument("--request", help="optional request JSON to fingerprint into the ledger")
    observe.add_argument("--model", help="model name when usage JSON does not include it")
    observe.add_argument("--store-dir", default=DEFAULT_STORE_DIR, help="local HMAC ledger directory")
    add_common_cost_args(observe)
    observe.set_defaults(func=observe_command)


def _add_ledger_subcommand(sub: Any) -> None:
    """Register the ``ledger`` subcommand and its options."""
    ledger = sub.add_parser(
        "ledger",
        help="summarize the local HMAC ledger without revealing prompts",
    )
    ledger.add_argument("--store-dir", default=DEFAULT_STORE_DIR, help="local HMAC ledger directory")
    ledger.add_argument("--limit", type=non_negative_int_arg, default=20, help="maximum recent entries to include")
    ledger.add_argument("--json", action="store_true", help="emit machine-readable JSON")
    ledger.set_defaults(func=ledger_command)


def _add_compile_subcommand(sub: Any) -> None:
    """Register the ``compile`` subcommand and its options."""
    compile_parser = sub.add_parser(
        "compile",
        help="compile a cache-friendly section layout advisory from a manifest",
    )
    compile_parser.add_argument("--manifest", default="-", help="section manifest JSON path, or '-' for stdin")
    compile_parser.add_argument("--large-section-bytes", type=int, default=DEFAULT_LARGE_SECTION_BYTES, help="recommend local artifact retrieval above this size")
    compile_parser.add_argument("--max-bytes", type=int, default=DEFAULT_MAX_BYTES, help=f"maximum manifest JSON bytes (default: {DEFAULT_MAX_BYTES})")
    compile_parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
    compile_parser.set_defaults(func=compile_command)


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level CLI parser.

    Subcommands are registered in the order ``preflight``, ``observe``,
    ``ledger``, ``compile``.  Each one stores its handler as the ``func``
    default, and the chosen subcommand name is stored under ``command``.
    """
    parser = argparse.ArgumentParser(
        prog=TOOL_NAME,
        description="Passive Anthropic prompt-cache cost preflight, observation, ledger, and layout compiler.",
    )
    sub = parser.add_subparsers(dest="command")
    for register in (
        _add_preflight_subcommand,
        _add_observe_subcommand,
        _add_ledger_subcommand,
        _add_compile_subcommand,
    ):
        register(sub)
    return parser
2333
-
2334
-
2335
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return a process exit code.

    With no subcommand the help text is printed and ``0`` is returned.
    Otherwise the subcommand's handler runs and its result is returned as an
    int.  A ``CostGuardError`` is printed to stderr, prefixed with the tool
    name, and mapped to exit code ``2``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if getattr(args, "command", None):
        try:
            exit_code = int(args.func(args))
        except CostGuardError as exc:
            print(f"{TOOL_NAME}: {exc}", file=sys.stderr)
            exit_code = 2
        return exit_code
    parser.print_help()
    return 0
2346
-
2347
-
2348
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    sys.exit(main())