@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -0,0 +1,665 @@
1
+ #!/usr/bin/env python3
2
+ """Static prompt cacheability lint for ContextGuard.
3
+
4
+ ``context-guard-cache-score`` is advisory-only: it does not call provider APIs,
5
+ does not estimate price, does not observe cache hits, and does not write raw
6
+ prompts to disk. It only inspects a prompt/request fixture for stable-prefix
7
+ shape, common dynamic markers, deterministic ordering hints, and provider cache
8
+ eligibility using a tokenizer-free char/4 proxy.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import math
15
+ import os
16
+ from pathlib import Path
17
+ import re
18
+ import stat
19
+ import sys
20
+ from typing import Any, NoReturn
21
+
22
# --- Tool identity -----------------------------------------------------------
TOOL_NAME = "context-guard-cache-score"
SCHEMA_VERSION = "contextguard.cache-score.v1"

# --- Numeric limits and defaults ---------------------------------------------
DEFAULT_MAX_INPUT_BYTES = 1_000_000
# Token counts are a tokenizer-free proxy: ceil(chars / 4).
TOKEN_PROXY_CHARS_PER_TOKEN = 4
DEFAULT_EXPECTED_REUSES = 1
MAX_EXPECTED_REUSES = 1_000_000
MAX_CACHE_MULTIPLIER = 1_000_000.0
# Tolerance applied when comparing relative savings against zero.
SAVINGS_EPSILON = 1e-12

# Provider and model minimums change over time; these values are advisory
# defaults that --minimum-cacheable-tokens can override.
PROVIDER_MINIMUM_CACHEABLE_TOKENS: dict[str, int] = {
    "openai": 1024,
    "anthropic": 1024,
    "gemini": 2048,
    "generic": 1024,
}

# One caveat sentence per provider; keys mirror PROVIDER_MINIMUM_CACHEABLE_TOKENS.
PROVIDER_CAVEATS: dict[str, str] = {
    "openai": (
        "OpenAI prompt caching is automatic for eligible prompts; "
        "verify real hits with provider "
        "usage.prompt_tokens_details.cached_tokens."
    ),
    "anthropic": (
        "Anthropic prompt caching is model/platform-specific and usually "
        "needs cache_control around the reusable prefix; "
        "verify cache_creation/read usage fields."
    ),
    "gemini": (
        "Gemini context caching thresholds vary by model/platform; "
        "verify with provider cached-content usage fields and "
        "override the threshold when your model differs."
    ),
    "generic": (
        "Generic cache scoring uses a conservative threshold only; "
        "check your provider documentation before claiming cache eligibility."
    ),
}

# First path components that may be symlinks to these exact targets without
# being rejected (presumably the macOS /tmp and /var layout -- confirm).
ALLOWED_FIRST_ABSOLUTE_SYMLINKS: dict[str, Path] = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}

# --- JSON walk limits and key classifiers -------------------------------------
MAX_JSON_PATH_SEGMENT_CHARS = 64
MAX_JSON_WALK_NODES = 10_000
MAX_JSON_WALK_DEPTH = 64
MAX_JSON_SHAPE_WARNINGS = 200
# Keys matching this pattern are echoed verbatim in warning paths.
SAFE_JSON_PATH_SEGMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]{0,63}$")
# Keys that look like per-request values.
DYNAMIC_JSON_KEY_RE = re.compile(r"(?i)(request|trace|nonce|random|timestamp|created[_-]?at|updated[_-]?at|date)")
# Keys that look like credentials; never echoed in warning paths.
SENSITIVE_JSON_KEY_RE = re.compile(
    r"(?i)(authorization|api[_-]?key|apikey|token|secret|password|passwd|pwd|client[_-]?secret|credential|signature|sig|private[_-]?key|privatekey|ssh[_-]?key|sshkey)"
)

# (marker name, pattern) pairs searched in the raw prompt text.
DYNAMIC_MARKERS: tuple[tuple[str, re.Pattern[str]], ...] = (
    (
        "iso_timestamp",
        re.compile(r"\b20\d{2}-\d{2}-\d{2}[T ][0-2]\d:[0-5]\d(?::[0-5]\d(?:\.\d{1,9})?)?(?:Z|[+-][0-2]\d:?[0-5]\d)?\b"),
    ),
    (
        "uuid",
        re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}\b"),
    ),
    (
        "unix_epoch_ms",
        re.compile(r"\b1[6-9]\d{11}\b"),
    ),
    (
        "request_id_key",
        re.compile(r"(?i)\b(?:request[_-]?id|trace[_-]?id|nonce|random[_-]?(?:id|seed)?|timestamp|created[_-]?at|updated[_-]?at|date_now)\b"),
    ),
)
78
+
79
+
80
class CacheScoreError(ValueError):
    """Fail-closed error whose message is meant to be shown to the user."""
82
+
83
+
84
def fail(message: str) -> NoReturn:
    """Raise ``CacheScoreError`` carrying *message*; never returns."""
    error = CacheScoreError(message)
    raise error
86
+
87
+
88
def byte_len_text(text: str) -> int:
    """Return the size of *text* in bytes once encoded as UTF-8.

    Characters that cannot be encoded are substituted (``errors="replace"``)
    rather than raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    return len(encoded)
90
+
91
+
92
def json_bytes(data: Any, *, indent: int | None = None) -> str:
    """Serialize *data* as key-sorted, non-ASCII-preserving JSON text.

    Without *indent* the output is compact (``","`` / ``":"`` separators);
    with *indent* the ``json`` module's default separators are used.
    """
    if indent is None:
        return json.dumps(data, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    return json.dumps(data, ensure_ascii=False, sort_keys=True, indent=indent)
94
+
95
+
96
def json_path_child(path: str, key: object) -> str:
    """Append a segment for *key* to the warning path *path*.

    Keys that look dynamic or sensitive are never echoed; other keys are
    echoed only when they match the safe-segment pattern. Anything else is
    replaced by a placeholder (with its length when the key is long).
    """
    segment = str(key)
    if DYNAMIC_JSON_KEY_RE.search(segment) or SENSITIVE_JSON_KEY_RE.search(segment):
        suffix = "[redacted-key]"
    elif SAFE_JSON_PATH_SEGMENT_RE.fullmatch(segment):
        suffix = segment
    elif len(segment) > MAX_JSON_PATH_SEGMENT_CHARS:
        suffix = f"[key:{len(segment)} chars]"
    else:
        suffix = "[key]"
    return f"{path}.{suffix}"
106
+
107
+
108
def bounded_int(value: object, *, default: int, minimum: int, maximum: int, name: str) -> int:
    """Parse *value* as an integer within ``[minimum, maximum]``.

    Args:
        value: Raw value (typically a CLI string); ``None`` selects *default*.
        default: Value used when *value* is ``None``.
        minimum: Inclusive lower bound.
        maximum: Inclusive upper bound.
        name: Option name used in error messages.

    Returns:
        The validated integer.

    Raises:
        CacheScoreError: (via ``fail``) when the value is not an integer or
            lies outside the bounds.
    """
    candidate = default if value is None else value
    # Mirror bounded_float: a bool is not a meaningful numeric option value.
    if isinstance(candidate, bool):
        fail(f"{name} must be an integer")
    # int() would silently truncate 3.9 to 3 (and raise on nan/inf); reject
    # every float that is not a whole number instead of guessing.
    if isinstance(candidate, float) and not candidate.is_integer():
        fail(f"{name} must be an integer")
    try:
        number = int(candidate)
    except (TypeError, ValueError, OverflowError):
        fail(f"{name} must be an integer")
    if number < minimum:
        fail(f"{name} must be >= {minimum}")
    if number > maximum:
        fail(f"{name} must be <= {maximum}")
    return number
118
+
119
+
120
def bounded_float(
    value: object,
    *,
    minimum: float,
    maximum: float,
    name: str,
) -> float | None:
    """Parse *value* as a finite float within ``[minimum, maximum]``.

    ``None`` is passed through unchanged so callers can tell "not supplied"
    apart from a real number. Booleans, unparsable values, non-finite values
    and out-of-range values are rejected through ``fail``.
    """
    if value is None:
        return None
    not_a_number = f"{name} must be a finite number"
    if isinstance(value, bool):
        fail(not_a_number)
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        fail(not_a_number)
    if not math.isfinite(number):
        fail(f"{name} must be finite")
    if number < minimum:
        fail(f"{name} must be >= {minimum:g}")
    if number > maximum:
        fail(f"{name} must be <= {maximum:g}")
    return number
142
+
143
+
144
def normalized_link_target(parent: Path, raw_target: str) -> Path:
    """Return the lexically normalized target of a symlink.

    A relative *raw_target* is interpreted relative to *parent*. The result
    is normalized with ``os.path.normpath`` only; the filesystem is not
    consulted.
    """
    candidate = Path(raw_target)
    anchored = candidate if candidate.is_absolute() else parent / candidate
    return Path(os.path.normpath(str(anchored)))
149
+
150
+
151
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Rewrite *path* through an allow-listed top-level symlink, if one applies.

    When the first component of an absolute path is listed in
    ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS``, is actually a symlink, and points at
    exactly the expected target, the path is returned with that component
    replaced by the target. In every other case, including any ``OSError``
    while inspecting the link, *path* is returned unchanged.
    """
    parts = path.parts
    if not path.is_absolute() or len(parts) < 2:
        return path
    top_level = parts[1]
    canonical = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(top_level)
    if canonical is None:
        return path
    root = Path(path.anchor)
    link = root / top_level
    try:
        is_link = stat.S_ISLNK(os.lstat(link).st_mode)
        # Short-circuit keeps readlink from running on a non-link.
        if not is_link or normalized_link_target(root, os.readlink(link)) != canonical:
            return path
    except OSError:
        return path
    return canonical.joinpath(*parts[2:])
167
+
168
+
169
def reject_symlink_components(path: Path) -> None:
    """Refuse *path* if any existing component is a symlink.

    The path is first passed through
    ``normalize_allowed_first_absolute_symlink`` so an allow-listed top-level
    link is not reported. Each component is then inspected with ``os.lstat``:

    * a missing component ends the check (nothing further can exist);
    * a symlink component is rejected;
    * a non-directory anywhere except the final component is rejected.

    Raises:
        CacheScoreError: (via ``fail``) for a rejected component, or when a
            component cannot be inspected (e.g. permission denied). Such
            ``OSError``s previously escaped as an unhandled traceback.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    absolute = path.is_absolute()
    current = Path(path.anchor) if absolute else Path()
    for part in path.parts:
        # The anchor ("/" or a drive root) is the starting point, not a component.
        if absolute and part == path.anchor:
            continue
        current = current / part
        try:
            st = os.lstat(current)
        except FileNotFoundError:
            return
        except OSError as exc:
            # Keep the tool fail-closed with a user-facing message.
            fail(f"input read failed: {exc}")
        if stat.S_ISLNK(st.st_mode):
            fail(f"refusing path with symlink component: {current}")
        if not stat.S_ISDIR(st.st_mode) and current != path:
            fail(f"refusing path through non-directory component: {current}")
184
+
185
+
186
def read_limited_path(path: Path, max_bytes: int) -> str:
    """Read at most *max_bytes* from the regular file at *path* as text.

    The path is vetted with ``reject_symlink_components`` and opened with
    ``O_NOFOLLOW`` where the platform provides it. The descriptor, not the
    path, is then checked to be a regular file within the size limit.

    Args:
        path: File to read.
        max_bytes: Maximum number of bytes accepted.

    Returns:
        The file content decoded as UTF-8, with undecodable bytes replaced.

    Raises:
        CacheScoreError: (via ``fail``) when the file cannot be opened or
            read, is not a regular file, or exceeds *max_bytes*.
    """
    reject_symlink_components(path)
    # O_NONBLOCK stops open() from blocking forever on a FIFO, so the
    # regular-file check below can reject it; the intent is that it makes no
    # difference when reading regular files. Both flags fall back to 0 on
    # platforms that lack them.
    flags = os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0) | getattr(os, "O_NONBLOCK", 0)
    try:
        fd = os.open(str(path), flags)
    except OSError as exc:
        fail(f"input read failed: {exc}")
    chunks: list[bytes] = []
    try:
        st = os.fstat(fd)
        if not stat.S_ISREG(st.st_mode):
            fail("input must be a regular file")
        if st.st_size > max_bytes:
            fail(f"input exceeds --max-input-bytes: {st.st_size} > {max_bytes}")
        # One os.read call may legally return fewer bytes than requested, so
        # keep reading until EOF or until one byte past the limit (which
        # proves the file grew beyond the size seen by fstat).
        remaining = max_bytes + 1
        while remaining > 0:
            chunk = os.read(fd, remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
    except OSError as exc:
        fail(f"input read failed: {exc}")
    finally:
        os.close(fd)
    data = b"".join(chunks)
    if len(data) > max_bytes:
        fail(f"input exceeds --max-input-bytes: > {max_bytes}")
    return data.decode("utf-8", errors="replace")
205
+
206
+
207
def read_limited_stdin(max_bytes: int) -> str:
    """Read standard input as text, refusing more than *max_bytes* bytes.

    One byte beyond the limit is requested so an oversized stream can be
    detected. Undecodable bytes are replaced during UTF-8 decoding.
    """
    raw = sys.stdin.buffer.read(max_bytes + 1)
    oversized = len(raw) > max_bytes
    if oversized:
        fail(f"input exceeds --max-input-bytes: > {max_bytes}")
    return raw.decode("utf-8", errors="replace")
212
+
213
+
214
def estimate_tokens(text: str) -> int:
    """Return the token proxy for *text*: characters / 4, rounded up.

    This is a tokenizer-free estimate, not a provider token count. Empty
    text yields 0.
    """
    char_count = len(text)
    if char_count == 0:
        return 0
    return int(math.ceil(char_count / TOKEN_PROXY_CHARS_PER_TOKEN))
218
+
219
+
220
def first_dynamic_marker(text: str) -> tuple[int | None, str | None]:
    """Locate the earliest dynamic-looking marker in *text*.

    Returns ``(offset, marker_name)`` for the match with the smallest start
    offset among all ``DYNAMIC_MARKERS`` patterns, or ``(None, None)`` when
    nothing matches. On equal offsets the pattern listed first wins.
    """
    hits: list[tuple[int, str]] = []
    for name, pattern in DYNAMIC_MARKERS:
        found = pattern.search(text)
        if found is not None:
            hits.append((found.start(), name))
    if not hits:
        return None, None
    # min() returns the first minimal element, which preserves the tie rule.
    return min(hits, key=lambda hit: hit[0])
229
+
230
+
231
def _walk_json(
    value: Any,
    path: str = "$",
    *,
    max_nodes: int = MAX_JSON_WALK_NODES,
    max_depth: int = MAX_JSON_WALK_DEPTH,
    max_warnings: int = MAX_JSON_SHAPE_WARNINGS,
) -> list[dict[str, Any]]:
    """Walk parsed JSON iteratively and collect cacheability shape warnings.

    Reported findings:

    * ``dynamic_json_key`` - an object key that looks like a per-request value;
    * ``json_object_key_order_not_sorted`` - object keys not in sorted order;
    * ``tool_order_not_sorted`` - a list at a path ending in ``.tools`` whose
      items all carry ``name`` but are not sorted by it;
    * ``json_walk_truncated`` - the walk hit the node, depth or warning cap.

    The walk uses an explicit stack (no recursion) and is bounded by
    *max_nodes*, *max_depth* and *max_warnings*.
    """
    findings: list[dict[str, Any]] = []
    truncated = {"nodes": False, "depth": False, "warnings": False}

    def record(finding: dict[str, Any]) -> None:
        # Findings past the cap are dropped, but the drop itself is remembered.
        if len(findings) < max_warnings:
            findings.append(finding)
        else:
            truncated["warnings"] = True

    pending: list[tuple[Any, str, int]] = [(value, path, 0)]
    visited = 0
    while pending:
        if visited >= max_nodes:
            truncated["nodes"] = True
            break
        node, node_path, depth = pending.pop()
        visited += 1
        if not isinstance(node, (dict, list)):
            continue  # scalars carry no shape information
        if depth >= max_depth and node:
            # Non-empty container at the depth limit: do not descend.
            truncated["depth"] = True
            continue

        children: list[tuple[Any, str, int]] = []
        if isinstance(node, dict):
            last_key: str | None = None
            in_order = True
            budget = max(0, max_nodes - visited - len(pending))
            for key, child in node.items():
                key_text = str(key)
                if last_key is not None and key_text < last_key:
                    in_order = False
                last_key = key_text
                child_path = json_path_child(node_path, key)
                if DYNAMIC_JSON_KEY_RE.search(key_text):
                    record({
                        "code": "dynamic_json_key",
                        "path": child_path,
                        "severity": "warn",
                        "message": "Dynamic-looking JSON key appears in the prompt/request; place dynamic values after the reusable prefix.",
                    })
                if len(children) >= budget:
                    truncated["nodes"] = True
                    break
                children.append((child, child_path, depth + 1))
            if not in_order:
                record({
                    "code": "json_object_key_order_not_sorted",
                    "path": node_path,
                    "severity": "info",
                    "message": "Object keys are not in deterministic sorted order; keep generated JSON stable across runs.",
                })
        else:
            looks_like_tools = node_path.endswith(".tools") and all(
                isinstance(entry, dict) and "name" in entry for entry in node
            )
            if looks_like_tools:
                names = [str(entry.get("name")) for entry in node]
                if names != sorted(names):
                    record({
                        "code": "tool_order_not_sorted",
                        "path": node_path,
                        "severity": "info",
                        "message": "Tool definitions are not sorted by name; deterministic ordering improves prefix reuse.",
                    })
            budget = max(0, max_nodes - visited - len(pending))
            if len(node) > budget:
                truncated["nodes"] = True
            children = [
                (child, f"{node_path}[{index}]", depth + 1)
                for index, child in enumerate(node[:budget])
            ]
        # Reversed so children are popped in their original order.
        pending.extend(reversed(children))

    if any(truncated.values()):
        summary = {
            "code": "json_walk_truncated",
            "path": "$",
            "severity": "warn",
            "message": "JSON shape analysis was capped by node, depth, or warning limits; rerun on a narrower prompt fixture for complete linting.",
            "nodes_visited": visited,
            "max_nodes": max_nodes,
            "max_depth": max_depth,
            "max_warnings": max_warnings,
        }
        if max_warnings <= 0:
            return findings
        if len(findings) < max_warnings:
            findings.append(summary)
        elif findings:
            # At the cap: sacrifice the last finding so the truncation notice shows.
            findings[-1] = summary
    return findings
328
+
329
+
330
def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
    """Classify *text* and lint its JSON shape when it is JSON.

    Returns:
        ``("text", [])`` when *text* cannot be parsed as JSON,
        ``("json-scalar", [])`` when it parses to a non-container value, or
        ``("json", warnings)`` with the findings from ``_walk_json`` plus a
        ``json_not_canonical`` note when the input is not byte-identical to
        its sorted-key, 2-space-indented rendering followed by a newline.
    """
    try:
        data = json.loads(text)
    except (ValueError, RecursionError):
        # JSONDecodeError is a ValueError. The broader catch also covers
        # inputs the parser refuses for resource reasons (extreme nesting;
        # on interpreters with an int-digit limit, oversized integer
        # literals), which would otherwise surface as a traceback.
        return "text", []
    if not isinstance(data, (dict, list)):
        return "json-scalar", []
    warnings = _walk_json(data)
    try:
        canonical: str | None = json_bytes(data, indent=2) + "\n"
    except RecursionError:
        # Parseable but too deeply nested to re-serialize: canonical form is
        # unknown, so skip the comparison instead of crashing.
        canonical = None
    if canonical is not None and canonical != text:
        warnings.append({
            "code": "json_not_canonical",
            "path": "$",
            "severity": "info",
            "message": "JSON input is parseable but not canonical sort-key formatting; generated prompt JSON should be byte-stable.",
        })
    return "json", warnings
347
+
348
+
349
def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float:
    """Return relative savings of caching versus not caching a prefix.

    Costs are relative to an uncached prefix cost of 1.0: the write saves
    ``1 - write_multiplier`` once and each of *reuses* reads saves
    ``1 - read_multiplier``. Negative results mean caching costs more.
    """
    write_delta = 1.0 - write_multiplier
    per_read_delta = 1.0 - read_multiplier
    return write_delta + (reuses * per_read_delta)
351
+
352
+
353
def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int:
    """Return the largest reuse count whose relative savings stay strictly positive.

    Intended for the case where reads cost more than uncached input
    (``read_multiplier > 1``) while the write is discounted. A closed-form
    estimate is corrected in both directions against ``SAVINGS_EPSILON`` to
    absorb floating-point error.
    """

    def savings_at(count: int) -> float:
        return read_premium_relative_savings(
            count,
            write_multiplier=write_multiplier,
            read_multiplier=read_multiplier,
        )

    estimate = math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0))
    best = max(0, int(estimate))
    # Step down while the estimate is not actually profitable...
    while best > 0 and savings_at(best) <= SAVINGS_EPSILON:
        best -= 1
    # ...and step up while one more reuse would still be profitable.
    while savings_at(best + 1) > SAVINGS_EPSILON:
        best += 1
    return best
369
+
370
+
371
def _break_even_reuses(write_multiplier: float, read_multiplier: float) -> int:
    """Return the smallest reuse count at which caching is not a net loss.

    Only meaningful when ``read_multiplier < 1``. The closed-form ``ceil`` can
    land one too high because of floating-point error (write=1.6, read=0.8
    evaluates to 3.0000000000000013 and would round up to 4), so the
    candidate is lowered by one when the smaller count already breaks even
    within ``SAVINGS_EPSILON``.
    """
    if write_multiplier <= 1.0:
        return 0
    write_premium = write_multiplier - 1.0
    per_read_saving = 1.0 - read_multiplier
    candidate = max(0, int(math.ceil(write_premium / per_read_saving)))
    # A single corrective step is enough for an off-by-one and stays bounded.
    if candidate > 0 and (candidate - 1) * per_read_saving - write_premium >= -SAVINGS_EPSILON:
        candidate -= 1
    return candidate


def build_amortization_report(
    *,
    eligible: bool,
    prefix_tokens: int,
    expected_reuses: int,
    cache_write_multiplier: float | None,
    cache_read_multiplier: float | None,
) -> dict[str, Any]:
    """Return advisory cache amortization math from user-supplied multipliers.

    ``expected_reuses`` counts future cache reads after the initial cache
    write. Multipliers are relative to an uncached prefix input cost of 1.0;
    no provider-specific defaults are shipped.

    Args:
        eligible: Whether the static prefix met the cacheable minimum.
        prefix_tokens: Token proxy of the cacheable prefix (echoed only).
        expected_reuses: Expected number of cache reads.
        cache_write_multiplier: Relative cost of the cache write, or ``None``.
        cache_read_multiplier: Relative cost of a cache read, or ``None``.

    Returns:
        A JSON-serializable dict with status, risk, break-even figures and
        the claim boundary. Cost fields are ``None`` unless the prompt is
        eligible and both multipliers were supplied.
    """
    supplied = cache_write_multiplier is not None and cache_read_multiplier is not None
    break_even_reuses: int | None = None
    max_profitable_reuses: int | None = None
    expected_uncached_relative_cost: float | None = None
    expected_cached_relative_cost: float | None = None
    expected_relative_savings: float | None = None
    status = "multipliers_not_supplied"
    risk = "unknown"

    if not eligible:
        status = "not_cacheable"
        risk = "high"
    elif not supplied:
        status = "multipliers_not_supplied"
        risk = "unknown"
    else:
        expected_uncached_relative_cost = 1.0 + expected_reuses
        expected_cached_relative_cost = cache_write_multiplier + (expected_reuses * cache_read_multiplier)
        expected_relative_savings = expected_uncached_relative_cost - expected_cached_relative_cost
        if cache_read_multiplier < 1.0:
            # Reads are discounted: the write premium amortizes over reuses.
            break_even_reuses = _break_even_reuses(cache_write_multiplier, cache_read_multiplier)
            if expected_reuses >= break_even_reuses:
                status = "already_break_even_on_write" if break_even_reuses == 0 else "amortizes_with_expected_reuses"
                risk = "low"
            elif expected_reuses > 0:
                status = "not_enough_expected_reuses"
                risk = "medium"
            else:
                status = "not_enough_expected_reuses"
                risk = "high"
        elif cache_read_multiplier == 1.0 and cache_write_multiplier <= 1.0:
            # Reads cost the same as uncached input and the write is no worse.
            break_even_reuses = 0
            status = "already_break_even_on_write"
            risk = "low"
        elif cache_read_multiplier > 1.0:
            # Reads carry a premium: only a discounted write can pay for a
            # limited number of reuses.
            if cache_write_multiplier < 1.0:
                max_profitable_reuses = max_profitable_read_premium_reuses(
                    write_multiplier=cache_write_multiplier,
                    read_multiplier=cache_read_multiplier,
                )
            if expected_relative_savings < -SAVINGS_EPSILON:
                status = "no_read_discount"
                risk = "high"
            elif expected_reuses == 0:
                if expected_relative_savings > SAVINGS_EPSILON:
                    status = "write_discount_only_no_expected_reads"
                    risk = "low"
                else:
                    status = "break_even_only_no_expected_reads"
                    risk = "medium"
            elif abs(expected_relative_savings) <= SAVINGS_EPSILON:
                status = "break_even_only_with_limited_reuses"
                risk = "medium"
            else:
                status = "positive_only_with_limited_reuses"
                risk = "medium"
        else:
            # Remaining case: read multiplier is exactly 1.0 with a write premium.
            status = "no_read_discount"
            risk = "high"

    return {
        "expected_reuses": expected_reuses,
        "expected_reuses_semantics": "future_cache_reads_after_initial_write",
        "cacheable_prefix_tokens": prefix_tokens,
        "break_even_reuses": break_even_reuses,
        "max_profitable_reuses": max_profitable_reuses,
        "status": status,
        "risk": risk,
        "cache_write_multiplier": cache_write_multiplier,
        "cache_read_multiplier": cache_read_multiplier,
        "expected_uncached_relative_cost": expected_uncached_relative_cost,
        "expected_cached_relative_cost": expected_cached_relative_cost,
        "expected_relative_savings": expected_relative_savings,
        "multiplier_baseline": "uncached_prefix_input_cost_equals_1.0",
        "user_supplied_multipliers": supplied,
        "formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1",
        "claim_boundary": {
            "advisory_only": True,
            "provider_pricing_defaults_included": False,
            "provider_measured_cache_hit": False,
            "hosted_api_token_or_cost_savings_claim_allowed": False,
            "requires_user_supplied_or_provider_documented_multipliers": True,
        },
    }
473
+
474
+
475
def score_prompt(
    text: str,
    *,
    provider: str,
    minimum_cacheable_tokens: int,
    expected_reuses: int = DEFAULT_EXPECTED_REUSES,
    cache_write_multiplier: float | None = None,
    cache_read_multiplier: float | None = None,
) -> dict[str, Any]:
    """Build the advisory cacheability report for one prompt/request text.

    The cacheable prefix is everything before the first dynamic marker (the
    whole text when none is found). Eligibility compares the prefix token
    proxy against *minimum_cacheable_tokens*. The report never contains the
    prompt text itself.
    """
    prompt_kind, shape_warnings = json_shape_warnings(text)
    dynamic_offset, dynamic_marker = first_dynamic_marker(text)
    has_dynamic = dynamic_offset is not None
    prefix_text = text[:dynamic_offset] if has_dynamic else text

    total_chars = len(text)
    prefix_chars = len(prefix_text)
    estimated = estimate_tokens(text)
    prefix_estimated = estimate_tokens(prefix_text)
    # An empty prompt has no dynamic part, hence a ratio of 1.0.
    static_ratio = prefix_chars / total_chars if total_chars != 0 else 1.0
    eligible = prefix_estimated >= minimum_cacheable_tokens

    warnings = list(shape_warnings)
    if has_dynamic:
        warnings.append({
            "code": "dynamic_marker_in_prompt",
            "severity": "warn",
            "message": "Dynamic-looking content appears before the end of the prompt; move timestamps/request IDs/user-specific values later.",
            "offset": dynamic_offset,
            "marker": dynamic_marker,
        })
    if not eligible:
        warnings.append({
            "code": "below_minimum_cacheable_tokens",
            "severity": "warn",
            "message": "Static prefix token proxy is below the selected provider threshold.",
        })
    if provider == "anthropic" and "cache_control" not in text:
        warnings.append({
            "code": "anthropic_cache_control_not_detected",
            "severity": "info",
            "message": "Anthropic caching usually requires cache_control around the reusable prefix.",
        })

    token_estimate = {
        "method": "char4_proxy",
        "chars_per_token": TOKEN_PROXY_CHARS_PER_TOKEN,
        "estimated_tokens": estimated,
        "cacheable_prefix_tokens": prefix_estimated,
        "label": "provider_tokenizer_free_proxy_not_billed_tokens",
    }
    claim_boundary = {
        "advisory_only": True,
        "provider_measured_cache_hit": False,
        "hosted_api_token_or_cost_savings_claim_allowed": False,
        "requires_provider_usage_fields_for_claims": True,
        "token_estimate_is_provider_tokenizer_free_proxy": True,
    }
    return {
        "tool": TOOL_NAME,
        "schema_version": SCHEMA_VERSION,
        "provider": provider,
        "prompt_kind": prompt_kind,
        "minimum_cacheable_tokens": minimum_cacheable_tokens,
        "eligible": eligible,
        "estimated_tokens": estimated,
        "cacheable_prefix_tokens": prefix_estimated,
        "token_estimate": token_estimate,
        "input_chars": total_chars,
        "cacheable_prefix_chars": prefix_chars,
        "first_dynamic_offset": dynamic_offset,
        "first_dynamic_marker": dynamic_marker,
        "static_prefix_ratio": round(static_ratio, 6),
        "warnings": warnings,
        "provider_caveat": PROVIDER_CAVEATS[provider],
        "amortization": build_amortization_report(
            eligible=eligible,
            prefix_tokens=prefix_estimated,
            expected_reuses=expected_reuses,
            cache_write_multiplier=cache_write_multiplier,
            cache_read_multiplier=cache_read_multiplier,
        ),
        "raw_prompt_stored": False,
        "claim_boundary": claim_boundary,
    }
553
+
554
+
555
def render_text(report: dict[str, Any]) -> str:
    """Render *report* as the four-line human-readable summary.

    Lines: eligibility headline, warning codes (``none`` when empty),
    amortization status, and the fixed claim-boundary notice. Missing or
    malformed ``warnings`` / ``amortization`` entries are treated as empty.
    """
    verdict = "eligible" if report.get("eligible") else "not eligible"

    raw_warnings = report.get("warnings")
    warning_items = raw_warnings if isinstance(raw_warnings, list) else []
    codes = [str(entry.get("code")) for entry in warning_items if isinstance(entry, dict)]
    warning_codes = ", ".join(codes) or "none"

    raw_amortization = report.get("amortization")
    amortization = raw_amortization if isinstance(raw_amortization, dict) else {}

    headline = (
        f"{TOOL_NAME}: {verdict} for {report['provider']} "
        f"(static_prefix≈{report['cacheable_prefix_tokens']} char/4 tokens, "
        f"minimum={report['minimum_cacheable_tokens']})\n"
    )
    warning_line = f"warnings: {warning_codes}\n"
    amortization_line = (
        f"amortization: {amortization.get('status', 'unknown')} "
        f"(risk={amortization.get('risk', 'unknown')}, "
        f"break_even_reuses={amortization.get('break_even_reuses')}, "
        f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n"
    )
    boundary_line = "claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n"
    return "".join((headline, warning_line, amortization_line, boundary_line))
571
+
572
+
573
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser.

    Numeric options are deliberately left as raw values here; ``main``
    validates them so that range errors use the tool's own messages.
    """
    description = (
        "Static prompt cacheability lint. No provider calls, no pricing ledger, "
        "and no measured cache-hit claims."
    )
    parser = argparse.ArgumentParser(description=description)
    add = parser.add_argument

    add("--input", help="prompt/request text or JSON path; stdin is used when omitted")
    add("--provider", choices=sorted(PROVIDER_MINIMUM_CACHEABLE_TOKENS), default="generic")
    add(
        "--minimum-cacheable-tokens",
        default=None,
        help="override provider threshold for model/platform-specific cache minimums",
    )
    add(
        "--max-input-bytes",
        default=DEFAULT_MAX_INPUT_BYTES,
        help=f"maximum input bytes (default: {DEFAULT_MAX_INPUT_BYTES})",
    )
    add(
        "--expected-reuses",
        default=DEFAULT_EXPECTED_REUSES,
        help=(
            "future cache reads expected after the initial write; advisory only "
            f"(default: {DEFAULT_EXPECTED_REUSES})"
        ),
    )
    add(
        "--cache-write-multiplier",
        default=None,
        help="optional user-supplied cache write multiplier relative to uncached prefix input cost=1.0",
    )
    add(
        "--cache-read-multiplier",
        default=None,
        help="optional user-supplied cache read multiplier relative to uncached prefix input cost=1.0",
    )
    add("--json", action="store_true", help="emit stable JSON")
    return parser
608
+
609
+
610
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return the process exit code.

    Options are validated in a fixed order, the prompt is read from
    ``--input`` or stdin, and the report is written to stdout as JSON or
    text. Returns 0 on success and 1 on a ``CacheScoreError`` (message on
    stderr) or a broken output pipe.
    """
    args = build_parser().parse_args(argv)
    try:
        max_input_bytes = bounded_int(
            args.max_input_bytes,
            default=DEFAULT_MAX_INPUT_BYTES,
            minimum=1,
            maximum=100_000_000,
            name="--max-input-bytes",
        )
        provider = str(args.provider)
        minimum = bounded_int(
            args.minimum_cacheable_tokens,
            default=PROVIDER_MINIMUM_CACHEABLE_TOKENS[provider],
            minimum=1,
            maximum=10_000_000,
            name="--minimum-cacheable-tokens",
        )
        expected_reuses = bounded_int(
            args.expected_reuses,
            default=DEFAULT_EXPECTED_REUSES,
            minimum=0,
            maximum=MAX_EXPECTED_REUSES,
            name="--expected-reuses",
        )
        cache_write_multiplier = bounded_float(
            args.cache_write_multiplier,
            minimum=0.0,
            maximum=MAX_CACHE_MULTIPLIER,
            name="--cache-write-multiplier",
        )
        cache_read_multiplier = bounded_float(
            args.cache_read_multiplier,
            minimum=0.0,
            maximum=MAX_CACHE_MULTIPLIER,
            name="--cache-read-multiplier",
        )

        # Input is read only after every option has been validated.
        if args.input:
            text = read_limited_path(Path(args.input), max_input_bytes)
        else:
            text = read_limited_stdin(max_input_bytes)

        report = score_prompt(
            text,
            provider=provider,
            minimum_cacheable_tokens=minimum,
            expected_reuses=expected_reuses,
            cache_write_multiplier=cache_write_multiplier,
            cache_read_multiplier=cache_read_multiplier,
        )
        rendered = json_bytes(report, indent=2) + "\n" if args.json else render_text(report)
        sys.stdout.write(rendered)
        return 0
    except CacheScoreError as exc:
        print(f"{TOOL_NAME}: {exc}", file=sys.stderr)
        return 1
    except BrokenPipeError:
        return 1
662
+
663
+
664
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())