@ictechgy/context-guard 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +9 -0
  2. package/README.ko.md +61 -32
  3. package/README.md +90 -22
  4. package/context-guard-kit/README.md +39 -26
  5. package/context-guard-kit/benchmark_runner.py +273 -8
  6. package/context-guard-kit/claude_transcript_cost_audit.py +325 -12
  7. package/context-guard-kit/context_compress.py +153 -1
  8. package/context-guard-kit/context_filter.py +446 -0
  9. package/context-guard-kit/context_guard_cli.py +3 -0
  10. package/context-guard-kit/context_guard_diet.py +677 -2
  11. package/context-guard-kit/context_pack.py +1694 -2
  12. package/context-guard-kit/cost_guard.py +1870 -0
  13. package/context-guard-kit/setup_wizard.py +820 -29
  14. package/context-guard-kit/trim_command_output.py +396 -45
  15. package/docs/benchmark-fixtures/learned-compression.tasks.example.json +24 -0
  16. package/docs/benchmark-fixtures/learned-compression.variants.example.json +10 -0
  17. package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +24 -0
  18. package/docs/benchmark-fixtures/visual-ocr.variants.example.json +10 -0
  19. package/docs/benchmark-workflow-examples.md +40 -0
  20. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +169 -0
  21. package/docs/benchmark-workflows/measured-token-workflow.example.json +170 -0
  22. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +170 -0
  23. package/docs/cache-diagnostics-schema.md +75 -0
  24. package/docs/cache-diagnostics.example.json +116 -0
  25. package/docs/cache-diagnostics.schema.json +460 -0
  26. package/docs/distribution.md +4 -2
  27. package/docs/experimental-benchmark-fixtures.md +36 -0
  28. package/package.json +11 -2
  29. package/packaging/homebrew/context-guard.rb.template +3 -2
  30. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  31. package/plugins/context-guard/README.ko.md +21 -13
  32. package/plugins/context-guard/README.md +24 -10
  33. package/plugins/context-guard/bin/context-guard +3 -0
  34. package/plugins/context-guard/bin/context-guard-audit +325 -12
  35. package/plugins/context-guard/bin/context-guard-bench +273 -8
  36. package/plugins/context-guard/bin/context-guard-compress +153 -1
  37. package/plugins/context-guard/bin/context-guard-cost +1870 -0
  38. package/plugins/context-guard/bin/context-guard-diet +677 -2
  39. package/plugins/context-guard/bin/context-guard-filter +446 -0
  40. package/plugins/context-guard/bin/context-guard-pack +1694 -2
  41. package/plugins/context-guard/bin/context-guard-setup +820 -29
  42. package/plugins/context-guard/bin/context-guard-trim-output +396 -45
  43. package/plugins/context-guard/brief/README.md +10 -3
  44. package/plugins/context-guard/skills/optimize/SKILL.md +5 -2
  45. package/plugins/context-guard/skills/setup/SKILL.md +3 -1
@@ -45,8 +45,10 @@ TOKEN_TYPE_ALIASES = {
45
45
  COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
46
46
  MODEL_KEYS = ("model", "model_id", "modelId")
47
47
  QUERY_SOURCE_KEYS = ("query_source", "querySource")
48
- FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.1"
48
+ TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
49
+ FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.2"
49
50
  FEASIBILITY_PRODUCER = "context-guard-audit"
51
+ CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
50
52
  MAX_ERROR_EXAMPLES = 20
51
53
  JSON_PARSE_RECURSION_LIMIT = 10_000
52
54
  READ_CHUNK_BYTES = 64 * 1024
@@ -177,8 +179,11 @@ class UsageSummary:
177
179
  by_tool: Counter[str] = field(default_factory=Counter)
178
180
  token_field_presence: Counter[str] = field(default_factory=Counter)
179
181
  cost_field_count: int = 0
182
+ cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
183
+ positive_cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
180
184
  prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
181
185
  cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
186
+ cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
182
187
 
183
188
  @property
184
189
  def total_tokens(self) -> int:
@@ -295,6 +300,48 @@ def finite_nonnegative_number(value: Any, *, clamp_negative: bool) -> int | floa
295
300
  return None
296
301
 
297
302
 
303
+ def parse_timestamp_value(value: Any) -> _dt.datetime | None:
304
+ if isinstance(value, str):
305
+ text = value.strip()
306
+ if not text:
307
+ return None
308
+ try:
309
+ if text.endswith("Z"):
310
+ text = text[:-1] + "+00:00"
311
+ parsed = _dt.datetime.fromisoformat(text)
312
+ except ValueError:
313
+ return None
314
+ if parsed.tzinfo is None:
315
+ parsed = parsed.replace(tzinfo=_dt.timezone.utc)
316
+ return parsed.astimezone(_dt.timezone.utc)
317
+ metric = finite_nonnegative_number(value, clamp_negative=False)
318
+ if metric is None:
319
+ return None
320
+ seconds = float(metric) / 1000.0 if float(metric) > 10_000_000_000 else float(metric)
321
+ try:
322
+ return _dt.datetime.fromtimestamp(seconds, tz=_dt.timezone.utc)
323
+ except (OverflowError, OSError, ValueError):
324
+ return None
325
+
326
+
327
def record_timestamp(root: Any) -> _dt.datetime | None:
    """Return the first parseable timestamp found on a transcript record.

    Looks up every key in ``TIMESTAMP_KEYS`` on ``root`` first and then on
    ``root["message"]`` (when that is a dict), in key order, and returns the
    first value that ``parse_timestamp_value`` accepts. Returns ``None`` when
    ``root`` is not a dict or no candidate parses.
    """
    if not isinstance(root, dict):
        return None
    containers = [root]
    nested = root.get("message")
    if isinstance(nested, dict):
        containers.append(nested)
    for container in containers:
        for key in TIMESTAMP_KEYS:
            if key not in container:
                continue
            moment = parse_timestamp_value(container.get(key))
            if moment is not None:
                return moment
    return None
343
+
344
+
298
345
  def normalize_token_bucket(raw: str) -> str:
299
346
  return TOKEN_TYPE_ALIASES.get(raw, raw)
300
347
 
@@ -667,11 +714,15 @@ def add_usage(
667
714
  ) -> RecordUsage:
668
715
  root_model = None
669
716
  root_query_source = None
717
+ parsed_timestamp = None
670
718
  if isinstance(root, dict):
671
719
  root_model = first_string(root, MODEL_KEYS)
672
720
  root_query_source = first_string(root, QUERY_SOURCE_KEYS)
721
+ parsed_timestamp = record_timestamp(root)
673
722
 
674
723
  record = RecordUsage()
724
+ cache_telemetry_present = False
725
+ positive_cache_telemetry_present = False
675
726
  summary.prompt_cache_audit.observe(root)
676
727
  for d in walk(root):
677
728
  local_tokens: Counter[str] = Counter()
@@ -695,6 +746,10 @@ def add_usage(
695
746
 
696
747
  for bucket in present_buckets:
697
748
  summary.token_field_presence[bucket] += 1
749
+ if "cache_read" in present_buckets or "cache_creation" in present_buckets:
750
+ cache_telemetry_present = True
751
+ if local_tokens.get("cache_read", 0) > 0 or local_tokens.get("cache_creation", 0) > 0:
752
+ positive_cache_telemetry_present = True
698
753
 
699
754
  if local_tokens:
700
755
  summary.tokens.update(local_tokens)
@@ -713,6 +768,10 @@ def add_usage(
713
768
  record.cost_usd += cost
714
769
  summary.cost_field_count += 1
715
770
  break
771
+ if parsed_timestamp is not None and cache_telemetry_present:
772
+ summary.cache_record_timestamps.append(parsed_timestamp)
773
+ if parsed_timestamp is not None and positive_cache_telemetry_present:
774
+ summary.positive_cache_record_timestamps.append(parsed_timestamp)
716
775
  commands, tools = collect_record_hints(root, show_commands=show_commands)
717
776
  record.commands = commands
718
777
  record.tools = tools
@@ -980,6 +1039,7 @@ def segment_position_stats(samples: list[PromptSegmentSample], attr: str, window
980
1039
  "stability": stability,
981
1040
  "volatile_share": 1.0 - stability,
982
1041
  "unique_hashes": len(counts),
1042
+ "sample_count": len(values),
983
1043
  })
984
1044
  return stats
985
1045
 
@@ -1143,6 +1203,201 @@ def cache_friendliness_for_summary(summary: UsageSummary) -> dict[str, Any]:
1143
1203
  return summary.cache_friendliness_cache
1144
1204
 
1145
1205
 
1206
def _cache_diagnostic_confidence(*, skipped: bool, samples: bool, has_cache: bool) -> str:
    """Map scan completeness and available evidence to a confidence label.

    ``skipped`` wins and yields ``"partial"``; otherwise any prompt samples or
    cache fields yield ``"hypothesis"``; with neither, ``"unavailable"``.
    """
    if skipped:
        return "partial"
    return "hypothesis" if (samples or has_cache) else "unavailable"
1212
+
1213
+
1214
def build_ttl_diagnostics(summary: UsageSummary, *, has_cache_any: bool, skipped: bool) -> dict[str, Any]:
    """Describe the time span covered by positive timestamped cache records.

    With fewer than two positive timestamped cache records the result is
    marked unavailable and carries no span or candidate. Otherwise the span
    between the earliest and latest record (whole seconds, floored at 0) is
    bucketed as ``within-5m``, ``between-5m-and-1h`` or ``beyond-1h``.

    Args:
        summary: Aggregated usage; only its two timestamp lists are read.
        has_cache_any: Whether any cache token field was observed.
        skipped: Whether the scan skipped or capped any evidence.

    Returns:
        A dict with status, evidence, confidence, counts, span, candidate,
        reason, interval basis and caveats, in that key order.
    """
    all_timestamped = len(summary.cache_record_timestamps)
    positive_times = sorted(summary.positive_cache_record_timestamps)

    if len(positive_times) >= 2:
        span = max(0, int((positive_times[-1] - positive_times[0]).total_seconds()))
        if span <= 5 * 60:
            bucket = "within-5m"
        elif span <= 60 * 60:
            bucket = "between-5m-and-1h"
        else:
            bucket = "beyond-1h"
        status = "hypothesis" if has_cache_any else "unavailable"
        evidence = EVIDENCE_INFERRED if has_cache_any else EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "hypothesis"
        reason = (
            "Positive timestamped cache telemetry records bound the local cache-observation span, but exact provider cache TTL reuse remains a hypothesis."
        )
    else:
        # A single record (or none) gives no interval to reason about.
        span = None
        bucket = None
        status = "unavailable"
        evidence = EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "unavailable"
        reason = (
            "Fewer than two positive timestamped cache telemetry records were observed, so TTL reuse intervals cannot be inferred."
        )

    return {
        "status": status,
        "evidence": evidence,
        "confidence": confidence,
        "timestamped_cache_record_count": all_timestamped,
        "positive_timestamped_cache_record_count": len(positive_times),
        "timestamped_cache_record_span_seconds": span,
        "candidate": bucket,
        "reason": reason,
        "interval_basis": "positive_timestamped_cache_records",
        "caveats": [
            "Timestamped cache telemetry records do not prove exact provider cache-prefix identity or provider cache TTL state.",
            "5-minute versus 1-hour TTL guidance is a local hypothesis unless corroborated with provider telemetry and repeated stable prefixes.",
        ],
    }
1252
+
1253
+
1254
def build_cache_diagnostics(summary: UsageSummary) -> dict[str, Any]:
    """Build the cache diagnostics payload for ``summary`` and memoize it.

    The result is stored on ``summary.cache_diagnostics_cache`` and returned
    as-is on later calls. It combines observed cache token fields, prefix
    segment stability statistics, a list of cache-miss hypotheses, TTL
    diagnostics and headroom availability into one dict tagged with
    ``CACHE_DIAGNOSTICS_SCHEMA_VERSION``.
    """
    memoized = summary.cache_diagnostics_cache
    if memoized is not None:
        return memoized

    cache_availability = build_metric_availability(summary)["cache"]
    cache_friendliness = cache_friendliness_for_summary(summary)
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or cache_friendliness.get("skipped_evidence")
    )
    presence = summary.token_field_presence
    has_cache_read = presence.get("cache_read", 0) > 0
    has_cache_creation = presence.get("cache_creation", 0) > 0
    has_cache_any = has_cache_read or has_cache_creation
    cache_read = summary.tokens.get("cache_read", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)
    samples = summary.prompt_cache_audit.samples
    if samples:
        prefix_stats = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS)
    else:
        prefix_stats = []
    confidence = _cache_diagnostic_confidence(skipped=skipped, samples=bool(samples), has_cache=has_cache_any)

    # Per-position entries inherit "partial" from the cache-friendliness scan.
    segment_confidence = "partial" if cache_friendliness.get("confidence") == "partial" else "hypothesis"

    # Most stable positions first; entries below 0.66 stability are dropped.
    ranked_by_stability = sorted(prefix_stats, key=lambda item: (-item["stability"], item["position"]))
    stable_prefix_candidates: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "stability": round(float(entry["stability"]), 4),
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "action": "Keep stable instructions, policies, and reusable context before run-specific evidence.",
        }
        for entry in ranked_by_stability[:PROMPT_AUDIT_PREFIX_SEGMENTS]
        if not entry["stability"] < 0.66
    ]

    # Reuse the trigger recorded by the first volatile-prefix finding, if any.
    breaker_trigger = "prefix_position"
    volatile_finding = next(
        (
            finding
            for finding in cache_friendliness.get("findings", [])
            if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix"
        ),
        None,
    )
    if volatile_finding is not None:
        finding_evidence = volatile_finding.get("evidence")
        if isinstance(finding_evidence, dict):
            breaker_trigger = str(finding_evidence.get("trigger") or breaker_trigger)

    # Most volatile positions first; entries below 0.34 volatile share are dropped.
    ranked_by_volatility = sorted(prefix_stats, key=lambda item: (-item["volatile_share"], item["position"]))
    dynamic_prefix_breakers: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "trigger": breaker_trigger,
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "stability": round(float(entry["stability"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "heuristic": True,
            "action": "Move diffs, logs, timestamps, and command output after stable reusable prompt prefixes.",
        }
        for entry in ranked_by_volatility
        if not entry["volatile_share"] < 0.34
    ][:PROMPT_AUDIT_MAX_FINDINGS]

    hypotheses: list[dict[str, Any]] = []

    def add_hypothesis(ident: str, evidence: Any, level: Any, reason: str, action: Any) -> None:
        # Fixed key order keeps the serialized payload stable.
        hypotheses.append({
            "id": ident,
            "evidence": evidence,
            "confidence": level,
            "reason": reason,
            "action": action,
        })

    if not has_cache_any:
        add_hypothesis(
            "cache-fields-missing",
            EVIDENCE_UNAVAILABLE,
            "partial" if skipped else "unavailable",
            "No cache_read/cache_creation transcript fields were observed.",
            "Hide cache-read UI or label cache telemetry as missing for this scan.",
        )
    if has_cache_creation and cache_creation > 0 and (not has_cache_read or cache_read == 0):
        add_hypothesis(
            "cache-cold-or-prefix-changed",
            EVIDENCE_INFERRED,
            "hypothesis",
            "Cache creation tokens were observed without corresponding cache read tokens.",
            "Check whether stable instructions changed or whether the session was cache-cold.",
        )
    if has_cache_creation and cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        add_hypothesis(
            "cache-read-low-vs-write",
            EVIDENCE_INFERRED,
            "hypothesis",
            "Cache reads are small relative to observed cache writes.",
            "Keep reusable prompt prefixes stable across turns before changing large context blocks.",
        )
    if dynamic_prefix_breakers:
        worst_breaker = dynamic_prefix_breakers[0]
        add_hypothesis(
            "volatile-prefix-breakers",
            EVIDENCE_INFERRED,
            worst_breaker["confidence"],
            "Redacted prompt segment hashes show volatile content near the prefix window.",
            worst_breaker["action"],
        )
    if skipped:
        add_hypothesis(
            "partial-transcript-scan",
            EVIDENCE_INFERRED,
            "partial",
            "Some transcript files, records, or prompt structures were skipped/capped.",
            "Rerun against narrower transcript paths or higher safe scan limits before making decisions.",
        )

    ttl = build_ttl_diagnostics(summary, has_cache_any=has_cache_any, skipped=skipped)
    headroom = build_headroom_availability(summary)
    headroom_diagnostics = {
        **headroom,
        "historical_total_tokens_are_not_headroom": True,
        "required_observation": "live_statusline_snapshot",
    }

    if has_cache_any or samples:
        degraded = skipped or cache_friendliness.get("status") == "partial"
        status = "partial" if degraded else "available"
    elif skipped:
        status = "partial"
    else:
        status = "missing"

    diagnostics = {
        "schema_version": CACHE_DIAGNOSTICS_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_INFERRED if (has_cache_any or samples) else EVIDENCE_UNAVAILABLE,
        "heuristic": True,
        "observations": {
            "cache_fields": cache_availability,
            "cache_read_tokens": cache_read,
            "cache_creation_tokens": cache_creation,
        },
        "derived_ratios": cache_availability["derived"],
        "stable_prefix_candidates": stable_prefix_candidates,
        "dynamic_prefix_breakers": dynamic_prefix_breakers,
        "cache_miss_hypotheses": hypotheses[:PROMPT_AUDIT_MAX_FINDINGS],
        "ttl_diagnostics": ttl,
        "headroom_diagnostics": headroom_diagnostics,
        "caveats": [
            "Cache diagnostics are local transcript heuristics and do not prove exact provider cache-prefix state.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Stable-prefix and breaker positions come from bounded redacted segment hashes, not raw prompt text.",
        ],
    }
    summary.cache_diagnostics_cache = diagnostics
    return diagnostics
1395
+
1396
+
1397
def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache diagnostics payload for ``summary``.

    Delegates to ``build_cache_diagnostics`` without any extra processing.
    """
    diagnostics = build_cache_diagnostics(summary)
    return diagnostics
1399
+
1400
+
1146
1401
  def build_metric_caveats(summary: UsageSummary) -> list[str]:
1147
1402
  caveats = [
1148
1403
  "Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
@@ -1177,6 +1432,7 @@ def feasibility_json(
1177
1432
  stable_tokens = stable_token_counter(summary.tokens)
1178
1433
  stable_total_tokens = sum(stable_tokens.values())
1179
1434
  cache_friendliness = cache_friendliness_for_summary(summary)
1435
+ cache_diagnostics = cache_diagnostics_for_summary(summary)
1180
1436
  return {
1181
1437
  "schema_version": FEASIBILITY_SCHEMA_VERSION,
1182
1438
  "producer": FEASIBILITY_PRODUCER,
@@ -1195,6 +1451,7 @@ def feasibility_json(
1195
1451
  "context_availability",
1196
1452
  "headroom_availability",
1197
1453
  "cache_friendliness",
1454
+ "cache_diagnostics",
1198
1455
  "totals",
1199
1456
  ],
1200
1457
  "diagnostic_fields": ["summary"],
@@ -1222,6 +1479,7 @@ def feasibility_json(
1222
1479
  "context_availability": availability["context"],
1223
1480
  "headroom_availability": availability["headroom"],
1224
1481
  "cache_friendliness": cache_friendliness,
1482
+ "cache_diagnostics": cache_diagnostics,
1225
1483
  "totals": {
1226
1484
  "total_tokens": stable_total_tokens,
1227
1485
  "tokens": stable_tokens,
@@ -1272,6 +1530,7 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
1272
1530
  output_ratio = output_tokens / total
1273
1531
  input_ratio = input_tokens / total
1274
1532
  cache_friendliness = cache_friendliness_for_summary(summary)
1533
+ cache_diagnostics = cache_diagnostics_for_summary(summary)
1275
1534
  for finding in cache_friendliness.get("findings", []):
1276
1535
  if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
1277
1536
  evidence = dict(finding.get("evidence") or {})
@@ -1331,25 +1590,57 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
1331
1590
  },
1332
1591
  ))
1333
1592
  if cache_creation >= 50_000 and 1.0 <= summary.cache_amortization < 5.0:
1593
+ ttl = cache_diagnostics.get("ttl_diagnostics") or {}
1594
+ ttl_status = str(ttl.get("status") or "unavailable")
1595
+ ttl_confidence = str(ttl.get("confidence") or "unavailable")
1596
+ ttl_candidate = ttl.get("candidate")
1597
+ ttl_span = ttl.get("timestamped_cache_record_span_seconds")
1598
+ if ttl_status == "hypothesis" and ttl_candidate in {"between-5m-and-1h", "beyond-1h"}:
1599
+ ttl_reason = (
1600
+ f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1601
+ f"{cache_creation} write tokens; timestamped cache telemetry spans {ttl_span} seconds "
1602
+ f"({ttl_candidate})."
1603
+ )
1604
+ ttl_action = (
1605
+ "Evaluate a longer provider prompt-cache TTL only after confirming the same stable prefix "
1606
+ "pattern in representative sessions and rechecking current provider TTL/pricing documentation."
1607
+ )
1608
+ elif ttl_status == "hypothesis":
1609
+ ttl_reason = (
1610
+ f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1611
+ f"{cache_creation} write tokens, but timestamped cache telemetry currently points to {ttl_candidate}."
1612
+ )
1613
+ ttl_action = (
1614
+ "Keep collecting timestamped cache read/write evidence; do not enable a longer TTL solely from this scan."
1615
+ )
1616
+ else:
1617
+ ttl_reason = (
1618
+ f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1619
+ f"{cache_creation} write tokens, but TTL diagnostics are {ttl_status} because this scan lacks "
1620
+ "at least two timestamped cache telemetry records."
1621
+ )
1622
+ ttl_action = (
1623
+ "Collect or inspect timestamped cache read/write evidence before evaluating a longer provider "
1624
+ "prompt-cache TTL; historical token totals alone are not TTL evidence."
1625
+ )
1334
1626
  recs.append(recommendation(
1335
1627
  "evaluate-1h-ttl-cache",
1336
- "Cache writes are large; evaluate the 1h TTL cache beta",
1337
- (
1338
- f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1339
- f"{cache_creation} write tokens; absolute write cost is high and reuse is moderate. "
1340
- "This metric does not inspect timestamps, so confirm reuse spans >5min in a sample "
1341
- "session before enabling 1h TTL."
1342
- ),
1343
- (
1344
- "If sessions reuse the same prefix beyond the 5-minute default TTL, evaluate the 1h prompt cache "
1345
- "beta (write 2x, read 0.1x). It pays off when reuse spans the gap between two 5-min cache writes."
1346
- ),
1628
+ "Cache writes are large; validate TTL evidence before longer TTL",
1629
+ ttl_reason,
1630
+ ttl_action,
1347
1631
  "P2",
1348
1632
  {
1349
1633
  "cache_creation": cache_creation,
1350
1634
  "cache_read": cache_read,
1351
1635
  "cache_amortization": round(summary.cache_amortization, 4),
1352
1636
  "cache_hit_rate": round(summary.cache_hit_rate, 4),
1637
+ "ttl_status": ttl_status,
1638
+ "ttl_evidence": ttl.get("evidence") or EVIDENCE_UNAVAILABLE,
1639
+ "ttl_confidence": ttl_confidence,
1640
+ "ttl_candidate": ttl_candidate,
1641
+ "timestamped_cache_record_count": ttl.get("timestamped_cache_record_count"),
1642
+ "positive_timestamped_cache_record_count": ttl.get("positive_timestamped_cache_record_count"),
1643
+ "timestamped_cache_record_span_seconds": ttl_span,
1353
1644
  "heuristic": True,
1354
1645
  },
1355
1646
  ))
@@ -1462,6 +1753,7 @@ def summary_json(
1462
1753
  "top_commands": counter_json(summary.by_command, top),
1463
1754
  "top_tools": counter_json(summary.by_tool, top),
1464
1755
  "cache_friendliness": cache_friendliness_for_summary(summary),
1756
+ "cache_diagnostics": cache_diagnostics_for_summary(summary),
1465
1757
  }
1466
1758
  if include_recommendations:
1467
1759
  data["recommendations"] = build_recommendations(summary, top)
@@ -1574,6 +1866,27 @@ def main() -> int:
1574
1866
  if isinstance(finding, dict):
1575
1867
  print(f" finding [{finding.get('severity')}] {finding.get('id')}: {finding.get('title')}")
1576
1868
 
1869
+ cache_diagnostics = cache_diagnostics_for_summary(summary)
1870
+ print("\nCache diagnostics")
1871
+ print(f" status {cache_diagnostics.get('status')}")
1872
+ print(f" confidence {cache_diagnostics.get('confidence')}")
1873
+ hypotheses = cache_diagnostics.get("cache_miss_hypotheses") or []
1874
+ if hypotheses:
1875
+ first = hypotheses[0]
1876
+ print(f" top_hypothesis {first.get('id')} ({first.get('confidence')})")
1877
+ stable_candidates = cache_diagnostics.get("stable_prefix_candidates") or []
1878
+ if stable_candidates:
1879
+ first = stable_candidates[0]
1880
+ print(f" stable_prefix_candidate position={first.get('position')} stability={first.get('stability')}")
1881
+ breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
1882
+ if breakers:
1883
+ first = breakers[0]
1884
+ print(f" dynamic_prefix_breaker position={first.get('position')} volatile_share={first.get('volatile_share')}")
1885
+ ttl = cache_diagnostics.get("ttl_diagnostics") or {}
1886
+ print(f" ttl_status {ttl.get('status')} ({ttl.get('confidence')})")
1887
+ headroom = cache_diagnostics.get("headroom_diagnostics") or {}
1888
+ print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")
1889
+
1577
1890
  model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
1578
1891
  print_counter("By model", model_totals, args.top)
1579
1892