@ictechgy/context-guard 0.4.1 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +15 -0
  2. package/README.ko.md +62 -33
  3. package/README.md +91 -23
  4. package/context-guard-kit/README.md +39 -26
  5. package/context-guard-kit/benchmark_runner.py +273 -8
  6. package/context-guard-kit/claude_transcript_cost_audit.py +597 -12
  7. package/context-guard-kit/context_compress.py +153 -1
  8. package/context-guard-kit/context_filter.py +446 -0
  9. package/context-guard-kit/context_guard_cli.py +3 -0
  10. package/context-guard-kit/context_guard_diet.py +677 -2
  11. package/context-guard-kit/context_pack.py +1694 -2
  12. package/context-guard-kit/cost_guard.py +1870 -0
  13. package/context-guard-kit/setup_wizard.py +820 -29
  14. package/context-guard-kit/trim_command_output.py +396 -45
  15. package/docs/benchmark-fixtures/learned-compression.tasks.example.json +24 -0
  16. package/docs/benchmark-fixtures/learned-compression.variants.example.json +10 -0
  17. package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +24 -0
  18. package/docs/benchmark-fixtures/visual-ocr.variants.example.json +10 -0
  19. package/docs/benchmark-workflow-examples.md +40 -0
  20. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +169 -0
  21. package/docs/benchmark-workflows/measured-token-workflow.example.json +170 -0
  22. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +170 -0
  23. package/docs/cache-diagnostics-schema.md +96 -0
  24. package/docs/cache-diagnostics.example.json +116 -0
  25. package/docs/cache-diagnostics.schema.json +460 -0
  26. package/docs/distribution.md +4 -2
  27. package/docs/experimental-benchmark-fixtures.md +36 -0
  28. package/package.json +11 -2
  29. package/packaging/homebrew/context-guard.rb.template +3 -2
  30. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  31. package/plugins/context-guard/README.ko.md +22 -14
  32. package/plugins/context-guard/README.md +24 -10
  33. package/plugins/context-guard/bin/context-guard +3 -0
  34. package/plugins/context-guard/bin/context-guard-audit +597 -12
  35. package/plugins/context-guard/bin/context-guard-bench +273 -8
  36. package/plugins/context-guard/bin/context-guard-compress +153 -1
  37. package/plugins/context-guard/bin/context-guard-cost +1870 -0
  38. package/plugins/context-guard/bin/context-guard-diet +677 -2
  39. package/plugins/context-guard/bin/context-guard-filter +446 -0
  40. package/plugins/context-guard/bin/context-guard-pack +1694 -2
  41. package/plugins/context-guard/bin/context-guard-setup +820 -29
  42. package/plugins/context-guard/bin/context-guard-trim-output +396 -45
  43. package/plugins/context-guard/brief/README.md +10 -3
  44. package/plugins/context-guard/skills/optimize/SKILL.md +5 -2
  45. package/plugins/context-guard/skills/setup/SKILL.md +3 -1
@@ -45,8 +45,11 @@ TOKEN_TYPE_ALIASES = {
45
45
  COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
46
46
  MODEL_KEYS = ("model", "model_id", "modelId")
47
47
  QUERY_SOURCE_KEYS = ("query_source", "querySource")
48
- FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.1"
48
+ TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
49
+ FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.2"
49
50
  FEASIBILITY_PRODUCER = "context-guard-audit"
51
+ CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
52
+ CACHE_LAYOUT_ADVICE_SCHEMA_VERSION = "contextguard.cache-layout-advice.v1"
50
53
  MAX_ERROR_EXAMPLES = 20
51
54
  JSON_PARSE_RECURSION_LIMIT = 10_000
52
55
  READ_CHUNK_BYTES = 64 * 1024
@@ -177,8 +180,12 @@ class UsageSummary:
177
180
  by_tool: Counter[str] = field(default_factory=Counter)
178
181
  token_field_presence: Counter[str] = field(default_factory=Counter)
179
182
  cost_field_count: int = 0
183
+ cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
184
+ positive_cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
180
185
  prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
181
186
  cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
187
+ cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
188
+ cache_layout_advice_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
182
189
 
183
190
  @property
184
191
  def total_tokens(self) -> int:
@@ -295,6 +302,48 @@ def finite_nonnegative_number(value: Any, *, clamp_negative: bool) -> int | floa
295
302
  return None
296
303
 
297
304
 
305
def parse_timestamp_value(value: Any) -> _dt.datetime | None:
    """Convert one raw timestamp field into a timezone-aware UTC datetime.

    Strings are stripped and parsed with ``datetime.fromisoformat``; a
    trailing ``Z`` is rewritten to ``+00:00`` first, and a naive result is
    taken to be UTC. Any non-string value is handed to
    ``finite_nonnegative_number`` and treated as epoch time: values above
    10_000_000_000 are interpreted as milliseconds, everything else as
    seconds.

    Returns ``None`` for blank or unparsable strings, for values rejected by
    ``finite_nonnegative_number``, and for instants that cannot be
    represented as a ``datetime``.
    """
    utc = _dt.timezone.utc
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return None
        if text.endswith("Z"):
            text = text[:-1] + "+00:00"
        try:
            parsed = _dt.datetime.fromisoformat(text)
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=utc)
            # astimezone() raises OverflowError when the UTC conversion falls
            # outside datetime.min/max (e.g. year 1 with a positive offset),
            # so it has to live inside the guarded region too.
            return parsed.astimezone(utc)
        except (OverflowError, ValueError):
            return None

    metric = finite_nonnegative_number(value, clamp_negative=False)
    if metric is None:
        return None
    try:
        # float() can overflow on very large integers, so convert inside the
        # guarded region rather than before it.
        seconds = float(metric)
        if seconds > 10_000_000_000:
            seconds /= 1000.0
        return _dt.datetime.fromtimestamp(seconds, tz=utc)
    except (OverflowError, OSError, ValueError):
        return None
327
+
328
+
329
def record_timestamp(root: Any) -> _dt.datetime | None:
    """Return the first parseable timestamp found on a transcript record.

    Looks at the keys listed in ``TIMESTAMP_KEYS`` on *root* first, then at
    the same keys on ``root["message"]`` when that value is a dict. Keys are
    tried in ``TIMESTAMP_KEYS`` order and the first value that
    ``parse_timestamp_value`` accepts wins.

    Returns ``None`` when *root* is not a dict or no candidate parses.
    """
    if not isinstance(root, dict):
        return None

    containers = [root]
    message = root.get("message")
    if isinstance(message, dict):
        containers.append(message)

    for container in containers:
        for key in TIMESTAMP_KEYS:
            if key not in container:
                continue
            parsed = parse_timestamp_value(container[key])
            if parsed is not None:
                return parsed
    return None
345
+
346
+
298
347
  def normalize_token_bucket(raw: str) -> str:
299
348
  return TOKEN_TYPE_ALIASES.get(raw, raw)
300
349
 
@@ -667,11 +716,15 @@ def add_usage(
667
716
  ) -> RecordUsage:
668
717
  root_model = None
669
718
  root_query_source = None
719
+ parsed_timestamp = None
670
720
  if isinstance(root, dict):
671
721
  root_model = first_string(root, MODEL_KEYS)
672
722
  root_query_source = first_string(root, QUERY_SOURCE_KEYS)
723
+ parsed_timestamp = record_timestamp(root)
673
724
 
674
725
  record = RecordUsage()
726
+ cache_telemetry_present = False
727
+ positive_cache_telemetry_present = False
675
728
  summary.prompt_cache_audit.observe(root)
676
729
  for d in walk(root):
677
730
  local_tokens: Counter[str] = Counter()
@@ -695,6 +748,10 @@ def add_usage(
695
748
 
696
749
  for bucket in present_buckets:
697
750
  summary.token_field_presence[bucket] += 1
751
+ if "cache_read" in present_buckets or "cache_creation" in present_buckets:
752
+ cache_telemetry_present = True
753
+ if local_tokens.get("cache_read", 0) > 0 or local_tokens.get("cache_creation", 0) > 0:
754
+ positive_cache_telemetry_present = True
698
755
 
699
756
  if local_tokens:
700
757
  summary.tokens.update(local_tokens)
@@ -713,6 +770,10 @@ def add_usage(
713
770
  record.cost_usd += cost
714
771
  summary.cost_field_count += 1
715
772
  break
773
+ if parsed_timestamp is not None and cache_telemetry_present:
774
+ summary.cache_record_timestamps.append(parsed_timestamp)
775
+ if parsed_timestamp is not None and positive_cache_telemetry_present:
776
+ summary.positive_cache_record_timestamps.append(parsed_timestamp)
716
777
  commands, tools = collect_record_hints(root, show_commands=show_commands)
717
778
  record.commands = commands
718
779
  record.tools = tools
@@ -980,6 +1041,7 @@ def segment_position_stats(samples: list[PromptSegmentSample], attr: str, window
980
1041
  "stability": stability,
981
1042
  "volatile_share": 1.0 - stability,
982
1043
  "unique_hashes": len(counts),
1044
+ "sample_count": len(values),
983
1045
  })
984
1046
  return stats
985
1047
 
@@ -1143,6 +1205,417 @@ def cache_friendliness_for_summary(summary: UsageSummary) -> dict[str, Any]:
1143
1205
  return summary.cache_friendliness_cache
1144
1206
 
1145
1207
 
1208
def _cache_diagnostic_confidence(*, skipped: bool, samples: bool, has_cache: bool) -> str:
    """Pick the confidence label for the cache-diagnostics payload.

    A skipped/capped scan always yields ``"partial"``; otherwise any prompt
    samples or cache fields yield ``"hypothesis"``; with neither the result
    is ``"unavailable"``.
    """
    if skipped:
        return "partial"
    if samples or has_cache:
        return "hypothesis"
    return "unavailable"
1214
+
1215
+
1216
def build_ttl_diagnostics(summary: UsageSummary, *, has_cache_any: bool, skipped: bool) -> dict[str, Any]:
    """Summarize what timestamped cache telemetry suggests about TTL reuse.

    The span is measured between the earliest and latest entries of
    ``summary.positive_cache_record_timestamps`` and bucketed as
    ``within-5m``, ``between-5m-and-1h`` or ``beyond-1h``. With fewer than
    two such timestamps the result is reported as unavailable.

    Args:
        summary: Usage summary carrying ``cache_record_timestamps`` and
            ``positive_cache_record_timestamps``.
        has_cache_any: Whether any cache token field was observed; controls
            the status/evidence labels of the populated result.
        skipped: Whether part of the scan was skipped; downgrades the
            confidence label to ``"partial"``.
    """
    timestamped_cache_record_count = len(summary.cache_record_timestamps)
    timestamps = summary.positive_cache_record_timestamps
    positive_count = len(timestamps)
    caveats = [
        "Timestamped cache telemetry records do not prove exact provider cache-prefix identity or provider cache TTL state.",
        "5-minute versus 1-hour TTL guidance is a local hypothesis unless corroborated with provider telemetry and repeated stable prefixes.",
    ]
    if positive_count < 2:
        return {
            "status": "unavailable",
            "evidence": EVIDENCE_UNAVAILABLE,
            "confidence": "partial" if skipped else "unavailable",
            "timestamped_cache_record_count": timestamped_cache_record_count,
            "positive_timestamped_cache_record_count": positive_count,
            "timestamped_cache_record_span_seconds": None,
            "candidate": None,
            "reason": (
                "Fewer than two positive timestamped cache telemetry records were observed, so TTL reuse intervals cannot be inferred."
            ),
            "interval_basis": "positive_timestamped_cache_records",
            "caveats": caveats,
        }

    # Only the extremes matter for the span, so a full sort is unnecessary.
    interval = max(0, int((max(timestamps) - min(timestamps)).total_seconds()))
    if interval <= 5 * 60:
        candidate = "within-5m"
    elif interval <= 60 * 60:
        candidate = "between-5m-and-1h"
    else:
        candidate = "beyond-1h"
    return {
        "status": "hypothesis" if has_cache_any else "unavailable",
        "evidence": EVIDENCE_INFERRED if has_cache_any else EVIDENCE_UNAVAILABLE,
        "confidence": "partial" if skipped else "hypothesis",
        "timestamped_cache_record_count": timestamped_cache_record_count,
        "positive_timestamped_cache_record_count": positive_count,
        "timestamped_cache_record_span_seconds": interval,
        "candidate": candidate,
        "reason": (
            "Positive timestamped cache telemetry records bound the local cache-observation span, but exact provider cache TTL reuse remains a hypothesis."
        ),
        "interval_basis": "positive_timestamped_cache_records",
        "caveats": caveats,
    }
1254
+
1255
+
1256
def build_cache_diagnostics(summary: UsageSummary) -> dict[str, Any]:
    """Build the cache-diagnostics payload for *summary*, memoized on it.

    The payload combines observed cache token fields, per-position prefix
    segment statistics, a list of cache-miss hypotheses, TTL diagnostics and
    headroom availability. The result is stored in
    ``summary.cache_diagnostics_cache`` and returned unchanged on later calls.
    """
    if summary.cache_diagnostics_cache is not None:
        return summary.cache_diagnostics_cache

    availability = build_metric_availability(summary)
    cache_availability = availability["cache"]
    cache_friendliness = cache_friendliness_for_summary(summary)
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or cache_friendliness.get("skipped_evidence")
    )
    has_cache_read = summary.token_field_presence.get("cache_read", 0) > 0
    has_cache_creation = summary.token_field_presence.get("cache_creation", 0) > 0
    has_cache_any = has_cache_read or has_cache_creation
    cache_read = summary.tokens.get("cache_read", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)
    samples = summary.prompt_cache_audit.samples
    prefix_stats = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS) if samples else []
    confidence = _cache_diagnostic_confidence(skipped=skipped, samples=bool(samples), has_cache=has_cache_any)
    # Every per-segment entry shares this label, so compute it once.
    segment_confidence = "partial" if cache_friendliness.get("confidence") == "partial" else "hypothesis"

    # Most stable positions first; keep only those at or above 0.66 stability.
    stable_prefix_candidates: list[dict[str, Any]] = []
    by_stability = sorted(prefix_stats, key=lambda item: (-item["stability"], item["position"]))
    for stat_item in by_stability[:PROMPT_AUDIT_PREFIX_SEGMENTS]:
        if stat_item["stability"] < 0.66:
            continue
        stable_prefix_candidates.append({
            "position": stat_item["position"],
            "stability": round(float(stat_item["stability"]), 4),
            "volatile_share": round(float(stat_item["volatile_share"]), 4),
            "unique_hashes": stat_item["unique_hashes"],
            "sample_count": stat_item["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "action": "Keep stable instructions, policies, and reusable context before run-specific evidence.",
        })

    # Reuse the trigger recorded by the cache-friendliness finding, if any.
    breaker_trigger = "prefix_position"
    for finding in cache_friendliness.get("findings", []):
        if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
            evidence = finding.get("evidence") if isinstance(finding.get("evidence"), dict) else {}
            breaker_trigger = str(evidence.get("trigger") or breaker_trigger)
            break

    # Most volatile positions first; keep only those at or above 0.34.
    dynamic_prefix_breakers: list[dict[str, Any]] = []
    for stat_item in sorted(prefix_stats, key=lambda item: (-item["volatile_share"], item["position"])):
        if stat_item["volatile_share"] < 0.34:
            continue
        dynamic_prefix_breakers.append({
            "position": stat_item["position"],
            "trigger": breaker_trigger,
            "volatile_share": round(float(stat_item["volatile_share"]), 4),
            "stability": round(float(stat_item["stability"]), 4),
            "unique_hashes": stat_item["unique_hashes"],
            "sample_count": stat_item["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "heuristic": True,
            "action": "Move diffs, logs, timestamps, and command output after stable reusable prompt prefixes.",
        })
    dynamic_prefix_breakers = dynamic_prefix_breakers[:PROMPT_AUDIT_MAX_FINDINGS]

    hypotheses: list[dict[str, Any]] = []
    if not has_cache_any:
        hypotheses.append({
            "id": "cache-fields-missing",
            "evidence": EVIDENCE_UNAVAILABLE,
            "confidence": "partial" if skipped else "unavailable",
            "reason": "No cache_read/cache_creation transcript fields were observed.",
            "action": "Hide cache-read UI or label cache telemetry as missing for this scan.",
        })
    if has_cache_creation and cache_creation > 0 and (not has_cache_read or cache_read == 0):
        hypotheses.append({
            "id": "cache-cold-or-prefix-changed",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "hypothesis",
            "reason": "Cache creation tokens were observed without corresponding cache read tokens.",
            "action": "Check whether stable instructions changed or whether the session was cache-cold.",
        })
    if has_cache_creation and cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        hypotheses.append({
            "id": "cache-read-low-vs-write",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "hypothesis",
            "reason": "Cache reads are small relative to observed cache writes.",
            "action": "Keep reusable prompt prefixes stable across turns before changing large context blocks.",
        })
    if dynamic_prefix_breakers:
        hypotheses.append({
            "id": "volatile-prefix-breakers",
            "evidence": EVIDENCE_INFERRED,
            "confidence": dynamic_prefix_breakers[0]["confidence"],
            "reason": "Redacted prompt segment hashes show volatile content near the prefix window.",
            "action": dynamic_prefix_breakers[0]["action"],
        })
    if skipped:
        hypotheses.append({
            "id": "partial-transcript-scan",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "partial",
            "reason": "Some transcript files, records, or prompt structures were skipped/capped.",
            "action": "Rerun against narrower transcript paths or higher safe scan limits before making decisions.",
        })

    ttl = build_ttl_diagnostics(summary, has_cache_any=has_cache_any, skipped=skipped)
    headroom = build_headroom_availability(summary)
    headroom_diagnostics = {
        **headroom,
        "historical_total_tokens_are_not_headroom": True,
        "required_observation": "live_statusline_snapshot",
    }

    status = "missing"
    if has_cache_any or samples:
        status = "partial" if skipped or cache_friendliness.get("status") == "partial" else "available"
    elif skipped:
        status = "partial"

    diagnostics = {
        "schema_version": CACHE_DIAGNOSTICS_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_INFERRED if (has_cache_any or samples) else EVIDENCE_UNAVAILABLE,
        "heuristic": True,
        "observations": {
            "cache_fields": cache_availability,
            "cache_read_tokens": cache_read,
            "cache_creation_tokens": cache_creation,
        },
        "derived_ratios": cache_availability["derived"],
        "stable_prefix_candidates": stable_prefix_candidates,
        "dynamic_prefix_breakers": dynamic_prefix_breakers,
        "cache_miss_hypotheses": hypotheses[:PROMPT_AUDIT_MAX_FINDINGS],
        "ttl_diagnostics": ttl,
        "headroom_diagnostics": headroom_diagnostics,
        "caveats": [
            "Cache diagnostics are local transcript heuristics and do not prove exact provider cache-prefix state.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Stable-prefix and breaker positions come from bounded redacted segment hashes, not raw prompt text.",
        ],
    }
    summary.cache_diagnostics_cache = diagnostics
    return diagnostics
1397
+
1398
+
1399
def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-diagnostics payload for *summary*.

    Thin alias for ``build_cache_diagnostics``.
    """
    return build_cache_diagnostics(summary)
1401
+
1402
+
1403
def _dominant_transcript(summary: UsageSummary) -> dict[str, Any] | None:
    """Describe the single largest entry of ``summary.by_file``.

    Returns ``None`` when there are no tokens or no per-file counts.
    Otherwise returns the top entry's token count, its share of
    ``summary.total_tokens`` rounded to four places, and a ``dominates`` flag
    that is true when the share is at least 20% and the entry has at least
    1,000 tokens. The file label itself is deliberately not returned.
    """
    total = summary.total_tokens
    if total <= 0 or not summary.by_file:
        return None
    _label, tokens = summary.by_file.most_common(1)[0]
    # total is known to be positive here, so the division is safe.
    share = tokens / total
    return {
        "tokens": tokens,
        "share": round(share, 4),
        "dominates": share >= 0.20 and tokens >= 1_000,
    }
1413
+
1414
+
1415
def _first_dynamic_breaker(cache_diagnostics: dict[str, Any]) -> dict[str, Any] | None:
    """Return the first ``dynamic_prefix_breakers`` entry when it is a dict.

    Returns ``None`` when the key is missing, empty, not a list/tuple, or its
    first element is not a dict.
    """
    breakers = cache_diagnostics.get("dynamic_prefix_breakers")
    # Guard against a malformed payload where the value is not a sequence;
    # indexing a dict or other object with [0] would raise.
    if not isinstance(breakers, (list, tuple)) or not breakers:
        return None
    first = breakers[0]
    return first if isinstance(first, dict) else None
1421
+
1422
+
1423
def build_cache_layout_advice(summary: UsageSummary) -> dict[str, Any]:
    """Build the cache-layout advice payload for *summary*, memoized on it.

    Classifies one observed issue (volatile prefix breaker, long-session
    accumulation, low cache reuse, or missing cache fields), then assembles
    hypothesized causes, follow-up checks and ordered experiments. The result
    is stored in ``summary.cache_layout_advice_cache`` and returned unchanged
    on later calls.
    """
    if summary.cache_layout_advice_cache is not None:
        return summary.cache_layout_advice_cache

    cache_friendliness = cache_friendliness_for_summary(summary)
    cache_diagnostics = cache_diagnostics_for_summary(summary)
    signals = cache_friendliness.get("signals") if isinstance(cache_friendliness.get("signals"), dict) else {}
    dynamic_breaker = _first_dynamic_breaker(cache_diagnostics)
    dominant = _dominant_transcript(summary)
    cache_creation = summary.tokens.get("cache_creation", 0)
    cache_read = summary.tokens.get("cache_read", 0)
    cache_fields = cache_diagnostics.get("observations", {}).get("cache_fields", {}) if isinstance(cache_diagnostics.get("observations"), dict) else {}
    cache_status = cache_fields.get("status") if isinstance(cache_fields, dict) else None
    stable_prefix_share = signals.get("stable_prefix_share")
    volatile_prefix_share = signals.get("volatile_prefix_share")
    volatile_tail_share = signals.get("volatile_tail_share")
    max_prefix_position = dynamic_breaker.get("position") if dynamic_breaker else None
    max_prefix_position_volatile_share = dynamic_breaker.get("volatile_share") if dynamic_breaker else signals.get("max_prefix_position_volatile_share")

    status = "missing"
    confidence = "unavailable"
    observed_issue = "unknown"
    priority = "P2"
    hypothesized_causes: list[dict[str, Any]] = []
    corroborated_causes: list[dict[str, Any]] = []
    next_checks: list[dict[str, Any]] = []
    recommended_experiments: list[dict[str, Any]] = []

    has_cache_any = bool(
        summary.token_field_presence.get("cache_read", 0)
        or summary.token_field_presence.get("cache_creation", 0)
    )
    has_prompt_samples = bool(summary.prompt_cache_audit.samples)
    if has_cache_any or has_prompt_samples:
        status = "partial" if (
            not has_prompt_samples
            or cache_friendliness.get("status") == "partial"
            or cache_diagnostics.get("status") == "partial"
            or summary.skipped_files
            or summary.skipped_records
            or summary.parse_errors
        ) else "available"
        confidence = "partial" if status == "partial" else "hypothesis"

    volatile_prefix_breaker = bool(
        dynamic_breaker
        and cache_creation > 0
        and (max_prefix_position in {0, 1} or (max_prefix_position_volatile_share or 0) >= PROMPT_PREFIX_VOLATILE_THRESHOLD)
    )
    long_session_dominates = bool(dominant and dominant.get("dominates"))

    # Exactly one observed issue is reported; the branches are ordered by
    # precedence, with the volatile-prefix case winning over the others.
    if volatile_prefix_breaker:
        observed_issue = "volatile_prefix_breaker"
        priority = "P0" if cache_creation >= 50_000 and max_prefix_position in {0, 1} else "P1"
        hypothesized_causes.append({
            "id": "prefix-position-churn",
            "confidence": confidence,
            "evidence": EVIDENCE_INFERRED,
            "reason": (
                "A highly volatile redacted prompt segment appears in the early prefix window; "
                "this identifies a layout issue, not a confirmed source."
            ),
            "next_check": "Check whether startup context, generated evidence, or tool/MCP catalog changes are moving before stable policy.",
        })
        if cache_diagnostics.get("stable_prefix_candidates"):
            hypothesized_causes.append({
                "id": "evidence-before-policy",
                "confidence": confidence,
                "evidence": EVIDENCE_INFERRED,
                "reason": (
                    "Stable reusable segments appear elsewhere while the early prefix churns; "
                    "check whether logs, diffs, timestamps, or file evidence precede stable instructions."
                ),
                "next_check": "Keep stable policy/instructions first and move generated run evidence later.",
            })
        next_checks.append({
            "id": "inspect-startup-context-size",
            "confidence": "hypothesis",
            "command_templates": [
                "context-guard-diet scan <repo>",
                "context-guard-diet structural-waste <repo>",
            ],
            "evidence_required_for_corroboration": (
                "Large or duplicate CLAUDE.md/AGENTS.md/GEMINI.md findings from diet output."
            ),
        })
    elif long_session_dominates:
        observed_issue = "long_session_accumulation"
        priority = "P1"
    elif cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        observed_issue = "low_cache_reuse"
        priority = "P1"
    elif cache_status == "missing" or not has_cache_any:
        observed_issue = "missing_cache_fields"
        priority = "P2"

    # Experiments are numbered in the order they are appended.
    if long_session_dominates:
        recommended_experiments.append({
            "id": "split-long-sessions",
            "order": len(recommended_experiments) + 1,
            "priority": "P1",
            "effort": "low",
            "action": "Use /clear between unrelated tasks and /compact focus on changed files, failing tests, and remaining TODO during long work.",
            "expected_signal": "Cache creation per comparable task decreases and one transcript no longer dominates observed tokens.",
            "verification": "Re-run context-guard-audit on a comparable window and compare cache_creation, cache_amortization, and top transcript share.",
            "evidence": dominant or {},
        })
    if volatile_prefix_breaker:
        recommended_experiments.append({
            "id": "stabilize-cache-prefix",
            "order": len(recommended_experiments) + 1,
            "priority": priority,
            "effort": "medium",
            "action": "Keep stable reusable instructions/policy before volatile logs, diffs, timestamps, and generated file evidence.",
            "expected_signal": "Stable prefix share rises and volatile prefix share falls on matched audit windows.",
            "verification": "Re-run context-guard-audit --json --recommend and compare cache_layout_advice plus cache_friendliness signals.",
            "evidence": {
                "dynamic_prefix_breaker_position": max_prefix_position,
                "dynamic_prefix_breaker_volatile_share": max_prefix_position_volatile_share,
            },
        })
        recommended_experiments.append({
            "id": "run-context-diet-checks",
            "order": len(recommended_experiments) + 1,
            "priority": "P1",
            "effort": "low",
            "action": "Run the generated diet command templates and treat any large/duplicate context-file findings as corroborating evidence before editing instructions.",
            "expected_signal": "Diet output identifies or rules out oversized/duplicated startup context as a contributor.",
            "verification": "Record diet JSON separately; do not convert prefix-position evidence alone into a confirmed startup-context cause.",
            "command_templates": [
                "context-guard-diet scan <repo> --json > diet.json",
                "context-guard-diet structural-waste <repo> --json > structural-waste.json",
            ],
        })
    if cache_creation >= 50_000 and summary.cache_amortization_defined and 1.0 <= summary.cache_amortization < 5.0:
        recommended_experiments.append({
            "id": "defer-longer-ttl-until-prefix-stable" if volatile_prefix_breaker else "evaluate-longer-ttl-after-stability-check",
            "order": len(recommended_experiments) + 1,
            "priority": "P2",
            "effort": "medium",
            "action": "Treat longer TTL as secondary; first corroborate stable prefix reuse and current provider TTL/pricing behavior.",
            "expected_signal": "TTL evaluation happens only after prefix volatility is reduced or ruled out.",
            "verification": "Use timestamped cache telemetry and provider-measured billing/cost evidence; historical token totals alone are insufficient.",
        })

    # Fallback checks apply only when no experiment could be recommended.
    if not recommended_experiments and status == "partial":
        next_checks.append({
            "id": "rerun-narrower-audit",
            "confidence": "partial",
            "command_templates": ["context-guard-audit <transcript-or-project-dir> --json --recommend"],
            "evidence_required_for_corroboration": "Enough uncapped prompt/cache records to classify prefix layout.",
        })
    if not recommended_experiments and observed_issue == "missing_cache_fields":
        next_checks.append({
            "id": "collect-cache-telemetry",
            "confidence": "unavailable",
            "command_templates": ["context-guard-audit ~/.claude/projects --json --recommend"],
            "evidence_required_for_corroboration": "Transcript records with cache_read/cache_creation fields.",
        })

    advice = {
        "schema_version": CACHE_LAYOUT_ADVICE_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "heuristic": True,
        "observed_issue": observed_issue,
        "priority": priority,
        "observed_summary": {
            "cache_creation_tokens": cache_creation,
            "cache_read_tokens": cache_read,
            "cache_amortization": round(summary.cache_amortization, 4) if summary.cache_amortization_defined else None,
            "stable_prefix_share": stable_prefix_share,
            "volatile_prefix_share": volatile_prefix_share,
            "volatile_tail_share": volatile_tail_share,
            "max_prefix_position": max_prefix_position,
            "max_prefix_position_volatile_share": max_prefix_position_volatile_share,
            "dominant_transcript_share": dominant.get("share") if dominant else None,
        },
        "hypothesized_causes": hypothesized_causes,
        "corroborated_causes": corroborated_causes,
        "next_checks": next_checks,
        "recommended_experiments": recommended_experiments,
        "caveats": [
            "Cache layout advice is a local transcript heuristic, not billing authority or provider-cache proof.",
            "Observed issues come from cache fields and redacted segment statistics; causes remain hypotheses until corroborated by diet/structural evidence.",
            "Generated command templates use placeholders and must not be treated as observed user commands or paths.",
            "Use matched before/after audits before making token or cost savings claims.",
        ],
    }
    summary.cache_layout_advice_cache = advice
    return advice
1613
+
1614
+
1615
def cache_layout_advice_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-layout advice payload for *summary*.

    Thin alias for ``build_cache_layout_advice``.
    """
    return build_cache_layout_advice(summary)
1617
+
1618
+
1146
1619
  def build_metric_caveats(summary: UsageSummary) -> list[str]:
1147
1620
  caveats = [
1148
1621
  "Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
@@ -1177,6 +1650,8 @@ def feasibility_json(
1177
1650
  stable_tokens = stable_token_counter(summary.tokens)
1178
1651
  stable_total_tokens = sum(stable_tokens.values())
1179
1652
  cache_friendliness = cache_friendliness_for_summary(summary)
1653
+ cache_diagnostics = cache_diagnostics_for_summary(summary)
1654
+ cache_layout_advice = cache_layout_advice_for_summary(summary)
1180
1655
  return {
1181
1656
  "schema_version": FEASIBILITY_SCHEMA_VERSION,
1182
1657
  "producer": FEASIBILITY_PRODUCER,
@@ -1195,6 +1670,8 @@ def feasibility_json(
1195
1670
  "context_availability",
1196
1671
  "headroom_availability",
1197
1672
  "cache_friendliness",
1673
+ "cache_diagnostics",
1674
+ "cache_layout_advice",
1198
1675
  "totals",
1199
1676
  ],
1200
1677
  "diagnostic_fields": ["summary"],
@@ -1222,6 +1699,8 @@ def feasibility_json(
1222
1699
  "context_availability": availability["context"],
1223
1700
  "headroom_availability": availability["headroom"],
1224
1701
  "cache_friendliness": cache_friendliness,
1702
+ "cache_diagnostics": cache_diagnostics,
1703
+ "cache_layout_advice": cache_layout_advice,
1225
1704
  "totals": {
1226
1705
  "total_tokens": stable_total_tokens,
1227
1706
  "tokens": stable_tokens,
@@ -1272,6 +1751,37 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
1272
1751
  output_ratio = output_tokens / total
1273
1752
  input_ratio = input_tokens / total
1274
1753
  cache_friendliness = cache_friendliness_for_summary(summary)
1754
+ cache_diagnostics = cache_diagnostics_for_summary(summary)
1755
+ cache_layout_advice = cache_layout_advice_for_summary(summary)
1756
+ if cache_layout_advice.get("observed_issue") == "volatile_prefix_breaker":
1757
+ evidence = {
1758
+ "observed_issue": cache_layout_advice.get("observed_issue"),
1759
+ "priority": cache_layout_advice.get("priority"),
1760
+ "confidence": cache_layout_advice.get("confidence"),
1761
+ "cache_creation_tokens": cache_creation,
1762
+ "cache_read_tokens": cache_read,
1763
+ }
1764
+ observed_summary = cache_layout_advice.get("observed_summary")
1765
+ if isinstance(observed_summary, dict):
1766
+ for key in ("max_prefix_position", "max_prefix_position_volatile_share", "stable_prefix_share", "volatile_prefix_share"):
1767
+ evidence[key] = observed_summary.get(key)
1768
+ rec = recommendation(
1769
+ "prioritize-cache-prefix-stabilization",
1770
+ "Prioritize cache-prefix stabilization before TTL or output trimming",
1771
+ (
1772
+ "Cache creation remains material and redacted segment statistics show a volatile early prefix; "
1773
+ "this is an experiment-prioritization signal, not a confirmed root cause."
1774
+ ),
1775
+ (
1776
+ "If one transcript dominates, split unrelated work into shorter sessions; then check startup/context "
1777
+ "size and keep stable policy before volatile logs, diffs, timestamps, and generated evidence."
1778
+ ),
1779
+ str(cache_layout_advice.get("priority") or "P1"),
1780
+ evidence,
1781
+ )
1782
+ rec["heuristic"] = True
1783
+ rec["confidence"] = cache_layout_advice.get("confidence")
1784
+ recs.append(rec)
1275
1785
  for finding in cache_friendliness.get("findings", []):
1276
1786
  if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
1277
1787
  evidence = dict(finding.get("evidence") or {})
@@ -1331,25 +1841,57 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
1331
1841
  },
1332
1842
  ))
1333
1843
  if cache_creation >= 50_000 and 1.0 <= summary.cache_amortization < 5.0:
1844
+ ttl = cache_diagnostics.get("ttl_diagnostics") or {}
1845
+ ttl_status = str(ttl.get("status") or "unavailable")
1846
+ ttl_confidence = str(ttl.get("confidence") or "unavailable")
1847
+ ttl_candidate = ttl.get("candidate")
1848
+ ttl_span = ttl.get("timestamped_cache_record_span_seconds")
1849
+ if ttl_status == "hypothesis" and ttl_candidate in {"between-5m-and-1h", "beyond-1h"}:
1850
+ ttl_reason = (
1851
+ f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1852
+ f"{cache_creation} write tokens; timestamped cache telemetry spans {ttl_span} seconds "
1853
+ f"({ttl_candidate})."
1854
+ )
1855
+ ttl_action = (
1856
+ "Evaluate a longer provider prompt-cache TTL only after confirming the same stable prefix "
1857
+ "pattern in representative sessions and rechecking current provider TTL/pricing documentation."
1858
+ )
1859
+ elif ttl_status == "hypothesis":
1860
+ ttl_reason = (
1861
+ f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1862
+ f"{cache_creation} write tokens, but timestamped cache telemetry currently points to {ttl_candidate}."
1863
+ )
1864
+ ttl_action = (
1865
+ "Keep collecting timestamped cache read/write evidence; do not enable a longer TTL solely from this scan."
1866
+ )
1867
+ else:
1868
+ ttl_reason = (
1869
+ f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1870
+ f"{cache_creation} write tokens, but TTL diagnostics are {ttl_status} because this scan lacks "
1871
+ "at least two timestamped cache telemetry records."
1872
+ )
1873
+ ttl_action = (
1874
+ "Collect or inspect timestamped cache read/write evidence before evaluating a longer provider "
1875
+ "prompt-cache TTL; historical token totals alone are not TTL evidence."
1876
+ )
1334
1877
  recs.append(recommendation(
1335
1878
  "evaluate-1h-ttl-cache",
1336
- "Cache writes are large; evaluate the 1h TTL cache beta",
1337
- (
1338
- f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
1339
- f"{cache_creation} write tokens; absolute write cost is high and reuse is moderate. "
1340
- "This metric does not inspect timestamps, so confirm reuse spans >5min in a sample "
1341
- "session before enabling 1h TTL."
1342
- ),
1343
- (
1344
- "If sessions reuse the same prefix beyond the 5-minute default TTL, evaluate the 1h prompt cache "
1345
- "beta (write 2x, read 0.1x). It pays off when reuse spans the gap between two 5-min cache writes."
1346
- ),
1879
+ "Cache writes are large; validate TTL evidence before longer TTL",
1880
+ ttl_reason,
1881
+ ttl_action,
1347
1882
  "P2",
1348
1883
  {
1349
1884
  "cache_creation": cache_creation,
1350
1885
  "cache_read": cache_read,
1351
1886
  "cache_amortization": round(summary.cache_amortization, 4),
1352
1887
  "cache_hit_rate": round(summary.cache_hit_rate, 4),
1888
+ "ttl_status": ttl_status,
1889
+ "ttl_evidence": ttl.get("evidence") or EVIDENCE_UNAVAILABLE,
1890
+ "ttl_confidence": ttl_confidence,
1891
+ "ttl_candidate": ttl_candidate,
1892
+ "timestamped_cache_record_count": ttl.get("timestamped_cache_record_count"),
1893
+ "positive_timestamped_cache_record_count": ttl.get("positive_timestamped_cache_record_count"),
1894
+ "timestamped_cache_record_span_seconds": ttl_span,
1353
1895
  "heuristic": True,
1354
1896
  },
1355
1897
  ))
@@ -1462,6 +2004,8 @@ def summary_json(
1462
2004
  "top_commands": counter_json(summary.by_command, top),
1463
2005
  "top_tools": counter_json(summary.by_tool, top),
1464
2006
  "cache_friendliness": cache_friendliness_for_summary(summary),
2007
+ "cache_diagnostics": cache_diagnostics_for_summary(summary),
2008
+ "cache_layout_advice": cache_layout_advice_for_summary(summary),
1465
2009
  }
1466
2010
  if include_recommendations:
1467
2011
  data["recommendations"] = build_recommendations(summary, top)
@@ -1574,6 +2118,47 @@ def main() -> int:
1574
2118
  if isinstance(finding, dict):
1575
2119
  print(f" finding [{finding.get('severity')}] {finding.get('id')}: {finding.get('title')}")
1576
2120
 
2121
+ cache_diagnostics = cache_diagnostics_for_summary(summary)
2122
+ print("\nCache diagnostics")
2123
+ print(f" status {cache_diagnostics.get('status')}")
2124
+ print(f" confidence {cache_diagnostics.get('confidence')}")
2125
+ hypotheses = cache_diagnostics.get("cache_miss_hypotheses") or []
2126
+ if hypotheses:
2127
+ first = hypotheses[0]
2128
+ print(f" top_hypothesis {first.get('id')} ({first.get('confidence')})")
2129
+ stable_candidates = cache_diagnostics.get("stable_prefix_candidates") or []
2130
+ if stable_candidates:
2131
+ first = stable_candidates[0]
2132
+ print(f" stable_prefix_candidate position={first.get('position')} stability={first.get('stability')}")
2133
+ breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
2134
+ if breakers:
2135
+ first = breakers[0]
2136
+ print(f" dynamic_prefix_breaker position={first.get('position')} volatile_share={first.get('volatile_share')}")
2137
+ ttl = cache_diagnostics.get("ttl_diagnostics") or {}
2138
+ print(f" ttl_status {ttl.get('status')} ({ttl.get('confidence')})")
2139
+ headroom = cache_diagnostics.get("headroom_diagnostics") or {}
2140
+ print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")
2141
+
2142
+ cache_layout_advice = cache_layout_advice_for_summary(summary)
2143
+ if cache_layout_advice.get("status") != "missing" or cache_layout_advice.get("observed_issue") != "unknown":
2144
+ print("\nCache layout advice")
2145
+ print(f" status {cache_layout_advice.get('status')}")
2146
+ print(f" confidence {cache_layout_advice.get('confidence')}")
2147
+ print(f" observed_issue {cache_layout_advice.get('observed_issue')}")
2148
+ print(f" priority {cache_layout_advice.get('priority')}")
2149
+ experiments = cache_layout_advice.get("recommended_experiments") or []
2150
+ if experiments:
2151
+ first = experiments[0]
2152
+ print(f" first_experiment {first.get('id')} ({first.get('priority')})")
2153
+ print(f" experiment_action {first.get('action')}")
2154
+ checks = cache_layout_advice.get("next_checks") or []
2155
+ if checks:
2156
+ first = checks[0]
2157
+ print(f" next_check {first.get('id')}")
2158
+ templates = first.get("command_templates") or []
2159
+ if templates:
2160
+ print(f" command_template {templates[0]}")
2161
+
1577
2162
  model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
1578
2163
  print_counter("By model", model_totals, args.top)
1579
2164