@ictechgy/context-guard 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/CHANGELOG.md +13 -0
  2. package/README.ko.md +16 -3
  3. package/README.md +13 -3
  4. package/context-guard-kit/README.md +2 -2
  5. package/context-guard-kit/benchmark_runner.py +244 -6
  6. package/context-guard-kit/claude_transcript_cost_audit.py +443 -1
  7. package/docs/benchmark-fixtures/learned-compression-baseline-context-pack.prompt.example.md +19 -0
  8. package/docs/benchmark-fixtures/learned-compression-candidate-digest.prompt.example.md +21 -0
  9. package/docs/benchmark-fixtures/learned-compression.tasks.example.json +5 -1
  10. package/docs/benchmark-fixtures/output-transform-baseline-raw-output.prompt.example.md +20 -0
  11. package/docs/benchmark-fixtures/output-transform-digest-receipt.prompt.example.md +23 -0
  12. package/docs/benchmark-fixtures/output-transform.tasks.example.json +28 -0
  13. package/docs/benchmark-fixtures/output-transform.variants.example.json +10 -0
  14. package/docs/benchmark-fixtures/visual-ocr-cropped-ocr.prompt.example.md +22 -0
  15. package/docs/benchmark-fixtures/visual-ocr-full-visual.prompt.example.md +19 -0
  16. package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +5 -1
  17. package/docs/benchmark-workflow-examples.md +6 -2
  18. package/docs/benchmark-workflows/self-hosted-metrics-ledger.example.jsonl +1 -0
  19. package/docs/cache-diagnostics-schema.md +25 -4
  20. package/docs/experimental-benchmark-fixtures.md +17 -6
  21. package/docs/mac-visibility-feasibility-schema.md +62 -0
  22. package/docs/mac-visibility-feasibility.example.json +130 -0
  23. package/package.json +5 -1
  24. package/packaging/homebrew/context-guard.rb.template +1 -1
  25. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  26. package/plugins/context-guard/README.ko.md +3 -3
  27. package/plugins/context-guard/README.md +3 -3
  28. package/plugins/context-guard/bin/context-guard-audit +443 -1
  29. package/plugins/context-guard/bin/context-guard-bench +244 -6
@@ -46,9 +46,11 @@ COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
46
46
  MODEL_KEYS = ("model", "model_id", "modelId")
47
47
  QUERY_SOURCE_KEYS = ("query_source", "querySource")
48
48
  TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
49
- FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.2"
49
+ FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.3"
50
+ MAC_VISIBILITY_SCHEMA_VERSION = "contextguard.mac-visibility.v1"
50
51
  FEASIBILITY_PRODUCER = "context-guard-audit"
51
52
  CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
53
+ CACHE_LAYOUT_ADVICE_SCHEMA_VERSION = "contextguard.cache-layout-advice.v1"
52
54
  MAX_ERROR_EXAMPLES = 20
53
55
  JSON_PARSE_RECURSION_LIMIT = 10_000
54
56
  READ_CHUNK_BYTES = 64 * 1024
@@ -184,6 +186,7 @@ class UsageSummary:
184
186
  prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
185
187
  cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
186
188
  cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
189
+ cache_layout_advice_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
187
190
 
188
191
  @property
189
192
  def total_tokens(self) -> int:
@@ -1398,6 +1401,222 @@ def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
1398
1401
  return build_cache_diagnostics(summary)
1399
1402
 
1400
1403
 
1404
def _dominant_transcript(summary: UsageSummary) -> dict[str, Any] | None:
    """Describe the transcript file that holds the most observed tokens.

    Returns ``None`` when ``summary.total_tokens`` is not positive or when
    ``summary.by_file`` is empty. Otherwise returns a dict with the top
    file's token count (``tokens``), its share of ``summary.total_tokens``
    rounded to four decimals (``share``), and ``dominates``, which is true
    when the unrounded share is at least 0.20 and the file has at least
    1,000 tokens. The file label itself is not part of the result.
    """
    total = summary.total_tokens
    per_file = summary.by_file
    if total <= 0 or not per_file:
        return None

    top_entry = per_file.most_common(1)[0]
    top_tokens = top_entry[1]
    # ``total`` is strictly positive past the guard above, so the division is safe.
    ratio = top_tokens / total
    is_dominant = ratio >= 0.20 and top_tokens >= 1_000
    return {
        "tokens": top_tokens,
        "share": round(ratio, 4),
        "dominates": is_dominant,
    }
1414
+
1415
+
1416
def _first_dynamic_breaker(cache_diagnostics: dict[str, Any]) -> dict[str, Any] | None:
    """Return the first dynamic prefix breaker entry when a usable one exists.

    Args:
        cache_diagnostics: Cache diagnostics payload; only its
            ``"dynamic_prefix_breakers"`` key is read.

    Returns:
        The first element of ``dynamic_prefix_breakers`` when that value is a
        non-empty list or tuple whose first element is a dict; otherwise
        ``None``.
    """
    breakers = cache_diagnostics.get("dynamic_prefix_breakers")
    # Only index real sequences: a malformed payload such as a dict would raise
    # KeyError on ``[0]``, and a string would yield a meaningless character.
    if not isinstance(breakers, (list, tuple)) or not breakers:
        return None
    first = breakers[0]
    return first if isinstance(first, dict) else None
1422
+
1423
+
1424
def build_cache_layout_advice(summary: UsageSummary) -> dict[str, Any]:
    """Build heuristic cache-layout advice for ``summary`` and memoize it.

    The advice combines the cache-friendliness signals, the first dynamic
    prefix breaker from the cache diagnostics, the dominant transcript share,
    and the observed cache token totals into one observed issue, a priority,
    hypothesized causes, next checks, and an ordered list of experiments.

    Args:
        summary: Usage summary to analyze. When its
            ``cache_layout_advice_cache`` attribute is already set, that value
            is returned unchanged; otherwise the attribute is populated with
            the newly built advice.

    Returns:
        The advice dictionary, tagged with ``CACHE_LAYOUT_ADVICE_SCHEMA_VERSION``.
    """
    memoized = summary.cache_layout_advice_cache
    if memoized is not None:
        return memoized

    friendliness = cache_friendliness_for_summary(summary)
    diagnostics = cache_diagnostics_for_summary(summary)

    signals = friendliness.get("signals")
    if not isinstance(signals, dict):
        signals = {}
    breaker = _first_dynamic_breaker(diagnostics)
    top_file = _dominant_transcript(summary)
    creation_tokens = summary.tokens.get("cache_creation", 0)
    read_tokens = summary.tokens.get("cache_read", 0)

    observations = diagnostics.get("observations")
    cache_fields = observations.get("cache_fields", {}) if isinstance(observations, dict) else {}
    cache_status = cache_fields.get("status") if isinstance(cache_fields, dict) else None

    stable_prefix_share = signals.get("stable_prefix_share")
    volatile_prefix_share = signals.get("volatile_prefix_share")
    volatile_tail_share = signals.get("volatile_tail_share")
    if breaker:
        breaker_position = breaker.get("position")
        breaker_volatile_share = breaker.get("volatile_share")
    else:
        # Without a breaker entry, fall back to the aggregate friendliness signal.
        breaker_position = None
        breaker_volatile_share = signals.get("max_prefix_position_volatile_share")

    # Defaults describe a scan with neither cache fields nor prompt samples.
    status = "missing"
    confidence = "unavailable"
    observed_issue = "unknown"
    priority = "P2"
    hypothesized_causes: list[dict[str, Any]] = []
    corroborated_causes: list[dict[str, Any]] = []
    next_checks: list[dict[str, Any]] = []
    recommended_experiments: list[dict[str, Any]] = []

    presence = summary.token_field_presence
    has_cache_any = bool(presence.get("cache_read", 0) or presence.get("cache_creation", 0))
    has_prompt_samples = bool(summary.prompt_cache_audit.samples)
    if has_cache_any or has_prompt_samples:
        incomplete = (
            not has_prompt_samples
            or friendliness.get("status") == "partial"
            or diagnostics.get("status") == "partial"
            or summary.skipped_files
            or summary.skipped_records
            or summary.parse_errors
        )
        if incomplete:
            status, confidence = "partial", "partial"
        else:
            status, confidence = "available", "hypothesis"

    volatile_prefix_breaker = False
    if breaker and creation_tokens > 0:
        # Either the breaker sits in the first two prefix positions or its
        # volatile share crosses the configured threshold.
        volatile_prefix_breaker = bool(
            breaker_position in {0, 1}
            or (breaker_volatile_share or 0) >= PROMPT_PREFIX_VOLATILE_THRESHOLD
        )
    long_session_dominates = bool(top_file and top_file.get("dominates"))

    # Exactly one observed issue is reported; the branches are in priority order.
    if volatile_prefix_breaker:
        observed_issue = "volatile_prefix_breaker"
        if creation_tokens >= 50_000 and breaker_position in {0, 1}:
            priority = "P0"
        else:
            priority = "P1"
        churn_cause = {
            "id": "prefix-position-churn",
            "confidence": confidence,
            "evidence": EVIDENCE_INFERRED,
            "reason": (
                "A highly volatile redacted prompt segment appears in the early prefix window; "
                "this identifies a layout issue, not a confirmed source."
            ),
            "next_check": "Check whether startup context, generated evidence, or tool/MCP catalog changes are moving before stable policy.",
        }
        hypothesized_causes.append(churn_cause)
        if diagnostics.get("stable_prefix_candidates"):
            ordering_cause = {
                "id": "evidence-before-policy",
                "confidence": confidence,
                "evidence": EVIDENCE_INFERRED,
                "reason": (
                    "Stable reusable segments appear elsewhere while the early prefix churns; "
                    "check whether logs, diffs, timestamps, or file evidence precede stable instructions."
                ),
                "next_check": "Keep stable policy/instructions first and move generated run evidence later.",
            }
            hypothesized_causes.append(ordering_cause)
        startup_check = {
            "id": "inspect-startup-context-size",
            "confidence": "hypothesis",
            "command_templates": [
                "context-guard-diet scan <repo>",
                "context-guard-diet structural-waste <repo>",
            ],
            "evidence_required_for_corroboration": (
                "Large or duplicate CLAUDE.md/AGENTS.md/GEMINI.md findings from diet output."
            ),
        }
        next_checks.append(startup_check)
    elif long_session_dominates:
        observed_issue = "long_session_accumulation"
        priority = "P1"
    elif creation_tokens >= 10_000 and read_tokens > 0 and summary.cache_amortization < 0.5:
        observed_issue = "low_cache_reuse"
        priority = "P1"
    elif cache_status == "missing" or not has_cache_any:
        observed_issue = "missing_cache_fields"
        priority = "P2"

    # Experiments are numbered in the order they are appended.
    if long_session_dominates:
        split_sessions = {
            "id": "split-long-sessions",
            "order": len(recommended_experiments) + 1,
            "priority": "P1",
            "effort": "low",
            "action": "Use /clear between unrelated tasks and /compact focus on changed files, failing tests, and remaining TODO during long work.",
            "expected_signal": "Cache creation per comparable task decreases and one transcript no longer dominates observed tokens.",
            "verification": "Re-run context-guard-audit on a comparable window and compare cache_creation, cache_amortization, and top transcript share.",
            "evidence": top_file or {},
        }
        recommended_experiments.append(split_sessions)
    if volatile_prefix_breaker:
        stabilize_prefix = {
            "id": "stabilize-cache-prefix",
            "order": len(recommended_experiments) + 1,
            "priority": priority,
            "effort": "medium",
            "action": "Keep stable reusable instructions/policy before volatile logs, diffs, timestamps, and generated file evidence.",
            "expected_signal": "Stable prefix share rises and volatile prefix share falls on matched audit windows.",
            "verification": "Re-run context-guard-audit --json --recommend and compare cache_layout_advice plus cache_friendliness signals.",
            "evidence": {
                "dynamic_prefix_breaker_position": breaker_position,
                "dynamic_prefix_breaker_volatile_share": breaker_volatile_share,
            },
        }
        recommended_experiments.append(stabilize_prefix)
        diet_checks = {
            "id": "run-context-diet-checks",
            "order": len(recommended_experiments) + 1,
            "priority": "P1",
            "effort": "low",
            "action": "Run the generated diet command templates and treat any large/duplicate context-file findings as corroborating evidence before editing instructions.",
            "expected_signal": "Diet output identifies or rules out oversized/duplicated startup context as a contributor.",
            "verification": "Record diet JSON separately; do not convert prefix-position evidence alone into a confirmed startup-context cause.",
            "command_templates": [
                "context-guard-diet scan <repo> --json > diet.json",
                "context-guard-diet structural-waste <repo> --json > structural-waste.json",
            ],
        }
        recommended_experiments.append(diet_checks)
    if creation_tokens >= 50_000 and summary.cache_amortization_defined and 1.0 <= summary.cache_amortization < 5.0:
        if volatile_prefix_breaker:
            ttl_experiment_id = "defer-longer-ttl-until-prefix-stable"
        else:
            ttl_experiment_id = "evaluate-longer-ttl-after-stability-check"
        ttl_experiment = {
            "id": ttl_experiment_id,
            "order": len(recommended_experiments) + 1,
            "priority": "P2",
            "effort": "medium",
            "action": "Treat longer TTL as secondary; first corroborate stable prefix reuse and current provider TTL/pricing behavior.",
            "expected_signal": "TTL evaluation happens only after prefix volatility is reduced or ruled out.",
            "verification": "Use timestamped cache telemetry and provider-measured billing/cost evidence; historical token totals alone are insufficient.",
        }
        recommended_experiments.append(ttl_experiment)

    # With no experiment to suggest, point at the data that is still needed.
    if not recommended_experiments and status == "partial":
        next_checks.append({
            "id": "rerun-narrower-audit",
            "confidence": "partial",
            "command_templates": ["context-guard-audit <transcript-or-project-dir> --json --recommend"],
            "evidence_required_for_corroboration": "Enough uncapped prompt/cache records to classify prefix layout.",
        })
    if not recommended_experiments and observed_issue == "missing_cache_fields":
        next_checks.append({
            "id": "collect-cache-telemetry",
            "confidence": "unavailable",
            "command_templates": ["context-guard-audit ~/.claude/projects --json --recommend"],
            "evidence_required_for_corroboration": "Transcript records with cache_read/cache_creation fields.",
        })

    if summary.cache_amortization_defined:
        amortization = round(summary.cache_amortization, 4)
    else:
        amortization = None
    observed_summary = {
        "cache_creation_tokens": creation_tokens,
        "cache_read_tokens": read_tokens,
        "cache_amortization": amortization,
        "stable_prefix_share": stable_prefix_share,
        "volatile_prefix_share": volatile_prefix_share,
        "volatile_tail_share": volatile_tail_share,
        "max_prefix_position": breaker_position,
        "max_prefix_position_volatile_share": breaker_volatile_share,
        "dominant_transcript_share": top_file.get("share") if top_file else None,
    }
    advice = {
        "schema_version": CACHE_LAYOUT_ADVICE_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "heuristic": True,
        "observed_issue": observed_issue,
        "priority": priority,
        "observed_summary": observed_summary,
        "hypothesized_causes": hypothesized_causes,
        "corroborated_causes": corroborated_causes,
        "next_checks": next_checks,
        "recommended_experiments": recommended_experiments,
        "caveats": [
            "Cache layout advice is a local transcript heuristic, not billing authority or provider-cache proof.",
            "Observed issues come from cache fields and redacted segment statistics; causes remain hypotheses until corroborated by diet/structural evidence.",
            "Generated command templates use placeholders and must not be treated as observed user commands or paths.",
            "Use matched before/after audits before making token or cost savings claims.",
        ],
    }
    summary.cache_layout_advice_cache = advice
    return advice
1614
+
1615
+
1616
def cache_layout_advice_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache layout advice for ``summary``.

    Thin accessor that delegates to ``build_cache_layout_advice``.
    """
    return build_cache_layout_advice(summary)
1618
+
1619
+
1401
1620
  def build_metric_caveats(summary: UsageSummary) -> list[str]:
1402
1621
  caveats = [
1403
1622
  "Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
@@ -1417,6 +1636,168 @@ def build_metric_caveats(summary: UsageSummary) -> list[str]:
1417
1636
  return caveats
1418
1637
 
1419
1638
 
1639
def _mac_card(
    card_id: str,
    title: str,
    status: str,
    binding_paths: list[str],
    *,
    required_observation: str | None = None,
) -> dict[str, Any]:
    """Build one card entry for the macOS visibility contract.

    Args:
        card_id: Value stored under ``"id"``.
        title: Value stored under ``"title"``.
        status: Value stored under ``"status"``.
        binding_paths: Field paths stored (by reference) under
            ``"binding_paths"``.
        required_observation: When truthy, stored under
            ``"required_observation"``; otherwise the key is omitted.

    Returns:
        The card dictionary.
    """
    optional: dict[str, Any] = {}
    if required_observation:
        optional["required_observation"] = required_observation
    return {
        "id": card_id,
        "title": title,
        "status": status,
        "binding_paths": binding_paths,
        **optional,
    }
1656
+
1657
+
1658
def build_mac_visibility_contract(
    *,
    availability: dict[str, Any],
    integrity: dict[str, Any],
    cache_layout_advice: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the pre-GUI macOS visibility binding contract.

    The contract is a thin index over feasibility fields that are already
    emitted: it reads only ``status`` values from its inputs and lists the
    field paths each card binds to. It does not recompute metrics, read
    diagnostic summary data, or infer live context/headroom from historical
    transcript totals.

    Args:
        availability: Mapping whose ``tokens``, ``context``, ``headroom``,
            ``cache`` and ``cost`` entries are each read for a ``status``
            value; an absent or falsy entry counts as ``"missing"``.
        integrity: Scan-integrity mapping; its ``status`` defaults to
            ``"partial"`` when absent.
        cache_layout_advice: Cache layout advice mapping; its ``status``
            defaults to ``"missing"`` when absent.

    Returns:
        The contract dictionary: schema version, readiness, bindable
        top-level fields, primary cards, missing live observations, and
        claim boundaries.
    """

    def metric_status(metric: str) -> str:
        # An absent or falsy availability entry is reported as "missing".
        entry = availability.get(metric) or {}
        return str(entry.get("status", "missing"))

    token_status = metric_status("tokens")
    scan_status = str(integrity.get("status", "partial"))
    if token_status not in {"available", "partial"}:
        readiness_status = "missing"
        readiness_reason = "Token totals are missing from the transcript scan; show setup or unavailable state."
    elif token_status == "available" and scan_status == "complete":
        readiness_status = "ready"
        readiness_reason = "Transcript token totals are available and the scan completed within configured limits."
    else:
        readiness_status = "partial"
        readiness_reason = "Some stable fields can be shown, but scan integrity or metric availability is partial."

    context_status = metric_status("context")
    headroom_status = metric_status("headroom")
    cache_status = metric_status("cache")
    cost_status = metric_status("cost")
    advice_status = str(cache_layout_advice.get("status", "missing"))

    # Context and headroom need a live snapshot when the scan reports them missing.
    live_snapshot = "live_statusline_snapshot"
    live_gap_specs = (
        (
            context_status,
            {
                "id": "live_context_window",
                "required_observation": "live_statusline_snapshot",
                "affects": ["context_availability", "metric_availability.context"],
                "reason": "Historical transcript scans do not include live Claude Code context_window data.",
            },
        ),
        (
            headroom_status,
            {
                "id": "live_headroom",
                "required_observation": "live_statusline_snapshot",
                "affects": ["headroom_availability", "cache_diagnostics.headroom_diagnostics"],
                "reason": "Historical transcript totals are not remaining-token or live headroom observations.",
            },
        ),
    )
    missing_live_observations: list[dict[str, Any]] = [
        gap for observed_status, gap in live_gap_specs if observed_status == "missing"
    ]

    primary_cards = [
        _mac_card(
            "source_freshness",
            "Source freshness",
            "available",
            ["source_kind", "source_freshness.status", "source_freshness.generated_at"],
        ),
        _mac_card(
            "scan_integrity",
            "Scan integrity",
            scan_status,
            [
                "scan_integrity.status",
                "scan_integrity.files_scanned",
                "scan_integrity.records_scanned",
                "scan_integrity.skipped_files",
                "scan_integrity.skipped_records",
            ],
        ),
        _mac_card(
            "token_totals",
            "Token totals",
            token_status,
            [
                "totals.total_tokens",
                "totals.tokens.input",
                "totals.tokens.output",
                "totals.tokens.cache_read",
                "totals.tokens.cache_creation",
            ],
        ),
        _mac_card(
            "cache_reuse",
            "Cache-read share and reuse ratio",
            cache_status,
            ["totals.cache_read_share", "totals.cache_reuse_ratio", "metric_availability.cache"],
        ),
        _mac_card(
            "observed_cost",
            "Observed transcript cost",
            cost_status,
            ["totals.cost_usd_observed", "metric_availability.cost"],
        ),
        _mac_card(
            "context_availability",
            "Context availability",
            context_status,
            ["context_availability", "metric_availability.context"],
            required_observation=live_snapshot if context_status == "missing" else None,
        ),
        _mac_card(
            "headroom_availability",
            "Headroom availability",
            headroom_status,
            ["headroom_availability", "cache_diagnostics.headroom_diagnostics"],
            required_observation=live_snapshot if headroom_status == "missing" else None,
        ),
        _mac_card(
            "cache_layout_advice",
            "Cache layout advice",
            advice_status,
            ["cache_layout_advice", "cache_friendliness", "cache_diagnostics.dynamic_prefix_breakers"],
        ),
    ]

    return {
        "schema_version": MAC_VISIBILITY_SCHEMA_VERSION,
        "surface_kind": "local_macos_visibility_contract",
        "readiness": {
            "status": readiness_status,
            "reason": readiness_reason,
        },
        "bind_to_top_level_fields": [
            "source_kind",
            "source_freshness",
            "scan_integrity",
            "metric_availability",
            "metric_caveats",
            "redaction_mode",
            "context_availability",
            "headroom_availability",
            "cache_friendliness",
            "cache_diagnostics",
            "cache_layout_advice",
            "totals",
        ],
        "diagnostic_only_fields": ["summary"],
        "primary_cards": primary_cards,
        "missing_live_observations": missing_live_observations,
        "claim_boundaries": [
            "Local transcript observations are not invoice-grade billing records.",
            "Provider cache fields are telemetry, not ContextGuard-caused token reduction and do not prove provider cache hits.",
            "Historical transcript totals do not infer live context headroom or remaining tokens.",
            "This contract does not guarantee token or cost savings.",
        ],
        "redaction_required": True,
    }
1799
+
1800
+
1420
1801
  def feasibility_json(
1421
1802
  summary: UsageSummary,
1422
1803
  top: int = 15,
@@ -1433,6 +1814,12 @@ def feasibility_json(
1433
1814
  stable_total_tokens = sum(stable_tokens.values())
1434
1815
  cache_friendliness = cache_friendliness_for_summary(summary)
1435
1816
  cache_diagnostics = cache_diagnostics_for_summary(summary)
1817
+ cache_layout_advice = cache_layout_advice_for_summary(summary)
1818
+ mac_visibility = build_mac_visibility_contract(
1819
+ availability=availability,
1820
+ integrity=integrity,
1821
+ cache_layout_advice=cache_layout_advice,
1822
+ )
1436
1823
  return {
1437
1824
  "schema_version": FEASIBILITY_SCHEMA_VERSION,
1438
1825
  "producer": FEASIBILITY_PRODUCER,
@@ -1452,6 +1839,8 @@ def feasibility_json(
1452
1839
  "headroom_availability",
1453
1840
  "cache_friendliness",
1454
1841
  "cache_diagnostics",
1842
+ "cache_layout_advice",
1843
+ "mac_visibility",
1455
1844
  "totals",
1456
1845
  ],
1457
1846
  "diagnostic_fields": ["summary"],
@@ -1480,6 +1869,8 @@ def feasibility_json(
1480
1869
  "headroom_availability": availability["headroom"],
1481
1870
  "cache_friendliness": cache_friendliness,
1482
1871
  "cache_diagnostics": cache_diagnostics,
1872
+ "cache_layout_advice": cache_layout_advice,
1873
+ "mac_visibility": mac_visibility,
1483
1874
  "totals": {
1484
1875
  "total_tokens": stable_total_tokens,
1485
1876
  "tokens": stable_tokens,
@@ -1531,6 +1922,36 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
1531
1922
  input_ratio = input_tokens / total
1532
1923
  cache_friendliness = cache_friendliness_for_summary(summary)
1533
1924
  cache_diagnostics = cache_diagnostics_for_summary(summary)
1925
+ cache_layout_advice = cache_layout_advice_for_summary(summary)
1926
+ if cache_layout_advice.get("observed_issue") == "volatile_prefix_breaker":
1927
+ evidence = {
1928
+ "observed_issue": cache_layout_advice.get("observed_issue"),
1929
+ "priority": cache_layout_advice.get("priority"),
1930
+ "confidence": cache_layout_advice.get("confidence"),
1931
+ "cache_creation_tokens": cache_creation,
1932
+ "cache_read_tokens": cache_read,
1933
+ }
1934
+ observed_summary = cache_layout_advice.get("observed_summary")
1935
+ if isinstance(observed_summary, dict):
1936
+ for key in ("max_prefix_position", "max_prefix_position_volatile_share", "stable_prefix_share", "volatile_prefix_share"):
1937
+ evidence[key] = observed_summary.get(key)
1938
+ rec = recommendation(
1939
+ "prioritize-cache-prefix-stabilization",
1940
+ "Prioritize cache-prefix stabilization before TTL or output trimming",
1941
+ (
1942
+ "Cache creation remains material and redacted segment statistics show a volatile early prefix; "
1943
+ "this is an experiment-prioritization signal, not a confirmed root cause."
1944
+ ),
1945
+ (
1946
+ "If one transcript dominates, split unrelated work into shorter sessions; then check startup/context "
1947
+ "size and keep stable policy before volatile logs, diffs, timestamps, and generated evidence."
1948
+ ),
1949
+ str(cache_layout_advice.get("priority") or "P1"),
1950
+ evidence,
1951
+ )
1952
+ rec["heuristic"] = True
1953
+ rec["confidence"] = cache_layout_advice.get("confidence")
1954
+ recs.append(rec)
1534
1955
  for finding in cache_friendliness.get("findings", []):
1535
1956
  if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
1536
1957
  evidence = dict(finding.get("evidence") or {})
@@ -1754,6 +2175,7 @@ def summary_json(
1754
2175
  "top_tools": counter_json(summary.by_tool, top),
1755
2176
  "cache_friendliness": cache_friendliness_for_summary(summary),
1756
2177
  "cache_diagnostics": cache_diagnostics_for_summary(summary),
2178
+ "cache_layout_advice": cache_layout_advice_for_summary(summary),
1757
2179
  }
1758
2180
  if include_recommendations:
1759
2181
  data["recommendations"] = build_recommendations(summary, top)
@@ -1887,6 +2309,26 @@ def main() -> int:
1887
2309
  headroom = cache_diagnostics.get("headroom_diagnostics") or {}
1888
2310
  print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")
1889
2311
 
2312
+ cache_layout_advice = cache_layout_advice_for_summary(summary)
2313
+ if cache_layout_advice.get("status") != "missing" or cache_layout_advice.get("observed_issue") != "unknown":
2314
+ print("\nCache layout advice")
2315
+ print(f" status {cache_layout_advice.get('status')}")
2316
+ print(f" confidence {cache_layout_advice.get('confidence')}")
2317
+ print(f" observed_issue {cache_layout_advice.get('observed_issue')}")
2318
+ print(f" priority {cache_layout_advice.get('priority')}")
2319
+ experiments = cache_layout_advice.get("recommended_experiments") or []
2320
+ if experiments:
2321
+ first = experiments[0]
2322
+ print(f" first_experiment {first.get('id')} ({first.get('priority')})")
2323
+ print(f" experiment_action {first.get('action')}")
2324
+ checks = cache_layout_advice.get("next_checks") or []
2325
+ if checks:
2326
+ first = checks[0]
2327
+ print(f" next_check {first.get('id')}")
2328
+ templates = first.get("command_templates") or []
2329
+ if templates:
2330
+ print(f" command_template {templates[0]}")
2331
+
1890
2332
  model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
1891
2333
  print_counter("By model", model_totals, args.top)
1892
2334