@ictechgy/context-guard 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.ko.md +16 -3
- package/README.md +13 -3
- package/context-guard-kit/README.md +2 -2
- package/context-guard-kit/benchmark_runner.py +244 -6
- package/context-guard-kit/claude_transcript_cost_audit.py +443 -1
- package/docs/benchmark-fixtures/learned-compression-baseline-context-pack.prompt.example.md +19 -0
- package/docs/benchmark-fixtures/learned-compression-candidate-digest.prompt.example.md +21 -0
- package/docs/benchmark-fixtures/learned-compression.tasks.example.json +5 -1
- package/docs/benchmark-fixtures/output-transform-baseline-raw-output.prompt.example.md +20 -0
- package/docs/benchmark-fixtures/output-transform-digest-receipt.prompt.example.md +23 -0
- package/docs/benchmark-fixtures/output-transform.tasks.example.json +28 -0
- package/docs/benchmark-fixtures/output-transform.variants.example.json +10 -0
- package/docs/benchmark-fixtures/visual-ocr-cropped-ocr.prompt.example.md +22 -0
- package/docs/benchmark-fixtures/visual-ocr-full-visual.prompt.example.md +19 -0
- package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +5 -1
- package/docs/benchmark-workflow-examples.md +6 -2
- package/docs/benchmark-workflows/self-hosted-metrics-ledger.example.jsonl +1 -0
- package/docs/cache-diagnostics-schema.md +25 -4
- package/docs/experimental-benchmark-fixtures.md +17 -6
- package/docs/mac-visibility-feasibility-schema.md +62 -0
- package/docs/mac-visibility-feasibility.example.json +130 -0
- package/package.json +5 -1
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +3 -3
- package/plugins/context-guard/README.md +3 -3
- package/plugins/context-guard/bin/context-guard-audit +443 -1
- package/plugins/context-guard/bin/context-guard-bench +244 -6
|
@@ -46,9 +46,11 @@ COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
|
|
|
46
46
|
MODEL_KEYS = ("model", "model_id", "modelId")
|
|
47
47
|
QUERY_SOURCE_KEYS = ("query_source", "querySource")
|
|
48
48
|
TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
|
|
49
|
-
FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.
|
|
49
|
+
FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.3"
|
|
50
|
+
MAC_VISIBILITY_SCHEMA_VERSION = "contextguard.mac-visibility.v1"
|
|
50
51
|
FEASIBILITY_PRODUCER = "context-guard-audit"
|
|
51
52
|
CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
|
|
53
|
+
CACHE_LAYOUT_ADVICE_SCHEMA_VERSION = "contextguard.cache-layout-advice.v1"
|
|
52
54
|
MAX_ERROR_EXAMPLES = 20
|
|
53
55
|
JSON_PARSE_RECURSION_LIMIT = 10_000
|
|
54
56
|
READ_CHUNK_BYTES = 64 * 1024
|
|
@@ -184,6 +186,7 @@ class UsageSummary:
|
|
|
184
186
|
prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
|
|
185
187
|
cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
186
188
|
cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
189
|
+
cache_layout_advice_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
187
190
|
|
|
188
191
|
@property
|
|
189
192
|
def total_tokens(self) -> int:
|
|
@@ -1398,6 +1401,222 @@ def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
|
|
|
1398
1401
|
return build_cache_diagnostics(summary)
|
|
1399
1402
|
|
|
1400
1403
|
|
|
1404
|
+
def _dominant_transcript(summary: UsageSummary) -> dict[str, Any] | None:
    """Describe the transcript that accounts for the most observed tokens.

    Args:
        summary: Usage summary exposing ``total_tokens`` and the per-file
            ``by_file`` counter.

    Returns:
        ``None`` when there are no tokens or no per-file entries; otherwise a
        dict with the top entry's ``tokens``, its ``share`` of the total
        (rounded to four decimals), and a ``dominates`` flag.
    """
    grand_total = summary.total_tokens
    if grand_total <= 0 or not summary.by_file:
        return None

    top_entry = summary.by_file.most_common(1)[0]
    top_tokens = top_entry[1]
    # grand_total is strictly positive here, so the division is always defined.
    fraction = top_tokens / grand_total
    # "Dominates" needs both a meaningful share and a non-trivial absolute size.
    is_dominant = fraction >= 0.20 and top_tokens >= 1_000
    return {
        "tokens": top_tokens,
        "share": round(fraction, 4),
        "dominates": is_dominant,
    }
|
|
1414
|
+
|
|
1415
|
+
|
|
1416
|
+
def _first_dynamic_breaker(cache_diagnostics: dict[str, Any]) -> dict[str, Any] | None:
|
|
1417
|
+
breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
|
|
1418
|
+
if not breakers:
|
|
1419
|
+
return None
|
|
1420
|
+
first = breakers[0]
|
|
1421
|
+
return first if isinstance(first, dict) else None
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def build_cache_layout_advice(summary: UsageSummary) -> dict[str, Any]:
    """Assemble the heuristic cache-layout advice payload for ``summary``.

    The payload combines the cache-friendliness signals, the cache
    diagnostics, the first dynamic prefix breaker, and the top transcript's
    token share into a single observed issue, a priority, hypothesized
    causes, follow-up checks, and an ordered list of recommended experiments.

    Args:
        summary: Usage summary to analyse. The computed payload is stored on
            ``summary.cache_layout_advice_cache`` and returned unchanged on
            subsequent calls.

    Returns:
        The advice dictionary tagged with ``CACHE_LAYOUT_ADVICE_SCHEMA_VERSION``.
    """
    memoized = summary.cache_layout_advice_cache
    if memoized is not None:
        return memoized

    friendliness = cache_friendliness_for_summary(summary)
    diagnostics = cache_diagnostics_for_summary(summary)

    signals = friendliness.get("signals")
    if not isinstance(signals, dict):
        signals = {}
    breaker = _first_dynamic_breaker(diagnostics)
    dominant = _dominant_transcript(summary)
    creation_tokens = summary.tokens.get("cache_creation", 0)
    read_tokens = summary.tokens.get("cache_read", 0)

    observations = diagnostics.get("observations")
    cache_fields = observations.get("cache_fields", {}) if isinstance(observations, dict) else {}
    cache_status = cache_fields.get("status") if isinstance(cache_fields, dict) else None

    # Truthiness (not ``is None``) is deliberate: an empty breaker dict falls
    # back to the aggregate signal exactly like a missing breaker.
    if breaker:
        breaker_position = breaker.get("position")
        breaker_volatile_share = breaker.get("volatile_share")
    else:
        breaker_position = None
        breaker_volatile_share = signals.get("max_prefix_position_volatile_share")

    # --- availability / confidence -------------------------------------
    saw_cache_fields = bool(
        summary.token_field_presence.get("cache_read", 0)
        or summary.token_field_presence.get("cache_creation", 0)
    )
    saw_prompt_samples = bool(summary.prompt_cache_audit.samples)
    status, confidence = "missing", "unavailable"
    if saw_cache_fields or saw_prompt_samples:
        incomplete = (
            not saw_prompt_samples
            or friendliness.get("status") == "partial"
            or diagnostics.get("status") == "partial"
            or summary.skipped_files
            or summary.skipped_records
            or summary.parse_errors
        )
        if incomplete:
            status, confidence = "partial", "partial"
        else:
            status, confidence = "available", "hypothesis"

    # --- issue classification ------------------------------------------
    # The position/share tests stay inside the short-circuit chain so they
    # are only evaluated when a breaker exists and cache creation was seen.
    volatile_prefix_breaker = bool(
        breaker
        and creation_tokens > 0
        and (
            breaker_position in {0, 1}
            or (breaker_volatile_share or 0) >= PROMPT_PREFIX_VOLATILE_THRESHOLD
        )
    )
    long_session_dominates = bool(dominant and dominant.get("dominates"))

    observed_issue = "unknown"
    priority = "P2"
    hypothesized_causes: list[dict[str, Any]] = []
    corroborated_causes: list[dict[str, Any]] = []
    next_checks: list[dict[str, Any]] = []
    experiments: list[dict[str, Any]] = []

    def queue_experiment(experiment_id: str, **details: Any) -> None:
        # "order" is the 1-based position at insertion time; keyword order is
        # preserved, so the emitted key order is id, order, then the details.
        experiments.append({"id": experiment_id, "order": len(experiments) + 1, **details})

    if volatile_prefix_breaker:
        observed_issue = "volatile_prefix_breaker"
        priority = "P0" if creation_tokens >= 50_000 and breaker_position in {0, 1} else "P1"
        hypothesized_causes.append({
            "id": "prefix-position-churn",
            "confidence": confidence,
            "evidence": EVIDENCE_INFERRED,
            "reason": (
                "A highly volatile redacted prompt segment appears in the early prefix window; "
                "this identifies a layout issue, not a confirmed source."
            ),
            "next_check": "Check whether startup context, generated evidence, or tool/MCP catalog changes are moving before stable policy.",
        })
        if diagnostics.get("stable_prefix_candidates"):
            hypothesized_causes.append({
                "id": "evidence-before-policy",
                "confidence": confidence,
                "evidence": EVIDENCE_INFERRED,
                "reason": (
                    "Stable reusable segments appear elsewhere while the early prefix churns; "
                    "check whether logs, diffs, timestamps, or file evidence precede stable instructions."
                ),
                "next_check": "Keep stable policy/instructions first and move generated run evidence later.",
            })
        next_checks.append({
            "id": "inspect-startup-context-size",
            "confidence": "hypothesis",
            "command_templates": [
                "context-guard-diet scan <repo>",
                "context-guard-diet structural-waste <repo>",
            ],
            "evidence_required_for_corroboration": (
                "Large or duplicate CLAUDE.md/AGENTS.md/GEMINI.md findings from diet output."
            ),
        })
    elif long_session_dominates:
        observed_issue = "long_session_accumulation"
        priority = "P1"
    elif creation_tokens >= 10_000 and read_tokens > 0 and summary.cache_amortization < 0.5:
        observed_issue = "low_cache_reuse"
        priority = "P1"
    elif cache_status == "missing" or not saw_cache_fields:
        observed_issue = "missing_cache_fields"
        priority = "P2"

    # --- recommended experiments (insertion order defines "order") ------
    if long_session_dominates:
        queue_experiment(
            "split-long-sessions",
            priority="P1",
            effort="low",
            action="Use /clear between unrelated tasks and /compact focus on changed files, failing tests, and remaining TODO during long work.",
            expected_signal="Cache creation per comparable task decreases and one transcript no longer dominates observed tokens.",
            verification="Re-run context-guard-audit on a comparable window and compare cache_creation, cache_amortization, and top transcript share.",
            evidence=dominant or {},
        )
    if volatile_prefix_breaker:
        queue_experiment(
            "stabilize-cache-prefix",
            priority=priority,
            effort="medium",
            action="Keep stable reusable instructions/policy before volatile logs, diffs, timestamps, and generated file evidence.",
            expected_signal="Stable prefix share rises and volatile prefix share falls on matched audit windows.",
            verification="Re-run context-guard-audit --json --recommend and compare cache_layout_advice plus cache_friendliness signals.",
            evidence={
                "dynamic_prefix_breaker_position": breaker_position,
                "dynamic_prefix_breaker_volatile_share": breaker_volatile_share,
            },
        )
        queue_experiment(
            "run-context-diet-checks",
            priority="P1",
            effort="low",
            action="Run the generated diet command templates and treat any large/duplicate context-file findings as corroborating evidence before editing instructions.",
            expected_signal="Diet output identifies or rules out oversized/duplicated startup context as a contributor.",
            verification="Record diet JSON separately; do not convert prefix-position evidence alone into a confirmed startup-context cause.",
            command_templates=[
                "context-guard-diet scan <repo> --json > diet.json",
                "context-guard-diet structural-waste <repo> --json > structural-waste.json",
            ],
        )
    if creation_tokens >= 50_000 and summary.cache_amortization_defined and 1.0 <= summary.cache_amortization < 5.0:
        if volatile_prefix_breaker:
            ttl_experiment_id = "defer-longer-ttl-until-prefix-stable"
        else:
            ttl_experiment_id = "evaluate-longer-ttl-after-stability-check"
        queue_experiment(
            ttl_experiment_id,
            priority="P2",
            effort="medium",
            action="Treat longer TTL as secondary; first corroborate stable prefix reuse and current provider TTL/pricing behavior.",
            expected_signal="TTL evaluation happens only after prefix volatility is reduced or ruled out.",
            verification="Use timestamped cache telemetry and provider-measured billing/cost evidence; historical token totals alone are insufficient.",
        )

    # --- fallback checks when no experiment could be proposed -----------
    if not experiments and status == "partial":
        next_checks.append({
            "id": "rerun-narrower-audit",
            "confidence": "partial",
            "command_templates": ["context-guard-audit <transcript-or-project-dir> --json --recommend"],
            "evidence_required_for_corroboration": "Enough uncapped prompt/cache records to classify prefix layout.",
        })
    if not experiments and observed_issue == "missing_cache_fields":
        next_checks.append({
            "id": "collect-cache-telemetry",
            "confidence": "unavailable",
            "command_templates": ["context-guard-audit ~/.claude/projects --json --recommend"],
            "evidence_required_for_corroboration": "Transcript records with cache_read/cache_creation fields.",
        })

    if summary.cache_amortization_defined:
        amortization = round(summary.cache_amortization, 4)
    else:
        amortization = None

    advice = {
        "schema_version": CACHE_LAYOUT_ADVICE_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "heuristic": True,
        "observed_issue": observed_issue,
        "priority": priority,
        "observed_summary": {
            "cache_creation_tokens": creation_tokens,
            "cache_read_tokens": read_tokens,
            "cache_amortization": amortization,
            "stable_prefix_share": signals.get("stable_prefix_share"),
            "volatile_prefix_share": signals.get("volatile_prefix_share"),
            "volatile_tail_share": signals.get("volatile_tail_share"),
            "max_prefix_position": breaker_position,
            "max_prefix_position_volatile_share": breaker_volatile_share,
            "dominant_transcript_share": dominant.get("share") if dominant else None,
        },
        "hypothesized_causes": hypothesized_causes,
        "corroborated_causes": corroborated_causes,
        "next_checks": next_checks,
        "recommended_experiments": experiments,
        "caveats": [
            "Cache layout advice is a local transcript heuristic, not billing authority or provider-cache proof.",
            "Observed issues come from cache fields and redacted segment statistics; causes remain hypotheses until corroborated by diet/structural evidence.",
            "Generated command templates use placeholders and must not be treated as observed user commands or paths.",
            "Use matched before/after audits before making token or cost savings claims.",
        ],
    }
    summary.cache_layout_advice_cache = advice
    return advice
|
|
1614
|
+
|
|
1615
|
+
|
|
1616
|
+
def cache_layout_advice_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-layout advice for ``summary``.

    Delegates directly to ``build_cache_layout_advice``.
    """
    advice = build_cache_layout_advice(summary)
    return advice
|
|
1618
|
+
|
|
1619
|
+
|
|
1401
1620
|
def build_metric_caveats(summary: UsageSummary) -> list[str]:
|
|
1402
1621
|
caveats = [
|
|
1403
1622
|
"Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
|
|
@@ -1417,6 +1636,168 @@ def build_metric_caveats(summary: UsageSummary) -> list[str]:
|
|
|
1417
1636
|
return caveats
|
|
1418
1637
|
|
|
1419
1638
|
|
|
1639
|
+
def _mac_card(
|
|
1640
|
+
card_id: str,
|
|
1641
|
+
title: str,
|
|
1642
|
+
status: str,
|
|
1643
|
+
binding_paths: list[str],
|
|
1644
|
+
*,
|
|
1645
|
+
required_observation: str | None = None,
|
|
1646
|
+
) -> dict[str, Any]:
|
|
1647
|
+
card: dict[str, Any] = {
|
|
1648
|
+
"id": card_id,
|
|
1649
|
+
"title": title,
|
|
1650
|
+
"status": status,
|
|
1651
|
+
"binding_paths": binding_paths,
|
|
1652
|
+
}
|
|
1653
|
+
if required_observation:
|
|
1654
|
+
card["required_observation"] = required_observation
|
|
1655
|
+
return card
|
|
1656
|
+
|
|
1657
|
+
|
|
1658
|
+
def build_mac_visibility_contract(
    *,
    availability: dict[str, Any],
    integrity: dict[str, Any],
    cache_layout_advice: dict[str, Any],
) -> dict[str, Any]:
    """Build the pre-GUI macOS visibility binding contract.

    The contract is a thin index over stable feasibility fields that are
    already emitted elsewhere: it only reads ``status`` values from the
    supplied mappings. It does not recompute metrics, read diagnostic summary
    data, or infer live context/headroom from historical transcript totals.

    Args:
        availability: Per-metric availability mapping; the ``tokens``,
            ``context``, ``headroom``, ``cache`` and ``cost`` entries are
            consulted for their ``status``.
        integrity: Scan-integrity mapping; only ``status`` is read.
        cache_layout_advice: Cache-layout advice payload; only ``status`` is
            read.

    Returns:
        The contract dictionary tagged with ``MAC_VISIBILITY_SCHEMA_VERSION``.
    """

    def metric_status(metric: str) -> str:
        # A missing or falsy entry, or one without "status", reads as "missing".
        return str((availability.get(metric) or {}).get("status", "missing"))

    token_status = metric_status("tokens")
    scan_status = str(integrity.get("status", "partial"))

    if token_status == "available" and scan_status == "complete":
        readiness = {
            "status": "ready",
            "reason": "Transcript token totals are available and the scan completed within configured limits.",
        }
    elif token_status == "available" or token_status == "partial":
        readiness = {
            "status": "partial",
            "reason": "Some stable fields can be shown, but scan integrity or metric availability is partial.",
        }
    else:
        readiness = {
            "status": "missing",
            "reason": "Token totals are missing from the transcript scan; show setup or unavailable state.",
        }

    context_status = metric_status("context")
    headroom_status = metric_status("headroom")
    cache_status = metric_status("cache")
    cost_status = metric_status("cost")
    advice_status = str(cache_layout_advice.get("status", "missing"))

    # Live-only observations that a historical transcript scan cannot supply.
    live_gap_specs = (
        (
            context_status,
            "live_context_window",
            ["context_availability", "metric_availability.context"],
            "Historical transcript scans do not include live Claude Code context_window data.",
        ),
        (
            headroom_status,
            "live_headroom",
            ["headroom_availability", "cache_diagnostics.headroom_diagnostics"],
            "Historical transcript totals are not remaining-token or live headroom observations.",
        ),
    )
    missing_live_observations: list[dict[str, Any]] = [
        {
            "id": gap_id,
            "required_observation": "live_statusline_snapshot",
            "affects": affected_fields,
            "reason": gap_reason,
        }
        for gap_status, gap_id, affected_fields, gap_reason in live_gap_specs
        if gap_status == "missing"
    ]

    context_requirement = "live_statusline_snapshot" if context_status == "missing" else None
    headroom_requirement = "live_statusline_snapshot" if headroom_status == "missing" else None

    primary_cards = [
        _mac_card(
            "source_freshness",
            "Source freshness",
            "available",
            ["source_kind", "source_freshness.status", "source_freshness.generated_at"],
        ),
        _mac_card(
            "scan_integrity",
            "Scan integrity",
            scan_status,
            [
                "scan_integrity.status",
                "scan_integrity.files_scanned",
                "scan_integrity.records_scanned",
                "scan_integrity.skipped_files",
                "scan_integrity.skipped_records",
            ],
        ),
        _mac_card(
            "token_totals",
            "Token totals",
            token_status,
            [
                "totals.total_tokens",
                "totals.tokens.input",
                "totals.tokens.output",
                "totals.tokens.cache_read",
                "totals.tokens.cache_creation",
            ],
        ),
        _mac_card(
            "cache_reuse",
            "Cache-read share and reuse ratio",
            cache_status,
            ["totals.cache_read_share", "totals.cache_reuse_ratio", "metric_availability.cache"],
        ),
        _mac_card(
            "observed_cost",
            "Observed transcript cost",
            cost_status,
            ["totals.cost_usd_observed", "metric_availability.cost"],
        ),
        _mac_card(
            "context_availability",
            "Context availability",
            context_status,
            ["context_availability", "metric_availability.context"],
            required_observation=context_requirement,
        ),
        _mac_card(
            "headroom_availability",
            "Headroom availability",
            headroom_status,
            ["headroom_availability", "cache_diagnostics.headroom_diagnostics"],
            required_observation=headroom_requirement,
        ),
        _mac_card(
            "cache_layout_advice",
            "Cache layout advice",
            advice_status,
            ["cache_layout_advice", "cache_friendliness", "cache_diagnostics.dynamic_prefix_breakers"],
        ),
    ]

    return {
        "schema_version": MAC_VISIBILITY_SCHEMA_VERSION,
        "surface_kind": "local_macos_visibility_contract",
        "readiness": readiness,
        "bind_to_top_level_fields": [
            "source_kind",
            "source_freshness",
            "scan_integrity",
            "metric_availability",
            "metric_caveats",
            "redaction_mode",
            "context_availability",
            "headroom_availability",
            "cache_friendliness",
            "cache_diagnostics",
            "cache_layout_advice",
            "totals",
        ],
        "diagnostic_only_fields": ["summary"],
        "primary_cards": primary_cards,
        "missing_live_observations": missing_live_observations,
        "claim_boundaries": [
            "Local transcript observations are not invoice-grade billing records.",
            "Provider cache fields are telemetry, not ContextGuard-caused token reduction and do not prove provider cache hits.",
            "Historical transcript totals do not infer live context headroom or remaining tokens.",
            "This contract does not guarantee token or cost savings.",
        ],
        "redaction_required": True,
    }
|
|
1799
|
+
|
|
1800
|
+
|
|
1420
1801
|
def feasibility_json(
|
|
1421
1802
|
summary: UsageSummary,
|
|
1422
1803
|
top: int = 15,
|
|
@@ -1433,6 +1814,12 @@ def feasibility_json(
|
|
|
1433
1814
|
stable_total_tokens = sum(stable_tokens.values())
|
|
1434
1815
|
cache_friendliness = cache_friendliness_for_summary(summary)
|
|
1435
1816
|
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1817
|
+
cache_layout_advice = cache_layout_advice_for_summary(summary)
|
|
1818
|
+
mac_visibility = build_mac_visibility_contract(
|
|
1819
|
+
availability=availability,
|
|
1820
|
+
integrity=integrity,
|
|
1821
|
+
cache_layout_advice=cache_layout_advice,
|
|
1822
|
+
)
|
|
1436
1823
|
return {
|
|
1437
1824
|
"schema_version": FEASIBILITY_SCHEMA_VERSION,
|
|
1438
1825
|
"producer": FEASIBILITY_PRODUCER,
|
|
@@ -1452,6 +1839,8 @@ def feasibility_json(
|
|
|
1452
1839
|
"headroom_availability",
|
|
1453
1840
|
"cache_friendliness",
|
|
1454
1841
|
"cache_diagnostics",
|
|
1842
|
+
"cache_layout_advice",
|
|
1843
|
+
"mac_visibility",
|
|
1455
1844
|
"totals",
|
|
1456
1845
|
],
|
|
1457
1846
|
"diagnostic_fields": ["summary"],
|
|
@@ -1480,6 +1869,8 @@ def feasibility_json(
|
|
|
1480
1869
|
"headroom_availability": availability["headroom"],
|
|
1481
1870
|
"cache_friendliness": cache_friendliness,
|
|
1482
1871
|
"cache_diagnostics": cache_diagnostics,
|
|
1872
|
+
"cache_layout_advice": cache_layout_advice,
|
|
1873
|
+
"mac_visibility": mac_visibility,
|
|
1483
1874
|
"totals": {
|
|
1484
1875
|
"total_tokens": stable_total_tokens,
|
|
1485
1876
|
"tokens": stable_tokens,
|
|
@@ -1531,6 +1922,36 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
|
|
|
1531
1922
|
input_ratio = input_tokens / total
|
|
1532
1923
|
cache_friendliness = cache_friendliness_for_summary(summary)
|
|
1533
1924
|
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1925
|
+
cache_layout_advice = cache_layout_advice_for_summary(summary)
|
|
1926
|
+
if cache_layout_advice.get("observed_issue") == "volatile_prefix_breaker":
|
|
1927
|
+
evidence = {
|
|
1928
|
+
"observed_issue": cache_layout_advice.get("observed_issue"),
|
|
1929
|
+
"priority": cache_layout_advice.get("priority"),
|
|
1930
|
+
"confidence": cache_layout_advice.get("confidence"),
|
|
1931
|
+
"cache_creation_tokens": cache_creation,
|
|
1932
|
+
"cache_read_tokens": cache_read,
|
|
1933
|
+
}
|
|
1934
|
+
observed_summary = cache_layout_advice.get("observed_summary")
|
|
1935
|
+
if isinstance(observed_summary, dict):
|
|
1936
|
+
for key in ("max_prefix_position", "max_prefix_position_volatile_share", "stable_prefix_share", "volatile_prefix_share"):
|
|
1937
|
+
evidence[key] = observed_summary.get(key)
|
|
1938
|
+
rec = recommendation(
|
|
1939
|
+
"prioritize-cache-prefix-stabilization",
|
|
1940
|
+
"Prioritize cache-prefix stabilization before TTL or output trimming",
|
|
1941
|
+
(
|
|
1942
|
+
"Cache creation remains material and redacted segment statistics show a volatile early prefix; "
|
|
1943
|
+
"this is an experiment-prioritization signal, not a confirmed root cause."
|
|
1944
|
+
),
|
|
1945
|
+
(
|
|
1946
|
+
"If one transcript dominates, split unrelated work into shorter sessions; then check startup/context "
|
|
1947
|
+
"size and keep stable policy before volatile logs, diffs, timestamps, and generated evidence."
|
|
1948
|
+
),
|
|
1949
|
+
str(cache_layout_advice.get("priority") or "P1"),
|
|
1950
|
+
evidence,
|
|
1951
|
+
)
|
|
1952
|
+
rec["heuristic"] = True
|
|
1953
|
+
rec["confidence"] = cache_layout_advice.get("confidence")
|
|
1954
|
+
recs.append(rec)
|
|
1534
1955
|
for finding in cache_friendliness.get("findings", []):
|
|
1535
1956
|
if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
|
|
1536
1957
|
evidence = dict(finding.get("evidence") or {})
|
|
@@ -1754,6 +2175,7 @@ def summary_json(
|
|
|
1754
2175
|
"top_tools": counter_json(summary.by_tool, top),
|
|
1755
2176
|
"cache_friendliness": cache_friendliness_for_summary(summary),
|
|
1756
2177
|
"cache_diagnostics": cache_diagnostics_for_summary(summary),
|
|
2178
|
+
"cache_layout_advice": cache_layout_advice_for_summary(summary),
|
|
1757
2179
|
}
|
|
1758
2180
|
if include_recommendations:
|
|
1759
2181
|
data["recommendations"] = build_recommendations(summary, top)
|
|
@@ -1887,6 +2309,26 @@ def main() -> int:
|
|
|
1887
2309
|
headroom = cache_diagnostics.get("headroom_diagnostics") or {}
|
|
1888
2310
|
print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")
|
|
1889
2311
|
|
|
2312
|
+
cache_layout_advice = cache_layout_advice_for_summary(summary)
|
|
2313
|
+
if cache_layout_advice.get("status") != "missing" or cache_layout_advice.get("observed_issue") != "unknown":
|
|
2314
|
+
print("\nCache layout advice")
|
|
2315
|
+
print(f" status {cache_layout_advice.get('status')}")
|
|
2316
|
+
print(f" confidence {cache_layout_advice.get('confidence')}")
|
|
2317
|
+
print(f" observed_issue {cache_layout_advice.get('observed_issue')}")
|
|
2318
|
+
print(f" priority {cache_layout_advice.get('priority')}")
|
|
2319
|
+
experiments = cache_layout_advice.get("recommended_experiments") or []
|
|
2320
|
+
if experiments:
|
|
2321
|
+
first = experiments[0]
|
|
2322
|
+
print(f" first_experiment {first.get('id')} ({first.get('priority')})")
|
|
2323
|
+
print(f" experiment_action {first.get('action')}")
|
|
2324
|
+
checks = cache_layout_advice.get("next_checks") or []
|
|
2325
|
+
if checks:
|
|
2326
|
+
first = checks[0]
|
|
2327
|
+
print(f" next_check {first.get('id')}")
|
|
2328
|
+
templates = first.get("command_templates") or []
|
|
2329
|
+
if templates:
|
|
2330
|
+
print(f" command_template {templates[0]}")
|
|
2331
|
+
|
|
1890
2332
|
model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
|
|
1891
2333
|
print_counter("By model", model_totals, args.top)
|
|
1892
2334
|
|