npm - @ictechgy/context-guard - Versions diffs - 0.4.1 → 0.4.4 - Mend

@ictechgy/context-guard 0.4.1 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/CHANGELOG.md +15 -0
package/README.ko.md +62 -33
package/README.md +91 -23
package/context-guard-kit/README.md +39 -26
package/context-guard-kit/benchmark_runner.py +273 -8
package/context-guard-kit/claude_transcript_cost_audit.py +597 -12
package/context-guard-kit/context_compress.py +153 -1
package/context-guard-kit/context_filter.py +446 -0
package/context-guard-kit/context_guard_cli.py +3 -0
package/context-guard-kit/context_guard_diet.py +677 -2
package/context-guard-kit/context_pack.py +1694 -2
package/context-guard-kit/cost_guard.py +1870 -0
package/context-guard-kit/setup_wizard.py +820 -29
package/context-guard-kit/trim_command_output.py +396 -45
package/docs/benchmark-fixtures/learned-compression.tasks.example.json +24 -0
package/docs/benchmark-fixtures/learned-compression.variants.example.json +10 -0
package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +24 -0
package/docs/benchmark-fixtures/visual-ocr.variants.example.json +10 -0
package/docs/benchmark-workflow-examples.md +40 -0
package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +169 -0
package/docs/benchmark-workflows/measured-token-workflow.example.json +170 -0
package/docs/benchmark-workflows/provider-cache-telemetry.example.json +170 -0
package/docs/cache-diagnostics-schema.md +96 -0
package/docs/cache-diagnostics.example.json +116 -0
package/docs/cache-diagnostics.schema.json +460 -0
package/docs/distribution.md +4 -2
package/docs/experimental-benchmark-fixtures.md +36 -0
package/package.json +11 -2
package/packaging/homebrew/context-guard.rb.template +3 -2
package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
package/plugins/context-guard/README.ko.md +22 -14
package/plugins/context-guard/README.md +24 -10
package/plugins/context-guard/bin/context-guard +3 -0
package/plugins/context-guard/bin/context-guard-audit +597 -12
package/plugins/context-guard/bin/context-guard-bench +273 -8
package/plugins/context-guard/bin/context-guard-compress +153 -1
package/plugins/context-guard/bin/context-guard-cost +1870 -0
package/plugins/context-guard/bin/context-guard-diet +677 -2
package/plugins/context-guard/bin/context-guard-filter +446 -0
package/plugins/context-guard/bin/context-guard-pack +1694 -2
package/plugins/context-guard/bin/context-guard-setup +820 -29
package/plugins/context-guard/bin/context-guard-trim-output +396 -45
package/plugins/context-guard/brief/README.md +10 -3
package/plugins/context-guard/skills/optimize/SKILL.md +5 -2
package/plugins/context-guard/skills/setup/SKILL.md +3 -1

package/context-guard-kit/claude_transcript_cost_audit.py CHANGED Viewed

@@ -45,8 +45,11 @@ TOKEN_TYPE_ALIASES = {
 COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
 MODEL_KEYS = ("model", "model_id", "modelId")
 QUERY_SOURCE_KEYS = ("query_source", "querySource")
-FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.1"
+TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
+FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.2"
 FEASIBILITY_PRODUCER = "context-guard-audit"
+CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
+CACHE_LAYOUT_ADVICE_SCHEMA_VERSION = "contextguard.cache-layout-advice.v1"
 MAX_ERROR_EXAMPLES = 20
 JSON_PARSE_RECURSION_LIMIT = 10_000
 READ_CHUNK_BYTES = 64 * 1024
@@ -177,8 +180,12 @@ class UsageSummary:
     by_tool: Counter[str] = field(default_factory=Counter)
     token_field_presence: Counter[str] = field(default_factory=Counter)
     cost_field_count: int = 0
+    cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
+    positive_cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
     prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
     cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
+    cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
+    cache_layout_advice_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
     @property
     def total_tokens(self) -> int:
@@ -295,6 +302,48 @@ def finite_nonnegative_number(value: Any, *, clamp_negative: bool) -> int | floa
     return None
+def parse_timestamp_value(value: Any) -> _dt.datetime | None:
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        try:
+            if text.endswith("Z"):
+                text = text[:-1] + "+00:00"
+            parsed = _dt.datetime.fromisoformat(text)
+        except ValueError:
+            return None
+        if parsed.tzinfo is None:
+            parsed = parsed.replace(tzinfo=_dt.timezone.utc)
+        return parsed.astimezone(_dt.timezone.utc)
+    metric = finite_nonnegative_number(value, clamp_negative=False)
+    if metric is None:
+        return None
+    seconds = float(metric) / 1000.0 if float(metric) > 10_000_000_000 else float(metric)
+    try:
+        return _dt.datetime.fromtimestamp(seconds, tz=_dt.timezone.utc)
+    except (OverflowError, OSError, ValueError):
+        return None
def record_timestamp(root: Any) -> _dt.datetime | None:
    """Return the first parseable timestamp on a record, or ``None``.

    Keys listed in ``TIMESTAMP_KEYS`` are tried in order on ``root`` itself and
    then on ``root["message"]`` when that is a dict.  A non-dict ``root``
    yields ``None``.
    """
    if not isinstance(root, dict):
        return None
    holders = [root]
    nested = root.get("message")
    if isinstance(nested, dict):
        holders.append(nested)
    for holder in holders:
        for key in TIMESTAMP_KEYS:
            if key not in holder:
                continue
            stamp = parse_timestamp_value(holder.get(key))
            if stamp is not None:
                return stamp
    return None
 def normalize_token_bucket(raw: str) -> str:
     return TOKEN_TYPE_ALIASES.get(raw, raw)
@@ -667,11 +716,15 @@ def add_usage(
 ) -> RecordUsage:
     root_model = None
     root_query_source = None
+    parsed_timestamp = None
     if isinstance(root, dict):
         root_model = first_string(root, MODEL_KEYS)
         root_query_source = first_string(root, QUERY_SOURCE_KEYS)
+        parsed_timestamp = record_timestamp(root)
     record = RecordUsage()
+    cache_telemetry_present = False
+    positive_cache_telemetry_present = False
     summary.prompt_cache_audit.observe(root)
     for d in walk(root):
         local_tokens: Counter[str] = Counter()
@@ -695,6 +748,10 @@ def add_usage(
         for bucket in present_buckets:
             summary.token_field_presence[bucket] += 1
+        if "cache_read" in present_buckets or "cache_creation" in present_buckets:
+            cache_telemetry_present = True
+            if local_tokens.get("cache_read", 0) > 0 or local_tokens.get("cache_creation", 0) > 0:
+                positive_cache_telemetry_present = True
         if local_tokens:
             summary.tokens.update(local_tokens)
@@ -713,6 +770,10 @@ def add_usage(
                 record.cost_usd += cost
                 summary.cost_field_count += 1
                 break
+    if parsed_timestamp is not None and cache_telemetry_present:
+        summary.cache_record_timestamps.append(parsed_timestamp)
+    if parsed_timestamp is not None and positive_cache_telemetry_present:
+        summary.positive_cache_record_timestamps.append(parsed_timestamp)
     commands, tools = collect_record_hints(root, show_commands=show_commands)
     record.commands = commands
     record.tools = tools
@@ -980,6 +1041,7 @@ def segment_position_stats(samples: list[PromptSegmentSample], attr: str, window
             "stability": stability,
             "volatile_share": 1.0 - stability,
             "unique_hashes": len(counts),
+            "sample_count": len(values),
         })
     return stats
@@ -1143,6 +1205,417 @@ def cache_friendliness_for_summary(summary: UsageSummary) -> dict[str, Any]:
     return summary.cache_friendliness_cache
def _cache_diagnostic_confidence(*, skipped: bool, samples: bool, has_cache: bool) -> str:
    """Pick the confidence label for a cache-diagnostics payload.

    A skipped scan always reports ``"partial"``; otherwise the label is
    ``"hypothesis"`` when prompt samples or cache fields exist and
    ``"unavailable"`` when neither does.
    """
    if skipped:
        return "partial"
    return "hypothesis" if (samples or has_cache) else "unavailable"
def build_ttl_diagnostics(summary: UsageSummary, *, has_cache_any: bool, skipped: bool) -> dict[str, Any]:
    """Summarise the time span covered by positive cache telemetry records.

    The span is measured between the earliest and latest entries of
    ``summary.positive_cache_record_timestamps`` and bucketed as within
    5 minutes, between 5 minutes and 1 hour, or beyond 1 hour.  With fewer
    than two such entries the result is reported as unavailable.

    Args:
        summary: Usage summary holding the collected cache record timestamps.
        has_cache_any: Whether any cache field was observed in the scan.
        skipped: Whether part of the scan was skipped; downgrades confidence
            to ``"partial"``.
    """
    positive_stamps = sorted(summary.positive_cache_record_timestamps)
    report: dict[str, Any] = {
        "status": "unavailable",
        "evidence": EVIDENCE_UNAVAILABLE,
        "confidence": "partial" if skipped else "unavailable",
        "timestamped_cache_record_count": len(summary.cache_record_timestamps),
        "positive_timestamped_cache_record_count": len(positive_stamps),
        "timestamped_cache_record_span_seconds": None,
        "candidate": None,
        "reason": (
            "Fewer than two positive timestamped cache telemetry records were observed, so TTL reuse intervals cannot be inferred."
        ),
        "interval_basis": "positive_timestamped_cache_records",
        "caveats": [
            "Timestamped cache telemetry records do not prove exact provider cache-prefix identity or provider cache TTL state.",
            "5-minute versus 1-hour TTL guidance is a local hypothesis unless corroborated with provider telemetry and repeated stable prefixes.",
        ],
    }
    if len(positive_stamps) < 2:
        return report

    span_seconds = max(0, int((positive_stamps[-1] - positive_stamps[0]).total_seconds()))
    if span_seconds <= 5 * 60:
        bucket = "within-5m"
    elif span_seconds <= 60 * 60:
        bucket = "between-5m-and-1h"
    else:
        bucket = "beyond-1h"

    # Overwriting existing keys keeps the payload's key order unchanged.
    if has_cache_any:
        report["status"] = "hypothesis"
        report["evidence"] = EVIDENCE_INFERRED
    report["confidence"] = "partial" if skipped else "hypothesis"
    report["timestamped_cache_record_span_seconds"] = span_seconds
    report["candidate"] = bucket
    report["reason"] = (
        "Positive timestamped cache telemetry records bound the local cache-observation span, but exact provider cache TTL reuse remains a hypothesis."
    )
    return report
def build_cache_diagnostics(summary: UsageSummary) -> dict[str, Any]:
    """Build the cache-diagnostics payload for ``summary`` and memoise it.

    The payload combines observed cache token fields, per-position prefix
    segment statistics, cache-miss hypotheses, TTL diagnostics, and headroom
    availability.  The result is stored on ``summary.cache_diagnostics_cache``
    and that same dict is returned on later calls.
    """
    memoised = summary.cache_diagnostics_cache
    if memoised is not None:
        return memoised

    cache_availability = build_metric_availability(summary)["cache"]
    cache_friendliness = cache_friendliness_for_summary(summary)
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or cache_friendliness.get("skipped_evidence")
    )
    saw_read_field = summary.token_field_presence.get("cache_read", 0) > 0
    saw_creation_field = summary.token_field_presence.get("cache_creation", 0) > 0
    has_cache_any = saw_read_field or saw_creation_field
    cache_read = summary.tokens.get("cache_read", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)
    samples = summary.prompt_cache_audit.samples
    prefix_stats = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS) if samples else []
    confidence = _cache_diagnostic_confidence(skipped=skipped, samples=bool(samples), has_cache=has_cache_any)

    # Every per-segment entry shares one confidence label.
    segment_confidence = "partial" if cache_friendliness.get("confidence") == "partial" else "hypothesis"

    most_stable_first = sorted(prefix_stats, key=lambda item: (-item["stability"], item["position"]))
    stable_prefix_candidates: list[dict[str, Any]] = [
        {
            "position": stat["position"],
            "stability": round(float(stat["stability"]), 4),
            "volatile_share": round(float(stat["volatile_share"]), 4),
            "unique_hashes": stat["unique_hashes"],
            "sample_count": stat["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "action": "Keep stable instructions, policies, and reusable context before run-specific evidence.",
        }
        for stat in most_stable_first[:PROMPT_AUDIT_PREFIX_SEGMENTS]
        if not stat["stability"] < 0.66
    ]

    # The trigger label comes from the first matching cache-friendliness finding, if any.
    breaker_trigger = "prefix_position"
    near_prefix_finding = next(
        (
            finding
            for finding in cache_friendliness.get("findings", [])
            if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix"
        ),
        None,
    )
    if near_prefix_finding is not None:
        finding_evidence = near_prefix_finding.get("evidence")
        if isinstance(finding_evidence, dict):
            breaker_trigger = str(finding_evidence.get("trigger") or breaker_trigger)

    most_volatile_first = sorted(prefix_stats, key=lambda item: (-item["volatile_share"], item["position"]))
    dynamic_prefix_breakers: list[dict[str, Any]] = [
        {
            "position": stat["position"],
            "trigger": breaker_trigger,
            "volatile_share": round(float(stat["volatile_share"]), 4),
            "stability": round(float(stat["stability"]), 4),
            "unique_hashes": stat["unique_hashes"],
            "sample_count": stat["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "heuristic": True,
            "action": "Move diffs, logs, timestamps, and command output after stable reusable prompt prefixes.",
        }
        for stat in most_volatile_first
        if not stat["volatile_share"] < 0.34
    ][:PROMPT_AUDIT_MAX_FINDINGS]

    hypotheses: list[dict[str, Any]] = []

    def hypothesize(hypothesis_id: str, evidence: Any, label: str, reason: str, action: str) -> None:
        # Keeps one key order for every cache-miss hypothesis entry.
        hypotheses.append({
            "id": hypothesis_id,
            "evidence": evidence,
            "confidence": label,
            "reason": reason,
            "action": action,
        })

    if not has_cache_any:
        hypothesize(
            "cache-fields-missing",
            EVIDENCE_UNAVAILABLE,
            "partial" if skipped else "unavailable",
            "No cache_read/cache_creation transcript fields were observed.",
            "Hide cache-read UI or label cache telemetry as missing for this scan.",
        )
    if saw_creation_field and cache_creation > 0 and (not saw_read_field or cache_read == 0):
        hypothesize(
            "cache-cold-or-prefix-changed",
            EVIDENCE_INFERRED,
            "hypothesis",
            "Cache creation tokens were observed without corresponding cache read tokens.",
            "Check whether stable instructions changed or whether the session was cache-cold.",
        )
    if saw_creation_field and cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        hypothesize(
            "cache-read-low-vs-write",
            EVIDENCE_INFERRED,
            "hypothesis",
            "Cache reads are small relative to observed cache writes.",
            "Keep reusable prompt prefixes stable across turns before changing large context blocks.",
        )
    if dynamic_prefix_breakers:
        top_breaker = dynamic_prefix_breakers[0]
        hypothesize(
            "volatile-prefix-breakers",
            EVIDENCE_INFERRED,
            top_breaker["confidence"],
            "Redacted prompt segment hashes show volatile content near the prefix window.",
            top_breaker["action"],
        )
    if skipped:
        hypothesize(
            "partial-transcript-scan",
            EVIDENCE_INFERRED,
            "partial",
            "Some transcript files, records, or prompt structures were skipped/capped.",
            "Rerun against narrower transcript paths or higher safe scan limits before making decisions.",
        )

    ttl = build_ttl_diagnostics(summary, has_cache_any=has_cache_any, skipped=skipped)
    headroom_diagnostics = {
        **build_headroom_availability(summary),
        "historical_total_tokens_are_not_headroom": True,
        "required_observation": "live_statusline_snapshot",
    }

    has_local_evidence = bool(has_cache_any or samples)
    if skipped:
        status = "partial"
    elif has_local_evidence:
        status = "partial" if cache_friendliness.get("status") == "partial" else "available"
    else:
        status = "missing"

    diagnostics = {
        "schema_version": CACHE_DIAGNOSTICS_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_INFERRED if has_local_evidence else EVIDENCE_UNAVAILABLE,
        "heuristic": True,
        "observations": {
            "cache_fields": cache_availability,
            "cache_read_tokens": cache_read,
            "cache_creation_tokens": cache_creation,
        },
        "derived_ratios": cache_availability["derived"],
        "stable_prefix_candidates": stable_prefix_candidates,
        "dynamic_prefix_breakers": dynamic_prefix_breakers,
        "cache_miss_hypotheses": hypotheses[:PROMPT_AUDIT_MAX_FINDINGS],
        "ttl_diagnostics": ttl,
        "headroom_diagnostics": headroom_diagnostics,
        "caveats": [
            "Cache diagnostics are local transcript heuristics and do not prove exact provider cache-prefix state.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Stable-prefix and breaker positions come from bounded redacted segment hashes, not raw prompt text.",
        ],
    }
    summary.cache_diagnostics_cache = diagnostics
    return diagnostics
def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-diagnostics payload for ``summary`` via ``build_cache_diagnostics``."""
    diagnostics = build_cache_diagnostics(summary)
    return diagnostics
def _dominant_transcript(summary: UsageSummary) -> dict[str, Any] | None:
    """Describe the file with the most tokens in ``summary.by_file``.

    Returns ``None`` when there are no tokens or no per-file counts.
    Otherwise returns that file's token count, its share of
    ``summary.total_tokens`` rounded to four places, and a ``dominates`` flag
    that is true when the share is at least 20% and the count at least 1,000.
    """
    total = summary.total_tokens
    if total <= 0 or not summary.by_file:
        return None
    ((_label, top_tokens),) = summary.by_file.most_common(1)
    share = top_tokens / total
    return {
        "tokens": top_tokens,
        "share": round(share, 4),
        "dominates": share >= 0.20 and top_tokens >= 1_000,
    }
def _first_dynamic_breaker(cache_diagnostics: dict[str, Any]) -> dict[str, Any] | None:
    """Return the first ``dynamic_prefix_breakers`` entry when it is a dict, else ``None``."""
    breakers = cache_diagnostics.get("dynamic_prefix_breakers")
    if not breakers:
        return None
    head = breakers[0]
    if isinstance(head, dict):
        return head
    return None
def build_cache_layout_advice(summary: UsageSummary) -> dict[str, Any]:
    """Build the cache-layout advice payload for ``summary`` and memoise it.

    The advice classifies one observed issue (volatile prefix breaker, long
    session accumulation, low cache reuse, or missing cache fields), lists
    hypothesised causes and follow-up checks, and numbers the recommended
    experiments in the order they are added.  The result is stored on
    ``summary.cache_layout_advice_cache`` and reused on later calls.
    """
    if summary.cache_layout_advice_cache is not None:
        return summary.cache_layout_advice_cache

    cache_friendliness = cache_friendliness_for_summary(summary)
    cache_diagnostics = cache_diagnostics_for_summary(summary)

    raw_signals = cache_friendliness.get("signals")
    signals = raw_signals if isinstance(raw_signals, dict) else {}
    dynamic_breaker = _first_dynamic_breaker(cache_diagnostics)
    dominant = _dominant_transcript(summary)
    cache_creation = summary.tokens.get("cache_creation", 0)
    cache_read = summary.tokens.get("cache_read", 0)

    observations = cache_diagnostics.get("observations")
    cache_fields = observations.get("cache_fields", {}) if isinstance(observations, dict) else {}
    cache_status = cache_fields.get("status") if isinstance(cache_fields, dict) else None

    if dynamic_breaker:
        max_prefix_position = dynamic_breaker.get("position")
        max_prefix_position_volatile_share = dynamic_breaker.get("volatile_share")
    else:
        # Without a breaker entry, fall back to the cache-friendliness signal.
        max_prefix_position = None
        max_prefix_position_volatile_share = signals.get("max_prefix_position_volatile_share")

    has_cache_any = bool(
        summary.token_field_presence.get("cache_read", 0)
        or summary.token_field_presence.get("cache_creation", 0)
    )
    has_prompt_samples = bool(summary.prompt_cache_audit.samples)
    if not (has_cache_any or has_prompt_samples):
        status, confidence = "missing", "unavailable"
    elif (
        not has_prompt_samples
        or cache_friendliness.get("status") == "partial"
        or cache_diagnostics.get("status") == "partial"
        or summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
    ):
        status, confidence = "partial", "partial"
    else:
        status, confidence = "available", "hypothesis"

    volatile_prefix_breaker = bool(
        dynamic_breaker
        and cache_creation > 0
        and (max_prefix_position in {0, 1} or (max_prefix_position_volatile_share or 0) >= PROMPT_PREFIX_VOLATILE_THRESHOLD)
    )
    long_session_dominates = bool(dominant and dominant.get("dominates"))

    hypothesized_causes: list[dict[str, Any]] = []
    corroborated_causes: list[dict[str, Any]] = []
    next_checks: list[dict[str, Any]] = []
    recommended_experiments: list[dict[str, Any]] = []

    def add_experiment(experiment_id: str, **details: Any) -> None:
        # "order" is the 1-based position in which the experiment was added.
        recommended_experiments.append({
            "id": experiment_id,
            "order": len(recommended_experiments) + 1,
            **details,
        })

    observed_issue = "unknown"
    priority = "P2"
    if volatile_prefix_breaker:
        observed_issue = "volatile_prefix_breaker"
        priority = "P0" if cache_creation >= 50_000 and max_prefix_position in {0, 1} else "P1"
    elif long_session_dominates:
        observed_issue = "long_session_accumulation"
        priority = "P1"
    elif cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        observed_issue = "low_cache_reuse"
        priority = "P1"
    elif cache_status == "missing" or not has_cache_any:
        observed_issue = "missing_cache_fields"
        priority = "P2"

    if volatile_prefix_breaker:
        hypothesized_causes.append({
            "id": "prefix-position-churn",
            "confidence": confidence,
            "evidence": EVIDENCE_INFERRED,
            "reason": (
                "A highly volatile redacted prompt segment appears in the early prefix window; "
                "this identifies a layout issue, not a confirmed source."
            ),
            "next_check": "Check whether startup context, generated evidence, or tool/MCP catalog changes are moving before stable policy.",
        })
        if cache_diagnostics.get("stable_prefix_candidates"):
            hypothesized_causes.append({
                "id": "evidence-before-policy",
                "confidence": confidence,
                "evidence": EVIDENCE_INFERRED,
                "reason": (
                    "Stable reusable segments appear elsewhere while the early prefix churns; "
                    "check whether logs, diffs, timestamps, or file evidence precede stable instructions."
                ),
                "next_check": "Keep stable policy/instructions first and move generated run evidence later.",
            })
        next_checks.append({
            "id": "inspect-startup-context-size",
            "confidence": "hypothesis",
            "command_templates": [
                "context-guard-diet scan <repo>",
                "context-guard-diet structural-waste <repo>",
            ],
            "evidence_required_for_corroboration": (
                "Large or duplicate CLAUDE.md/AGENTS.md/GEMINI.md findings from diet output."
            ),
        })

    if long_session_dominates:
        add_experiment(
            "split-long-sessions",
            priority="P1",
            effort="low",
            action="Use /clear between unrelated tasks and /compact focus on changed files, failing tests, and remaining TODO during long work.",
            expected_signal="Cache creation per comparable task decreases and one transcript no longer dominates observed tokens.",
            verification="Re-run context-guard-audit on a comparable window and compare cache_creation, cache_amortization, and top transcript share.",
            evidence=dominant or {},
        )
    if volatile_prefix_breaker:
        add_experiment(
            "stabilize-cache-prefix",
            priority=priority,
            effort="medium",
            action="Keep stable reusable instructions/policy before volatile logs, diffs, timestamps, and generated file evidence.",
            expected_signal="Stable prefix share rises and volatile prefix share falls on matched audit windows.",
            verification="Re-run context-guard-audit --json --recommend and compare cache_layout_advice plus cache_friendliness signals.",
            evidence={
                "dynamic_prefix_breaker_position": max_prefix_position,
                "dynamic_prefix_breaker_volatile_share": max_prefix_position_volatile_share,
            },
        )
        add_experiment(
            "run-context-diet-checks",
            priority="P1",
            effort="low",
            action="Run the generated diet command templates and treat any large/duplicate context-file findings as corroborating evidence before editing instructions.",
            expected_signal="Diet output identifies or rules out oversized/duplicated startup context as a contributor.",
            verification="Record diet JSON separately; do not convert prefix-position evidence alone into a confirmed startup-context cause.",
            command_templates=[
                "context-guard-diet scan <repo> --json > diet.json",
                "context-guard-diet structural-waste <repo> --json > structural-waste.json",
            ],
        )
    if cache_creation >= 50_000 and summary.cache_amortization_defined and 1.0 <= summary.cache_amortization < 5.0:
        add_experiment(
            "defer-longer-ttl-until-prefix-stable" if volatile_prefix_breaker else "evaluate-longer-ttl-after-stability-check",
            priority="P2",
            effort="medium",
            action="Treat longer TTL as secondary; first corroborate stable prefix reuse and current provider TTL/pricing behavior.",
            expected_signal="TTL evaluation happens only after prefix volatility is reduced or ruled out.",
            verification="Use timestamped cache telemetry and provider-measured billing/cost evidence; historical token totals alone are insufficient.",
        )

    if not recommended_experiments:
        # With nothing to try yet, point at the data that is still missing.
        if status == "partial":
            next_checks.append({
                "id": "rerun-narrower-audit",
                "confidence": "partial",
                "command_templates": ["context-guard-audit <transcript-or-project-dir> --json --recommend"],
                "evidence_required_for_corroboration": "Enough uncapped prompt/cache records to classify prefix layout.",
            })
        if observed_issue == "missing_cache_fields":
            next_checks.append({
                "id": "collect-cache-telemetry",
                "confidence": "unavailable",
                "command_templates": ["context-guard-audit ~/.claude/projects --json --recommend"],
                "evidence_required_for_corroboration": "Transcript records with cache_read/cache_creation fields.",
            })

    advice = {
        "schema_version": CACHE_LAYOUT_ADVICE_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "heuristic": True,
        "observed_issue": observed_issue,
        "priority": priority,
        "observed_summary": {
            "cache_creation_tokens": cache_creation,
            "cache_read_tokens": cache_read,
            "cache_amortization": round(summary.cache_amortization, 4) if summary.cache_amortization_defined else None,
            "stable_prefix_share": signals.get("stable_prefix_share"),
            "volatile_prefix_share": signals.get("volatile_prefix_share"),
            "volatile_tail_share": signals.get("volatile_tail_share"),
            "max_prefix_position": max_prefix_position,
            "max_prefix_position_volatile_share": max_prefix_position_volatile_share,
            "dominant_transcript_share": dominant.get("share") if dominant else None,
        },
        "hypothesized_causes": hypothesized_causes,
        "corroborated_causes": corroborated_causes,
        "next_checks": next_checks,
        "recommended_experiments": recommended_experiments,
        "caveats": [
            "Cache layout advice is a local transcript heuristic, not billing authority or provider-cache proof.",
            "Observed issues come from cache fields and redacted segment statistics; causes remain hypotheses until corroborated by diet/structural evidence.",
            "Generated command templates use placeholders and must not be treated as observed user commands or paths.",
            "Use matched before/after audits before making token or cost savings claims.",
        ],
    }
    summary.cache_layout_advice_cache = advice
    return advice
def cache_layout_advice_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-layout advice payload for ``summary`` via ``build_cache_layout_advice``."""
    advice = build_cache_layout_advice(summary)
    return advice
 def build_metric_caveats(summary: UsageSummary) -> list[str]:
     caveats = [
         "Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
@@ -1177,6 +1650,8 @@ def feasibility_json(
     stable_tokens = stable_token_counter(summary.tokens)
     stable_total_tokens = sum(stable_tokens.values())
     cache_friendliness = cache_friendliness_for_summary(summary)
+    cache_diagnostics = cache_diagnostics_for_summary(summary)
+    cache_layout_advice = cache_layout_advice_for_summary(summary)
     return {
         "schema_version": FEASIBILITY_SCHEMA_VERSION,
         "producer": FEASIBILITY_PRODUCER,
@@ -1195,6 +1670,8 @@ def feasibility_json(
                 "context_availability",
                 "headroom_availability",
                 "cache_friendliness",
+                "cache_diagnostics",
+                "cache_layout_advice",
                 "totals",
             ],
             "diagnostic_fields": ["summary"],
@@ -1222,6 +1699,8 @@ def feasibility_json(
         "context_availability": availability["context"],
         "headroom_availability": availability["headroom"],
         "cache_friendliness": cache_friendliness,
+        "cache_diagnostics": cache_diagnostics,
+        "cache_layout_advice": cache_layout_advice,
         "totals": {
             "total_tokens": stable_total_tokens,
             "tokens": stable_tokens,
@@ -1272,6 +1751,37 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
     output_ratio = output_tokens / total
     input_ratio = input_tokens / total
     cache_friendliness = cache_friendliness_for_summary(summary)
+    cache_diagnostics = cache_diagnostics_for_summary(summary)
+    cache_layout_advice = cache_layout_advice_for_summary(summary)
+    if cache_layout_advice.get("observed_issue") == "volatile_prefix_breaker":
+        evidence = {
+            "observed_issue": cache_layout_advice.get("observed_issue"),
+            "priority": cache_layout_advice.get("priority"),
+            "confidence": cache_layout_advice.get("confidence"),
+            "cache_creation_tokens": cache_creation,
+            "cache_read_tokens": cache_read,
+        }
+        observed_summary = cache_layout_advice.get("observed_summary")
+        if isinstance(observed_summary, dict):
+            for key in ("max_prefix_position", "max_prefix_position_volatile_share", "stable_prefix_share", "volatile_prefix_share"):
+                evidence[key] = observed_summary.get(key)
+        rec = recommendation(
+            "prioritize-cache-prefix-stabilization",
+            "Prioritize cache-prefix stabilization before TTL or output trimming",
+            (
+                "Cache creation remains material and redacted segment statistics show a volatile early prefix; "
+                "this is an experiment-prioritization signal, not a confirmed root cause."
+            ),
+            (
+                "If one transcript dominates, split unrelated work into shorter sessions; then check startup/context "
+                "size and keep stable policy before volatile logs, diffs, timestamps, and generated evidence."
+            ),
+            str(cache_layout_advice.get("priority") or "P1"),
+            evidence,
+        )
+        rec["heuristic"] = True
+        rec["confidence"] = cache_layout_advice.get("confidence")
+        recs.append(rec)
     for finding in cache_friendliness.get("findings", []):
         if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
             evidence = dict(finding.get("evidence") or {})
@@ -1331,25 +1841,57 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
             },
         ))
     if cache_creation >= 50_000 and 1.0 <= summary.cache_amortization < 5.0:
+        ttl = cache_diagnostics.get("ttl_diagnostics") or {}
+        ttl_status = str(ttl.get("status") or "unavailable")
+        ttl_confidence = str(ttl.get("confidence") or "unavailable")
+        ttl_candidate = ttl.get("candidate")
+        ttl_span = ttl.get("timestamped_cache_record_span_seconds")
+        if ttl_status == "hypothesis" and ttl_candidate in {"between-5m-and-1h", "beyond-1h"}:
+            ttl_reason = (
+                f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
+                f"{cache_creation} write tokens; timestamped cache telemetry spans {ttl_span} seconds "
+                f"({ttl_candidate})."
+            )
+            ttl_action = (
+                "Evaluate a longer provider prompt-cache TTL only after confirming the same stable prefix "
+                "pattern in representative sessions and rechecking current provider TTL/pricing documentation."
+            )
+        elif ttl_status == "hypothesis":
+            ttl_reason = (
+                f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
+                f"{cache_creation} write tokens, but timestamped cache telemetry currently points to {ttl_candidate}."
+            )
+            ttl_action = (
+                "Keep collecting timestamped cache read/write evidence; do not enable a longer TTL solely from this scan."
+            )
+        else:
+            ttl_reason = (
+                f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
+                f"{cache_creation} write tokens, but TTL diagnostics are {ttl_status} because this scan lacks "
+                "at least two timestamped cache telemetry records."
+            )
+            ttl_action = (
+                "Collect or inspect timestamped cache read/write evidence before evaluating a longer provider "
+                "prompt-cache TTL; historical token totals alone are not TTL evidence."
+            )
         recs.append(recommendation(
             "evaluate-1h-ttl-cache",
-            "Cache writes are large; evaluate the 1h TTL cache beta",
-            (
-                f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
-                f"{cache_creation} write tokens; absolute write cost is high and reuse is moderate. "
-                "This metric does not inspect timestamps, so confirm reuse spans >5min in a sample "
-                "session before enabling 1h TTL."
-            ),
-            (
-                "If sessions reuse the same prefix beyond the 5-minute default TTL, evaluate the 1h prompt cache "
-                "beta (write 2x, read 0.1x). It pays off when reuse spans the gap between two 5-min cache writes."
-            ),
+            "Cache writes are large; validate TTL evidence before longer TTL",
+            ttl_reason,
+            ttl_action,
             "P2",
             {
                 "cache_creation": cache_creation,
                 "cache_read": cache_read,
                 "cache_amortization": round(summary.cache_amortization, 4),
                 "cache_hit_rate": round(summary.cache_hit_rate, 4),
+                "ttl_status": ttl_status,
+                "ttl_evidence": ttl.get("evidence") or EVIDENCE_UNAVAILABLE,
+                "ttl_confidence": ttl_confidence,
+                "ttl_candidate": ttl_candidate,
+                "timestamped_cache_record_count": ttl.get("timestamped_cache_record_count"),
+                "positive_timestamped_cache_record_count": ttl.get("positive_timestamped_cache_record_count"),
+                "timestamped_cache_record_span_seconds": ttl_span,
                 "heuristic": True,
             },
         ))
@@ -1462,6 +2004,8 @@ def summary_json(
         "top_commands": counter_json(summary.by_command, top),
         "top_tools": counter_json(summary.by_tool, top),
         "cache_friendliness": cache_friendliness_for_summary(summary),
+        "cache_diagnostics": cache_diagnostics_for_summary(summary),
+        "cache_layout_advice": cache_layout_advice_for_summary(summary),
     }
     if include_recommendations:
         data["recommendations"] = build_recommendations(summary, top)
@@ -1574,6 +2118,47 @@ def main() -> int:
             if isinstance(finding, dict):
                 print(f"  finding                 [{finding.get('severity')}] {finding.get('id')}: {finding.get('title')}")
+    cache_diagnostics = cache_diagnostics_for_summary(summary)
+    print("\nCache diagnostics")
+    print(f"  status                  {cache_diagnostics.get('status')}")
+    print(f"  confidence              {cache_diagnostics.get('confidence')}")
+    hypotheses = cache_diagnostics.get("cache_miss_hypotheses") or []
+    if hypotheses:
+        first = hypotheses[0]
+        print(f"  top_hypothesis          {first.get('id')} ({first.get('confidence')})")
+    stable_candidates = cache_diagnostics.get("stable_prefix_candidates") or []
+    if stable_candidates:
+        first = stable_candidates[0]
+        print(f"  stable_prefix_candidate position={first.get('position')} stability={first.get('stability')}")
+    breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
+    if breakers:
+        first = breakers[0]
+        print(f"  dynamic_prefix_breaker  position={first.get('position')} volatile_share={first.get('volatile_share')}")
+    ttl = cache_diagnostics.get("ttl_diagnostics") or {}
+    print(f"  ttl_status              {ttl.get('status')} ({ttl.get('confidence')})")
+    headroom = cache_diagnostics.get("headroom_diagnostics") or {}
+    print(f"  headroom_status         {headroom.get('status')} ({headroom.get('evidence')})")
+    cache_layout_advice = cache_layout_advice_for_summary(summary)
+    if cache_layout_advice.get("status") != "missing" or cache_layout_advice.get("observed_issue") != "unknown":
+        print("\nCache layout advice")
+        print(f"  status                  {cache_layout_advice.get('status')}")
+        print(f"  confidence              {cache_layout_advice.get('confidence')}")
+        print(f"  observed_issue          {cache_layout_advice.get('observed_issue')}")
+        print(f"  priority                {cache_layout_advice.get('priority')}")
+        experiments = cache_layout_advice.get("recommended_experiments") or []
+        if experiments:
+            first = experiments[0]
+            print(f"  first_experiment        {first.get('id')} ({first.get('priority')})")
+            print(f"  experiment_action       {first.get('action')}")
+        checks = cache_layout_advice.get("next_checks") or []
+        if checks:
+            first = checks[0]
+            print(f"  next_check              {first.get('id')}")
+            templates = first.get("command_templates") or []
+            if templates:
+                print(f"  command_template        {templates[0]}")
     model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
     print_counter("By model", model_totals, args.top)