PyPI - cli-agent-runner - Versions diffs - 0.1.26__tar.gz → 0.1.28__tar.gz - Mend

cli-agent-runner 0.1.26tar.gz → 0.1.28tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (198) hide show

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.1.28] - 2026-05-17
+### Added
+- `agent_usage_recorded` event: new fields `cache_creation_tokens` (claude only, 0 for gemini),
+  `tool_call_count`, `phase`, `success`. Enables full cost reconciliation and phase/status
+  segmentation by consumers.
+### Changed
+- gemini `models_breakdown` per-model entries no longer include raw `input` / `cached` keys;
+  canonical `input_tokens` / `cached_tokens` only. Consumers reading raw keys must migrate.
+### Removed
+- `agent_runner.api_types.ThrottleState` dead alias (0.1.23 back-compat; deprecation window
+  passed; switch to `TransientErrorState`).
+See `docs/migrations/0.1.28.md`.
+## [0.1.27] - 2026-05-17
+### Fixed
+- claude plugin: rate_limit_event with rateLimitType=null no longer misclassified as account
+  5h quota; falls through to api_error_status-based bucket (e.g. infra 429 → rate_limit_model).
+  Affects supervisors consuming transient_error_detected.
+### Added
+- docs/migrations/0.1.27.md: supervisor usage guide for transient_error_detected event
+  (4-bucket dispatch table + back-off recipe).
+See `docs/migrations/0.1.27.md`.
 ## [0.1.26] - 2026-05-17
 - Fix claude `agent_usage_recorded` `model` field (was always

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cli-agent-runner
-Version: 0.1.26
+Version: 0.1.28
 Summary: Restart-on-exit supervisor for autonomous CLI agents
 Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
 Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_emit.py RENAMED Viewed

@@ -216,6 +216,10 @@ def emit_agent_usage_recorded(
     cost_usd: float | None,
     duration_ms: int,
     models_breakdown: dict[str, dict[str, int]] | None = None,
+    cache_creation_tokens: int = 0,
+    tool_call_count: int = 0,
+    phase: str = "",
+    success: bool = True,
 ) -> None:
     """Emit per-round usage record from a CLI plugin.
@@ -229,6 +233,14 @@ def emit_agent_usage_recorded(
       (gemini has no cost field; claude exposes total_cost_usd).
     - ``models_breakdown``: only populated when a round used multiple models
       (gemini multi-model rounds). None for claude (always single-model).
+    - ``cache_creation_tokens``: claude only — ``usage.cache_creation_input_tokens``,
+      independent count from ``cached_tokens`` (cache_read). Billed at ~25% premium
+      over fresh input per Anthropic pricing. Gemini has no equivalent → 0.
+    - ``tool_call_count``: number of tool invocations the agent made in the round.
+      Claude: count of ``tool_use`` content blocks across all assistant events.
+      Gemini: ``stats.tool_calls``.
+    - ``phase``: phase label from HookContext (e.g. "planning"); empty string when None.
+    - ``success``: True when exit_code == 0 and not timed_out.
     """
     from agent_runner.events import AGENT_USAGE_RECORDED, emit
@@ -244,6 +256,10 @@ def emit_agent_usage_recorded(
         cost_usd=cost_usd,
         duration_ms=duration_ms,
         models_breakdown=models_breakdown,
+        cache_creation_tokens=cache_creation_tokens,
+        tool_call_count=tool_call_count,
+        phase=phase,
+        success=success,
     )

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.1.26'
-__version_tuple__ = version_tuple = (0, 1, 26)
+__version__ = version = '0.1.28'
+__version_tuple__ = version_tuple = (0, 1, 28)
 __commit_id__ = commit_id = None

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/api_types.py RENAMED Viewed

@@ -139,10 +139,6 @@ class TransientErrorState:
     since_round: int
-# 0.1.23 back-compat alias; drop in 0.1.24
-ThrottleState = TransientErrorState
 @dataclass(frozen=True)
 class RoundResult:
     """Result of one ``run_one_round`` call.

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/claude_rate_limit.py RENAMED Viewed

@@ -64,7 +64,13 @@ class ClaudeErrorDetector:
                 )
         if parsed.get("usage"):
-            emit_agent_usage_recorded(ctx.log_dir, round_num=ctx.round_num, **parsed["usage"])
+            emit_agent_usage_recorded(
+                ctx.log_dir,
+                round_num=ctx.round_num,
+                phase=ctx.phase or "",
+                success=(result.exit_code == 0 and not result.timed_out),
+                **parsed["usage"],
+            )
 def _parse_claude_log(log_path: Path) -> dict[str, Any]:
@@ -77,6 +83,7 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
     rate_limit_info: dict | None = None
     result_event: dict | None = None
     assistant_model: str | None = None
+    tool_call_count = 0
     for line in tail:
         line = line.strip()
         if not line:
@@ -97,6 +104,11 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
             model_val = msg.get("model") if isinstance(msg, dict) else None
             if model_val:
                 assistant_model = str(model_val)
+            content = msg.get("content", []) if isinstance(msg, dict) else []
+            if isinstance(content, list):
+                tool_call_count += sum(
+                    1 for c in content if isinstance(c, dict) and c.get("type") == "tool_use"
+                )
     out: dict[str, Any] = {}
@@ -105,7 +117,9 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
         out["transient_error"] = error_payload
     if result_event is not None:
-        usage_payload = _extract_usage(result_event, model=assistant_model)
+        usage_payload = _extract_usage(
+            result_event, model=assistant_model, tool_call_count=tool_call_count
+        )
         if usage_payload is not None:
             out["usage"] = usage_payload
@@ -118,13 +132,15 @@ def _classify_transient_error(
     """Refactored from prior _scan_log_for_transient_error 0.1.23 logic; same shape, same
     priority (rate_limit_event.rejected > 429 > 5xx > 408).
     """
-    if rate_limit_info is not None:
+    if rate_limit_info is not None and rate_limit_info.get("rateLimitType") == "five_hour":
         return {
             "classification": "rate_limit_account",
             "agent": "claude",
             "reset_at_epoch": int(rate_limit_info.get("resetsAt", time.time() + 300)),
             "raw": str((result_event or {}).get("result", ""))[:_RAW_CAP],
         }
+    # rate_limit_event with null/other rateLimitType falls through to status-based
+    # classification below.
     if result_event is None or result_event.get("is_error") is not True:
         return None
     status = result_event.get("api_error_status")
@@ -138,7 +154,7 @@ def _classify_transient_error(
     return None
-def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
+def _extract_usage(result_event: dict, *, model: str | None, tool_call_count: int) -> dict | None:
     """Extract usage payload from claude result event.
     Returns None if no usage field present.
@@ -149,8 +165,8 @@ def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
       (they're independent counts). Earlier 0.1.24 simplify pass incorrectly
       subtracted cached from input; 0.1.26 reverts to the correct direct read.
     - ``cached_tokens`` is cache reads only (``cache_read_input_tokens``).
-      Cache-creation is omitted from the unified schema; can be added in 0.1.27+
-      if aggregation needs distinguishing.
+    - ``cache_creation_tokens`` is ``cache_creation_input_tokens`` (write cost,
+      billed at ~25% premium over fresh input per Anthropic pricing).
     - ``models_breakdown`` always None for claude (single-model per round);
       only populated by gemini multi-model rounds.
     - ``model`` from caller — ``_parse_claude_log`` tracks the latest
@@ -166,9 +182,11 @@ def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
         "input_tokens": int(usage.get("input_tokens", 0)),
         "output_tokens": int(usage.get("output_tokens", 0)),
         "cached_tokens": int(usage.get("cache_read_input_tokens", 0)),
+        "cache_creation_tokens": int(usage.get("cache_creation_input_tokens", 0)),
         "cost_usd": result_event.get("total_cost_usd"),
         "duration_ms": int(result_event.get("duration_ms", 0)),
         "models_breakdown": None,
+        "tool_call_count": tool_call_count,
     }

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/gemini.py RENAMED Viewed

@@ -42,7 +42,13 @@ class GeminiErrorDetector:
             te = parsed["transient_error"]
             emit_transient_error_detected(ctx.log_dir, round_num=ctx.round_num, **te)
         if parsed.get("usage"):
-            emit_agent_usage_recorded(ctx.log_dir, round_num=ctx.round_num, **parsed["usage"])
+            emit_agent_usage_recorded(
+                ctx.log_dir,
+                round_num=ctx.round_num,
+                phase=ctx.phase or "",
+                success=(result.exit_code == 0 and not result.timed_out),
+                **parsed["usage"],
+            )
 def _parse_gemini_log(log_path: Path) -> dict[str, Any]:
@@ -104,15 +110,30 @@ def _extract_usage(stats: dict[str, Any]) -> dict[str, Any]:
     primary_model = (
         max(models, key=lambda m: models[m].get("total_tokens", 0)) if models else "unknown"
     )
+    breakdown = (
+        {
+            name: {
+                "total_tokens": int(m.get("total_tokens", 0)),
+                "input_tokens": int(m.get("input_tokens", m.get("input", 0))),
+                "output_tokens": int(m.get("output_tokens", 0)),
+                "cached_tokens": int(m.get("cached", 0)),
+            }
+            for name, m in models.items()
+        }
+        if len(models) > 1
+        else None
+    )
     return {
         "agent": "gemini",
         "model": primary_model,
         "input_tokens": int(stats.get("input", 0)),
         "output_tokens": int(stats.get("output_tokens", 0)),
         "cached_tokens": int(stats.get("cached", 0)),
+        "cache_creation_tokens": 0,  # gemini has no cache-creation concept
         "cost_usd": None,  # gemini doesn't expose USD
         "duration_ms": int(stats.get("duration_ms", 0)),
-        "models_breakdown": models if len(models) > 1 else None,
+        "models_breakdown": breakdown,
+        "tool_call_count": int(stats.get("tool_calls", 0)),
     }

cli_agent_runner-0.1.28/docs/migrations/0.1.27.md ADDED Viewed

@@ -0,0 +1,169 @@
+# 0.1.27 — Rate-limit classifier fix + supervisor usage guide
+## What changed
+The claude built-in plugin (`agent_runner.builtin_plugins.claude_rate_limit`) previously
+misclassified any `rate_limit_event` with `status="rejected"` as `rate_limit_account`
+(account-level 5-hour quota exhaustion), regardless of `rateLimitType`. As of 0.1.27, the
+`rate_limit_account` branch requires `rateLimitType == "five_hour"`; other `rate_limit_event`
+values (e.g. `rateLimitType: null` for claude.ai infrastructure throttling) fall through to
+status-code-based classification.
+Concretely: a claude.ai 429 with `rateLimitType: null` is now correctly emitted as
+`transient_error_detected` with `classification: "rate_limit_model"` and a 60-second default
+`reset_at_epoch`, instead of `rate_limit_account` with a synthetic 5-minute fallback epoch.
+No event schema changes. No new event kinds. No new public API.
+## Supervisor usage — consuming `transient_error_detected`
+External supervisors should subscribe to the `transient_error_detected` event family (added in
+0.1.23). The event carries a `classification` discriminator with one of 4 values:
+| classification     | Trigger                                              | reset_at_epoch semantics            | Suggested supervisor action               |
+|--------------------|------------------------------------------------------|-------------------------------------|-------------------------------------------|
+| rate_limit_account | rate_limit_event.rateLimitType == "five_hour"        | Server-provided `resetsAt` (exact)  | Sleep until reset_at_epoch (multi-hour)   |
+| rate_limit_model   | api_error_status == 429 (and not five_hour)          | now + 60s default (no server hint)  | Sleep until reset_at_epoch or exp-backoff |
+| api_transient_5xx  | api_error_status in {500, 502, 503, 504}             | now + 60s default                   | Sleep until reset_at_epoch or exp-backoff |
+| api_timeout        | api_error_status == 408                              | now + 30s default                   | Sleep until reset_at_epoch or exp-backoff |
+### Event payload shape
+Each line in `events-YYYY-MM.jsonl` looks like:
+```json
+{
+  "ts": "2026-05-17T02:13:44.123Z",
+  "event": "transient_error_detected",
+  "classification": "rate_limit_model",
+  "agent": "claude",
+  "reset_at_epoch": 1747450424,
+  "round_num": 7,
+  "raw": "API Error: Server is temporarily limiting requests (not your usage limit) · Rate limited"
+}
+```
+Fields:
+- `ts` — ISO 8601 UTC timestamp of event emission.
+- `event` — always `"transient_error_detected"`.
+- `classification` — one of the 4 buckets above.
+- `agent` — `"claude"` (gemini uses same schema via its own plugin).
+- `reset_at_epoch` — Unix epoch seconds; supervisor sleeps until this time. For
+  `rate_limit_account` this is the server-provided exact unblock time; for all other buckets
+  it is `now + default_seconds` at the moment of emission.
+- `round_num` — which agent round triggered the error.
+- `raw` — first 200 chars of the result text (useful for logging/alerting).
+### Dispatch recipe (Python)
+```python
+import time
+def handle_transient_error(event: dict) -> None:
+    """React to a transient_error_detected event from agent-runner."""
+    bucket = event["classification"]
+    reset_at = event["reset_at_epoch"]
+    now = time.time()
+    wait_s = max(reset_at - now, 0)
+    if bucket == "rate_limit_account":
+        # Server-provided exact unblock time; respect it (multi-hour wait typical).
+        time.sleep(wait_s)
+    elif bucket == "rate_limit_model":
+        # Infra-level 429; 60s default. Apply your own exp-backoff curve if desired.
+        time.sleep(wait_s)  # or: time.sleep(exp_backoff_with_cap(attempts, cap=300))
+    elif bucket == "api_transient_5xx":
+        # Transient server error; 60s default.
+        time.sleep(wait_s)
+    elif bucket == "api_timeout":
+        # Request timed out; 30s default.
+        time.sleep(wait_s)
+    else:
+        # Unknown future bucket — safe fallback.
+        time.sleep(max(wait_s, 30))
+```
+### Default back-off vs. your own curve
+Agent-runner's defaults (`rate_limit_model` and `api_transient_5xx` → 60s, `api_timeout` → 30s)
+are a conservative baseline — a flat one-shot sleep. Supervisors that track consecutive failures
+may apply an exponential curve with a cap (e.g. 30s → 60s → 120s → 300s max) for
+`rate_limit_model` and `api_transient_5xx`. For `rate_limit_account`, always respect
+`reset_at_epoch` verbatim — the server provides the exact unblock time.
+## Migration from legacy `rate_limit_rejected` event
+Consumers that still listen to `rate_limit_rejected` (added in 0.1.20) continue to receive it
+for `rate_limit_account` events only. It is emitted as a back-compat dual-emit alongside
+`transient_error_detected`. New consumers should subscribe to `transient_error_detected` for
+full 4-bucket coverage; `rate_limit_rejected` only fires for the `five_hour` bucket and carries
+no `classification` field.
+## Verification
+Write the incident JSONL into a temporary file and call `_parse_claude_log` directly:
+```bash
+mkdir -p /tmp/verify-0.1.27
+cat > /tmp/verify-0.1.27/round-1.log <<'EOF'
+{"type":"rate_limit_event","rate_limit_info":{"status":"rejected","rateLimitType":null}}
+{"type":"assistant","message":{"model":"claude-opus-4-7","content":[{"type":"text","text":"API Error: rate limited"}]}}
+{"type":"result","is_error":true,"api_error_status":429,"result":"API Error: rate limited","usage":{"input_tokens":100,"output_tokens":10,"cache_read_input_tokens":0},"duration_ms":1000,"total_cost_usd":0.01}
+EOF
+.venv/bin/python -c "
+from pathlib import Path
+from agent_runner.builtin_plugins.claude_rate_limit import _parse_claude_log
+import json
+print(json.dumps(_parse_claude_log(Path('/tmp/verify-0.1.27/round-1.log')), indent=2, default=str))
+"
+```
+Expected output:
+```json
+{
+  "transient_error": {
+    "classification": "rate_limit_model",
+    "agent": "claude",
+    "reset_at_epoch": 1747450484,
+    "raw": "API Error: rate limited"
+  },
+  "usage": {
+    "agent": "claude",
+    "model": "claude-opus-4-7",
+    "input_tokens": 100,
+    "output_tokens": 10,
+    "cached_tokens": 0,
+    "cost_usd": 0.01,
+    "duration_ms": 1000,
+    "models_breakdown": null
+  }
+}
+```
+`classification` must be `"rate_limit_model"` and `reset_at_epoch` must be approximately
+`now + 60`. Clean up with `rm -rf /tmp/verify-0.1.27`.
+## Impact summary
+Supervisors that dispatch semantically on `classification == "rate_limit_account"` (e.g. "this
+is the 5-hour quota — wait until reset") will no longer trigger that path for infrastructure
+429s with `rateLimitType: null`. Instead, a `rate_limit_model` event fires with a 60-second
+`reset_at_epoch`.
+Supervisors that dispatch only on `reset_at_epoch` (ignoring classification) will see a shorter
+wait (60s instead of ~300s) for infra 429s — a net improvement.
+Supervisors subscribed to the legacy `rate_limit_rejected` event are unaffected: that event
+only fires for genuine `rate_limit_account` (five_hour) events, which continue to work as
+before.
+## What did NOT change
+- `transient_error_detected` event schema — field names, field types unchanged.
+- `_BACK_OFF_DEFAULTS` table — unchanged.
+- Legacy `rate_limit_rejected` back-compat emission for `rate_limit_account` — unchanged.
+- Other plugins (gemini) — no equivalent `rate_limit_event` semantics; not touched.
+- Public API surface — no new functions, no new event kinds, no signature changes.

cli_agent_runner-0.1.28/docs/migrations/0.1.28.md ADDED Viewed

@@ -0,0 +1,176 @@
+# Migration Guide — 0.1.28
+## What changed
+Three areas changed in 0.1.28. First, `agent_usage_recorded` events now carry four additional
+fields: `cache_creation_tokens`, `tool_call_count`, `phase`, and `success`. These are populated
+by the built-in claude and gemini plugins; third-party plugins that call
+`emit_agent_usage_recorded` without the new kwargs emit them with safe defaults. Second, the
+gemini plugin's `models_breakdown` per-model dict no longer passes through the raw `input` and
+`cached` keys from the gemini JSONL; only canonical `input_tokens` and `cached_tokens` are
+present. Consumers iterating `models_breakdown` entries need a one-line rename. Third, the
+`ThrottleState` back-compat alias (introduced 0.1.23 when the class was renamed to
+`TransientErrorState`) is removed; any import of `ThrottleState` now raises `ImportError`.
+---
+## New fields reference
+| Field | Type | Semantics | claude value | gemini value |
+|---|---|---|---|---|
+| `cache_creation_tokens` | `int` | Tokens written into the prompt cache (billed at ~25 % premium over fresh input per Anthropic pricing). Independent count from `cached_tokens` (reads). | `usage.cache_creation_input_tokens` | `0` (no creation concept) |
+| `tool_call_count` | `int` | Number of tool invocations the agent made in the round. | Count of `tool_use` content blocks across all assistant events in the round JSONL. | `stats.tool_calls` |
+| `phase` | `str` | Phase label from `HookContext.phase`; empty string when the round has no phase. | `ctx.phase or ""` | `ctx.phase or ""` |
+| `success` | `bool` | `True` when `exit_code == 0` and `timed_out` is `False`. | `result.exit_code == 0 and not result.timed_out` | same |
+All four fields have safe defaults in `emit_agent_usage_recorded` (`0`, `0`, `""`, `True`),
+so third-party plugins that call the function without the new kwargs continue to work without
+modification.
+---
+## Updated event payload example
+### Claude variant
+```json
+{
+  "kind": "agent_usage_recorded",
+  "agent": "claude",
+  "model": "claude-opus-4-7",
+  "round_num": 3,
+  "input_tokens": 4200,
+  "output_tokens": 312,
+  "cached_tokens": 18900,
+  "cache_creation_tokens": 12223,
+  "cost_usd": 0.0812,
+  "duration_ms": 14470,
+  "models_breakdown": null,
+  "tool_call_count": 2,
+  "phase": "dev",
+  "success": true
+}
+```
+### Gemini variant
+```json
+{
+  "kind": "agent_usage_recorded",
+  "agent": "gemini",
+  "model": "gemini-3-flash-preview",
+  "round_num": 1,
+  "input_tokens": 4614,
+  "output_tokens": 91,
+  "cached_tokens": 15119,
+  "cache_creation_tokens": 0,
+  "cost_usd": null,
+  "duration_ms": 5337,
+  "models_breakdown": null,
+  "tool_call_count": 1,
+  "phase": "",
+  "success": true
+}
+```
+---
+## Cost reconciliation recipe
+```python
+def total_token_cost(event: dict) -> float | None:
+    """Return estimated USD cost for a round.
+    For claude: use the recorded cost_usd (includes cache read + write billing).
+    For gemini: cost_usd is null; billing requires provider pricing sheet.
+    """
+    if event.get("cost_usd") is not None:
+        return event["cost_usd"]
+    # gemini: no USD field; caller must apply provider pricing
+    # approximate: input_tokens * input_rate + output_tokens * output_rate + cached_tokens * cache_rate
+    return None
+def audit_cache_efficiency(event: dict) -> dict:
+    """Breakdown of cache hit vs creation vs fresh input for a claude round."""
+    return {
+        "fresh_input": event["input_tokens"],
+        "cache_read": event["cached_tokens"],
+        "cache_write": event["cache_creation_tokens"],
+        "total_throughput": (
+            event["input_tokens"] + event["cached_tokens"] + event["cache_creation_tokens"]
+        ),
+    }
+```
+---
+## `models_breakdown` migration
+Pre-0.1.28, gemini multi-model rounds passed raw gemini stat keys directly into each
+`models_breakdown` entry. Those entries contained both `input` (raw) and `input_tokens`
+(canonical) with the same value, but only `cached` (raw) — `cached_tokens` was missing entirely.
+0.1.28 normalises to canonical keys only:
+| Pre-0.1.28 key | Removed? | Canonical replacement |
+|---|---|---|
+| `input` | yes | `input_tokens` |
+| `cached` | yes | `cached_tokens` |
+| `input_tokens` | kept | — |
+| `output_tokens` | kept | — |
+| `total_tokens` | kept | — |
+Migration for any consumer iterating breakdown entries:
+```python
+# Before
+entry["input"]   # raw gemini field
+entry["cached"]  # raw gemini field
+# After
+entry["input_tokens"]   # canonical
+entry["cached_tokens"]  # canonical
+```
+---
+## `ThrottleState` import migration
+```python
+# Before (raises ImportError on 0.1.28+)
+from agent_runner.api_types import ThrottleState
+# After
+from agent_runner.api_types import TransientErrorState
+```
+`ThrottleState` was renamed to `TransientErrorState` in 0.1.23 when the `classification` field
+was added. The alias was retained through 0.1.27. It is now removed.
+---
+## Verification recipe
+Run one round and inspect the emitted event:
+```bash
+cd /path/to/your/project
+agent-runner serve --max-rounds 1
+# Find the most recent monthly events file (events-YYYY-MM.jsonl)
+EVENTS=$(ls logs/events-*.jsonl | tail -1)
+# Print the usage event with pretty JSON
+grep '"kind":"agent_usage_recorded"' "$EVENTS" | python3 -m json.tool
+# Confirm all new fields are present
+grep '"kind":"agent_usage_recorded"' "$EVENTS" | python3 -c "
+import json, sys
+evt = json.loads(sys.stdin.read())
+for field in ['cache_creation_tokens', 'tool_call_count', 'phase', 'success']:
+    assert field in evt, f'missing field: {field}'
+    print(f'{field}: {evt[field]}')
+print('All new fields present.')
+"
+```

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/_test_helpers.py RENAMED Viewed

@@ -109,7 +109,13 @@ def read_events_for_current_month(log_dir: Path) -> list[dict]:
     return [json.loads(line) for line in events_path.read_text().splitlines() if line.strip()]
-def make_hook_context(tmp_path: Path, *, agent_name: str = "claude", round_num: int = 1):
+def make_hook_context(
+    tmp_path: Path,
+    *,
+    agent_name: str = "claude",
+    round_num: int = 1,
+    phase: str | None = None,
+):
     """Build a minimal HookContext for plugin testing.
     agent_log_path is populated to match where runner.py writes the
@@ -126,7 +132,7 @@ def make_hook_context(tmp_path: Path, *, agent_name: str = "claude", round_num:
         log_dir=tmp_path,
         project="testproj",
         round_num=round_num,
-        phase=None,
+        phase=phase,
         agent_name=agent_name,
         agent_log_path=agent_log_path,
     )

{cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_types.py RENAMED Viewed

@@ -114,3 +114,12 @@ def test_given_state_default_when_constructed_then_recent_hook_failures_empty()
         service=ServiceStatus(mode=ServiceMode.NONE, active=False),
     )
     assert state.recent_hook_failures == []
+def test_throttle_state_removed() -> None:
+    """ThrottleState alias was deprecated 0.1.23, removed 0.1.28.
+    Consumers should switch to TransientErrorState.
+    """
+    with pytest.raises(ImportError):
+        from agent_runner.api_types import ThrottleState  # noqa: F401

cli-agent-runner 0.1.26__tar.gz → 0.1.28__tar.gz

cli-agent-runner 0.1.26tar.gz → 0.1.28tar.gz