PyPI - cli-agent-runner - Versions diffs - 0.1.32__tar.gz → 0.1.33__tar.gz - Mend

cli-agent-runner 0.1.32tar.gz → 0.1.33tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (218) hide show

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.gitignore RENAMED Viewed

@@ -1,6 +1,9 @@
 # Internal working notes (specs, plans, drafts) — not for public repo.
 docs/internal/
+# Local git hooks — opt-in per clone; CI lint-commits is the authoritative gate.
+.githooks/
 # Python
 __pycache__/
 *.py[cod]

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.1.33] - 2026-05-19
+### Added
+- `_5XX_STATUSES` includes 529 (Anthropic's "overloaded") — now classified as `api_transient_5xx`.
+- Exp backoff for estimated-class transient errors (`rate_limit_model` / `api_transient_5xx` / `api_timeout`): each consecutive failure multiplies the wait by `2^N`, with the multiplier capped at 32× and the resulting wait capped at 30 minutes absolute. Server-authoritative `rate_limit_account` is unchanged.
+- `transient_error_backoff_capped` event gains `original_reset_at_epoch`, `applied_reset_at_epoch`, `consecutive_count`, `capped_by_absolute_max` fields for backoff-curve observability.
+- `docs/thesis.md` names the server-authoritative vs estimated reset principle.
+See `docs/migrations/0.1.33.md`.
 ## [0.1.32] - 2026-05-18
 ### Added

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CONTRIBUTING.md RENAMED Viewed

@@ -9,7 +9,6 @@ git clone https://github.com/wan9yu/cli-agent-runner.git
 cd cli-agent-runner
 python3 -m venv .venv && source .venv/bin/activate
 pip install -e ".[dev]"
-git config core.hooksPath .githooks    # enables the commit-msg lint hook
 ./build.sh check
 ```
@@ -17,13 +16,6 @@ git config core.hooksPath .githooks    # enables the commit-msg lint hook
 + integration tests, the literate quickstart, and the docs CI gate. It's
 what GitHub Actions runs on every push and PR.
-`git config core.hooksPath .githooks` activates the in-repo
-[`.githooks/commit-msg`](.githooks/commit-msg) hook which rejects commit
-messages containing `Co-Authored-By:` trailers, robot emojis, or other
-AI-tool attribution patterns. The same check runs in CI (`lint-commits`
-job) and as a pytest invariant (`tests/invariants/test_no_ai_signatures.py`)
-— defense in depth.
 ## Workflow
 1. Open an issue first for non-trivial changes — saves wasted work on both sides.
@@ -33,6 +25,9 @@ job) and as a pytest invariant (`tests/invariants/test_no_ai_signatures.py`)
 5. Run `./build.sh check` locally before pushing.
 6. Conventional Commits: `feat:` / `fix:` / `docs:` / `refactor:` / `test:` /
    `chore:` / `ci:` / `build:` / `perf:`. Subjects in English, imperative mood.
+   CI (`lint-commits` job) and `tests/invariants/test_no_ai_signatures.py`
+   reject auto-generated trailers and robot signatures — keep messages
+   human-authored.
 ## Architecture / docs

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cli-agent-runner
-Version: 0.1.32
+Version: 0.1.33
 Summary: Restart-on-exit supervisor for autonomous CLI agents
 Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
 Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_emit.py RENAMED Viewed

@@ -281,15 +281,40 @@ def emit_transient_error_backoff_capped(
     agent: str,
     requested_sleep_s: int,
     applied_sleep_s: int,
+    original_reset_at_epoch: int | None = None,
+    applied_reset_at_epoch: int | None = None,
+    consecutive_count: int | None = None,
+    capped_by_absolute_max: bool | None = None,
 ) -> None:
-    """Emit defensive event when computed back-off exceeded 8h cap."""
+    """Emit when supervisor adjusts the plugin-emitted transient back-off.
+    Fires in two cases:
+    1. **Exp backoff applied** (0.1.33+): estimated-class transient errors
+       (`rate_limit_model` / `api_transient_5xx` / `api_timeout`) doubled
+       on consecutive failures. ``consecutive_count`` > 1, multiplier > 1×.
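+    2. **Cap hit**: the 30-min absolute cap clipped the exp-backoff wait
+       (``capped_by_absolute_max`` True, 0.1.33+), or the defensive 8h cap
+       (0.1.20+) clipped a malformed `reset_at_epoch` — that path passes
+       only the original fields, so the 0.1.33 fields are omitted.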
+    Fields ``original_reset_at_epoch`` / ``applied_reset_at_epoch`` /
+    ``consecutive_count`` / ``capped_by_absolute_max`` are 0.1.33+. Older
+    callers that pass only the first 4 kwargs continue to work; the new
+    fields are omitted from the payload when None.
+    """
     from agent_runner.events import TRANSIENT_ERROR_BACKOFF_CAPPED, emit
-    emit(
-        log_dir,
-        TRANSIENT_ERROR_BACKOFF_CAPPED,
-        classification=classification,
-        agent=agent,
-        requested_sleep_s=requested_sleep_s,
-        applied_sleep_s=applied_sleep_s,
-    )
+    kwargs: dict = {
+        "classification": classification,
+        "agent": agent,
+        "requested_sleep_s": requested_sleep_s,
+        "applied_sleep_s": applied_sleep_s,
+    }
+    if original_reset_at_epoch is not None:
+        kwargs["original_reset_at_epoch"] = original_reset_at_epoch
+    if applied_reset_at_epoch is not None:
+        kwargs["applied_reset_at_epoch"] = applied_reset_at_epoch
+    if consecutive_count is not None:
+        kwargs["consecutive_count"] = consecutive_count
+    if capped_by_absolute_max is not None:
+        kwargs["capped_by_absolute_max"] = capped_by_absolute_max
+    emit(log_dir, TRANSIENT_ERROR_BACKOFF_CAPPED, **kwargs)

cli_agent_runner-0.1.33/agent_runner/_throttle.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""Throttle state helpers — read events.jsonl tail for transient error state.
+Internal module. Callers: runner.py (serve loop back-off), api.py (peek).
+Separated from runner.py to satisfy the ouroboros defense: runner.py writes
+events.jsonl but must never read it back (§3 module boundary invariant).
+"""
+from __future__ import annotations
+import json
+import time
+from collections import deque
+from pathlib import Path
+from typing import Any
+from agent_runner.api_types import TransientErrorState
+def _check_throttle_state(log_dir: Path) -> TransientErrorState | None:
+    """Scan events.jsonl tail for latest unmatched transient error.
+    Reads `transient_error_detected` / `transient_error_recovered` event names.
+    Returns TransientErrorState if currently throttled (reset still in future,
+    no matching recovered after). Restart-safe.
+    """
+    candidates = sorted(log_dir.glob("events-*.jsonl"))
+    if not candidates:
+        return None
+    with candidates[-1].open() as f:
+        tail = deque(f, maxlen=100)
+    events: list[dict[str, Any]] = []
+    for line in tail:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            events.append(json.loads(line))
+        except json.JSONDecodeError:
+            continue
+    latest_detected: dict[str, Any] | None = None
+    for ev in reversed(events):
+        kind = ev.get("event")
+        if kind == "transient_error_recovered":
+            return None
+        if kind == "transient_error_detected":
+            latest_detected = ev
+            break
+    if latest_detected is None:
+        return None
+    reset_at = int(latest_detected.get("reset_at_epoch", 0))
+    if reset_at <= time.time():
+        return None  # Reset already passed without recovery emit; treat as recovered
+    classification = str(latest_detected.get("classification", "rate_limit_account"))
+    return TransientErrorState(
+        reset_at_epoch=reset_at,
+        classification=classification,
+        agent=str(latest_detected.get("agent", "unknown")),
+        since_round=int(latest_detected.get("round_num", 0)),
+    )
+# Module-level supervisor state — bucket → consecutive-failure count.
+# Cleared by reset_counters() or by serve restart.
+_consecutive_failures: dict[str, int] = {}
+def compute_adjusted_reset_at(
+    *,
+    classification: str,
+    original_reset_at_epoch: int,
+    agent: str,
+    log_dir: Path,
+) -> tuple[int, int, bool]:
+    """Apply exp backoff for estimated-class transient errors.
+    Returns (applied_reset_at_epoch, consecutive_count, capped_by_absolute_max).
+    For server-authoritative classification (``rate_limit_account``): returns
+    the original reset epoch verbatim, never increments the counter, and
+    never emits an adjustment event. Anthropic's resetsAt is authoritative.
+    For estimated classifications (``rate_limit_model``, ``api_transient_5xx``,
+    ``api_timeout``): increments the counter for this bucket, computes
+    duration = base × 2^min(n, _EXP_CAP), caps at _ABSOLUTE_CAP_S, emits
+    ``transient_error_backoff_capped`` if multiplier > 1 or capped.
+    """
+    from agent_runner._emit import emit_transient_error_backoff_capped
+    from agent_runner.builtin_plugins._constants import (
+        _ABSOLUTE_CAP_S,
+        _BACK_OFF_DEFAULTS,
+        _EXP_CAP,
+    )
+    if classification == "rate_limit_account":
+        # Server-authoritative: respect resetsAt verbatim, no counter touch.
+        return (original_reset_at_epoch, 0, False)
+    # Estimated class: apply exp backoff.
+    base = _BACK_OFF_DEFAULTS[classification]
+    n = _consecutive_failures.get(classification, 0)
+    multiplier = 2 ** min(n, _EXP_CAP)
+    extended_duration = base * multiplier
+    capped_by_absolute_max = extended_duration > _ABSOLUTE_CAP_S
+    applied_duration = min(extended_duration, _ABSOLUTE_CAP_S)
+    applied_reset_at = int(time.time()) + applied_duration
+    new_count = n + 1
+    _consecutive_failures[classification] = new_count
+    # Emit observability event when supervisor adjusted the wait.
+    if multiplier > 1 or capped_by_absolute_max:
+        emit_transient_error_backoff_capped(
+            log_dir,
+            classification=classification,
+            agent=agent,
+            requested_sleep_s=int(base),
+            applied_sleep_s=applied_duration,
+            original_reset_at_epoch=original_reset_at_epoch,
+            applied_reset_at_epoch=applied_reset_at,
+            consecutive_count=new_count,
+            capped_by_absolute_max=capped_by_absolute_max,
+        )
+    return (applied_reset_at, new_count, capped_by_absolute_max)
+def reset_counters() -> None:
+    """Clear all bucket counters. Called by serve loop when no active throttle."""
+    _consecutive_failures.clear()

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.1.32'
-__version_tuple__ = version_tuple = (0, 1, 32)
+__version__ = version = '0.1.33'
+__version_tuple__ = version_tuple = (0, 1, 33)
 __commit_id__ = commit_id = None

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/_constants.py RENAMED Viewed

@@ -21,9 +21,11 @@ _BACK_OFF_DEFAULTS: dict[str, int] = {
 }
 # 5xx codes treated as transient (retry-worthy server errors per RFC 9110):
-# 500=unexpected, 502=bad gateway, 503=unavailable, 504=gateway timeout.
+# 500=unexpected, 502=bad gateway, 503=unavailable, 504=gateway timeout,
+# 529=overloaded (Anthropic's non-RFC code emitted during sustained capacity
+# issues; treated as transient per Anthropic SDK behavior).
 # Excluded: 501 (not implemented = permanent), 505 (HTTP version mismatch).
-_5XX_STATUSES: frozenset[int] = frozenset({500, 502, 503, 504})
+_5XX_STATUSES: frozenset[int] = frozenset({500, 502, 503, 504, 529})
 _CLASSIFICATIONS: frozenset[str] = frozenset(
     {
@@ -38,3 +40,17 @@ _CLASSIFICATIONS: frozenset[str] = frozenset(
 rate_limit_account uses server-provided resetsAt (excluded from
 _BACK_OFF_DEFAULTS table); others use defaults from that table.
 """
+_EXP_CAP: int = 5
+"""Maximum exponent for transient-error consecutive backoff: 2^5 = 32×.
+Beyond this, the multiplier plateaus. Combined with _ABSOLUTE_CAP_S, this
+prevents runaway wait times during sustained outages (max wait = 30min).
+"""
+_ABSOLUTE_CAP_S: int = 1800
+"""Absolute upper bound on supervisor-applied transient back-off (30 min).
+Applies after exp multiplier — even if base × 2^5 exceeds this, the wait
+is clipped here. Defends against an indefinitely-stuck supervisor.
+"""

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/serve_cmd.py RENAMED Viewed

@@ -20,6 +20,7 @@ from pathlib import Path
 from agent_runner._substrate import compute_git_head, compute_paths_hash
 from agent_runner._throttle import _check_throttle_state
+from agent_runner._throttle import reset_counters as _reset_counters
 from agent_runner.api import (
     check_self_terminated_sentinel,
     emit_fresh_eyes_round_triggered,
@@ -151,6 +152,10 @@ def cmd(args) -> int:
                 elif action == "stop":
                     emit_rate_limit_stop(log_dir)
                     break
+            else:
+                # No active throttle this round — supervisor counters can reset.
+                # Next failure (if any) restarts the exp backoff curve from 1×.
+                _reset_counters()
             if stop_file is not None and stop_file.exists():
                 try:
                     content = stop_file.read_text(encoding="utf-8", errors="replace")[:200]

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/runner.py RENAMED Viewed

@@ -44,17 +44,32 @@ _BACK_OFF_JITTER_MAX_S = 30
 def _apply_back_off(log_dir: Path, throttle: TransientErrorState) -> None:
-    """Sleep until throttle.reset_at_epoch + jitter; emit recovered (and capped if applicable).
+    """Sleep until adjusted reset_at + jitter; emit recovered (and capped if applicable).
-    Capped at _BACK_OFF_CAP_S to defend against malformed reset epochs.
+    For estimated-class classifications (rate_limit_model / api_transient_5xx /
+    api_timeout), applies exp backoff on consecutive failures via
+    `_throttle.compute_adjusted_reset_at`. For server-authoritative
+    rate_limit_account, the original reset_at_epoch is used verbatim.
+    Defensive 8h cap retained as last-line defense against malformed reset
+    epochs (e.g. an external/manual event with a far-future reset_at).
     """
+    from agent_runner import _throttle
+    adjusted_reset_at, _consecutive_count, _capped = _throttle.compute_adjusted_reset_at(
+        classification=throttle.classification,
+        original_reset_at_epoch=throttle.reset_at_epoch,
+        agent=throttle.agent,
+        log_dir=log_dir,
+    )
     now = time.time()
     requested = (
-        throttle.reset_at_epoch
-        - now
-        + random.uniform(_BACK_OFF_JITTER_MIN_S, _BACK_OFF_JITTER_MAX_S)
+        adjusted_reset_at - now + random.uniform(_BACK_OFF_JITTER_MIN_S, _BACK_OFF_JITTER_MAX_S)
     )
     if requested > _BACK_OFF_CAP_S:
+        # Defensive: malformed reset epoch (e.g. manual event with far-future ts).
+        # Exp backoff layer caps at 30min, so legitimate flow never hits this.
         api.emit_transient_error_backoff_capped(
             log_dir,
             classification=throttle.classification,

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/architecture.md RENAMED Viewed

@@ -65,7 +65,7 @@ surfacing everywhere.
 | `event_kind_registry` | Prevent events.emit() typos / unregistered kinds slipping past CI | `tests/invariants/test_event_kind_registry.py` |
 <!-- /gen:defenses-table -->
-## Monitor: 9 detectors
+## Monitor: 11 detectors
 Three categories by `auto_action`:

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/commands.md RENAMED Viewed

@@ -87,6 +87,8 @@ agent-runner peek
 agent-runner peek --json
 agent-runner peek --select system.disk_used_pct
 agent-runner peek --select defenses
+agent-runner peek --select events.agent_usage_recorded --window 5    # 0.1.32+: native event-kind query
+agent-runner peek --select events.transient_error_detected --window 20
 agent-runner peek --round 42 --log         # drill into round 42, include log tail
 agent-runner peek --events 50              # last 50 events
 ```

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/long-running-agents.md RENAMED Viewed

@@ -216,10 +216,11 @@ the underlying problem is unbounded lineage on a shared resource.
 event family is now `transient_error_detected` with a `classification`
 field (`rate_limit_account`, `rate_limit_model`, `api_transient_5xx`,
 `api_timeout`). The same back-off mechanism covers all 4 classifications.
-The legacy `rate_limit_rejected` event is still dual-emitted for the
-`rate_limit_account` case only (no removal date set); new subscribers
-should consume `transient_error_detected` for full coverage. See
-`docs/migrations/0.1.27.md` for the consumer dispatch recipe.
+The legacy `rate_limit_rejected` aliases were removed in 0.1.29 — subscribe
+to `transient_error_detected` (filter by `classification == "rate_limit_account"`
+if you only want 5h-quota events). See `docs/migrations/0.1.27.md` for the
+consumer dispatch recipe and `docs/migrations/0.1.29.md` for alias-removal
+migration recipes.
 ## Writing post_round_hook plugins

cli_agent_runner-0.1.33/docs/migrations/0.1.33.md ADDED Viewed

@@ -0,0 +1,88 @@
+# 0.1.33 — Transient-error exp backoff + 529 classification
+**Date**: 2026-05-19
+## What changed
+Two improvements to how agent-runner handles transient errors from claude.ai:
+1. **Exp backoff for estimated-class transient errors**. When a round fires the same
+   estimated-class transient (`rate_limit_model` / `api_transient_5xx` / `api_timeout`)
+   after waiting our previous estimate, the supervisor doubles the next wait. Curve:
+   `2^N` capped at 32× and 30 minutes absolute. Counter resets when a round completes
+   without firing a new transient. Defaults upgraded transparently — no config knobs,
+   no consumer action.
+2. **529 status code now classified as `api_transient_5xx`**. Anthropic's "overloaded"
+   response (HTTP 529, non-RFC) is now handled as a transient error — the supervisor
+   throttles instead of immediately re-dispatching. Previously, a 529 fell through to
+   "unknown error, no transient detection" and hammered the upstream.
+Server-authoritative `rate_limit_account` (Anthropic's `resetsAt`) is unchanged —
+the server's exact unblock time is respected verbatim, no exp backoff applied.
+## Backoff curve reference (estimated classes)
+| Bucket | Base | 1× | 2× | 4× | 8× | 16× | 32× (cap) | Absolute cap |
+|---|---|---|---|---|---|---|---|---|
+| `rate_limit_model` | 60s | 60s | 120s | 240s | 480s | 960s | **1800s** | 30 min |
+| `api_transient_5xx` | 60s | 60s | 120s | 240s | 480s | 960s | **1800s** | 30 min |
+| `api_timeout` | 30s | 30s | 60s | 120s | 240s | 480s | 960s | 30 min |
+Multiplier = `2^min(consecutive_count - 1, 5)`. The multiplier reaches 32× on the
+6th consecutive failure and plateaus there. The effective wait may also be clipped by
+the 30-minute absolute cap (e.g. `rate_limit_model` on its 6th consecutive failure:
+`60 × 32 = 1920s`, clipped to `1800s`).
+## Observing the backoff curve
+The `transient_error_backoff_capped` event now fires whenever the supervisor adjusts
+the plugin-emitted wait — including the exp-backoff case. Previously it fired only
+when the defensive 8-hour cap was hit.
+```bash
+agent-runner peek --select events.transient_error_backoff_capped --window 20
+```
+The payload includes:
+- `original_reset_at_epoch` — what the plugin emitted (base × 1×)
+- `applied_reset_at_epoch` — what the supervisor will actually sleep to
+- `consecutive_count` — how many times this bucket fired in a row
+- `capped_by_absolute_max` — whether the 30-min ceiling kicked in
+## Server-authoritative class (unchanged)
+`rate_limit_account` events with Anthropic's `resetsAt` epoch are still respected
+exactly. The exp backoff machinery never increments the counter for this bucket
+and never emits `transient_error_backoff_capped`. The reasoning: server knows
+when the 5-hour quota resets; second-guessing it would be counter-productive.
+## Tuning
+No config knobs added. The curve parameters (base, multiplier, exp cap, absolute
+cap) are hardcoded to sensible defaults. If your scenario needs different values,
+open an issue with the specific case — we'll evaluate against
+`docs/thesis.md` ("Not a remediation framework — defaults are right").
+## 529 callout
+Anthropic returns HTTP 529 ("overloaded") during sustained capacity issues. This
+is not in the RFC 9110 5xx set but Anthropic's SDK treats it as transient. Adding
+it to `_5XX_STATUSES` keeps our classification consistent with upstream behavior.
+## No consumer action required
+All changes are default-on or additive:
+- Existing TOML keeps working unchanged.
+- Existing event subscribers see a more populated `transient_error_backoff_capped`
+  payload (additive fields) — old fields retained.
+- Plugins (`claude_error_detector`, `gemini_error_detector`) unchanged.
+- CLI surface unchanged.
+If your code reads `transient_error_backoff_capped` payload, the four new fields
+(`original_reset_at_epoch`, `applied_reset_at_epoch`, `consecutive_count`,
+`capped_by_absolute_max`) are absent for events emitted before 0.1.33 (or by the
+defensive 8h-cap path which still uses only the old payload shape). Defensive
+parsing recommended.

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/plugins.md RENAMED Viewed

@@ -95,9 +95,18 @@ class HookContext:
     project: str
     round_num: int
     phase: str | None
-    agent_name: str | None
+    agent_name: str | None       # cosmetic name from [agent].name TOML
+    agent_binary: str | None     # 0.1.30+: basename of agent.command[0]
+    # plus dry_run, anomaly_repetitive_*, agent_log_path — see source for full set
 ```
+For capability detection (e.g. "is this round running claude?"), plugins
+should check `ctx.agent_binary == "claude"`, NOT `ctx.agent_name`. The
+former is the actual binary basename; the latter is user-cosmetic and
+may be overridden in `[agent] name = "..."` (this was a real bug fixed
+in 0.1.30 — strict `agent_name` check silently suppressed events when
+operators set custom names).
 `PostRoundHook` additionally receives a `RoundResult` (`from agent_runner.api_types import RoundResult`).
 Its field set is stable across 0.1.x (additions only).
@@ -289,7 +298,7 @@ and applies the configured `transient_error_action` (default `back_off`;
 No configuration required to enable the detector; it activates for any
 project using claude as the agent CLI.
-Non-claude agents: the detector returns early when `ctx.agent_name != "claude"`.
+Non-claude agents: the detector returns early when `ctx.agent_binary != "claude"`.
 Third-party plugin authors may use the same `register_post_round_hook` API
 to ship equivalent detectors for other agent CLIs — the bundled
 `gemini_error_detector` is a working reference.
@@ -297,7 +306,7 @@ to ship equivalent detectors for other agent CLIs — the bundled
 ## Custom monitor detectors (§3.3)
 0.1.5 adds a fourth extension point — plugin authors can ship custom monitor
-detectors that run alongside the 10 builtins on every monitor poll.
+detectors that run alongside the 11 builtins on every monitor poll.
 ### Group + Protocol

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/thesis.md RENAMED Viewed

@@ -51,12 +51,50 @@ then silence = hung agent. Not generic anomaly. Per-project variance in
 token usage and round duration is large enough that rolling-baseline alerting
 would produce constant false positives across diverse workloads.
+The `anomaly_repetitive_active` detector (added 0.1.32) is the live example:
+it fires when the claude plugin emits `anomaly_repetitive_tool` events
+above a fixed threshold within a window — a specific signature, not N-σ.
+`max_grace_after_result_s` (0.1.31) is another: kills the subprocess after
+a fixed grace following the `result` event — specific signature, not "is
+this subprocess behaving unusually".
 > **Example**: A 2026-05-18 proposal requested a "cost spike detector" that
 > fires when this round's cost is N× the rolling 7-day average. Rejected.
 > The rolling baseline itself requires aggregation we don't own, and the
 > threshold N is project-specific. A consumer can compute this from the flat
 > events file.
+### How we handle transient errors: server-authoritative vs estimated
+`transient_error_detected` events carry a `reset_at_epoch` field telling
+the supervisor when to retry. Two cases with different policies:
+- **Server-authoritative**: Anthropic's `rate_limit_event.resetsAt` is an
+  exact unblock time. We respect it verbatim — no backoff multipliers and no
+  30-minute cap; only the defensive 8h cap against malformed reset epochs
+  still applies. Server knows best.
+- **Estimated**: For other classifications (`rate_limit_model`,
+  `api_transient_5xx`, `api_timeout`), the plugin emits a default guess
+  (`_BACK_OFF_DEFAULTS[bucket]`). Guesses can be wrong; if a round fires
+  the same bucket again after waiting our guess, we increase the wait
+  exponentially (`2^N`, capped at 32× and 30 minutes absolute).
+This split keeps the policy simple: trust the server when it talks, and
+back off our own estimates when they prove insufficient. It is **not**
+N-σ novelty detection (which we reject — see the section above); it
+codifies the specific scar of "fixed-per-bucket backoff insufficient
+during sustained upstream outage."
+Counter reset: any round that completes without firing a new
+`transient_error_detected` event clears all bucket counters back to zero.
+> **Example**: Gateway 2026-05-18 reported sustained 5xx + 529 from
+> Anthropic where our previous fixed 60s wait was too short — the next
+> round hit the same error, waited 60s again, and again. Rejected: adding
+> a config knob (`[runtime] transient_backoff_strategy = "fixed" |
+> "exp"`). Instead: upgraded the default policy to exp backoff
+> transparently, since "the default was wrong" is the right framing — not
+> "the operator should pick between two strategies."
 ### Not an analytics database
 No `--select`-able query language beyond simple peek selectors. No event

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_architecture.py RENAMED Viewed

@@ -44,7 +44,7 @@ ALLOWED_SERVE_FROM = [
         "agent_runner.round_log",
         {"ROUND_CURRENT_LINK", "atomic_relink", "next_round_num", "prune_old_round_logs"},
     ),
-    ("agent_runner._throttle", {"_check_throttle_state"}),
+    ("agent_runner._throttle", {"_check_throttle_state", "reset_counters"}),
     ("agent_runner.runner", {"_apply_back_off"}),
 ]

{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_claude_error_detector.py RENAMED Viewed

@@ -613,3 +613,19 @@ def test_given_non_claude_binary_when_after_round_then_no_event(tmp_path):
     with patch(f"{_MOD}.emit_agent_usage_recorded") as emit:
         ClaudeErrorDetector().after_round(ctx, result)
     emit.assert_not_called()
+def test_given_claude_log_with_529_overloaded_when_classified_then_api_transient_5xx(tmp_path):
+    """Anthropic's "overloaded" status (529) should classify as api_transient_5xx,
+    not fall through as unknown error. Real scar — gateway hits this during
+    sustained Anthropic capacity issues.
+    """
+    from agent_runner.builtin_plugins.claude_rate_limit import _parse_claude_log
+    log = tmp_path / "round-1.log"
+    log.write_text(
+        '{"type":"result","is_error":true,"api_error_status":529,"result":"Overloaded"}\n',
+        encoding="utf-8",
+    )
+    parsed = _parse_claude_log(log)
+    assert parsed["transient_error"]["classification"] == "api_transient_5xx"

cli-agent-runner 0.1.32__tar.gz → 0.1.33__tar.gz

cli-agent-runner 0.1.32tar.gz → 0.1.33tar.gz