PyPI - cli-agent-runner - Versions diffs - 0.1.40__tar.gz → 0.1.41__tar.gz - Mend

cli-agent-runner 0.1.40__tar.gz → 0.1.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (232) hide show

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/CHANGELOG.md RENAMED Viewed

@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.1.41] - 2026-06-07
+### Added
+- New `codewhale` preset — supervise Hmbown/CodeWhale (DeepSeek terminal agent) via `codewhale exec --auto --output-format stream-json`. `agent-runner init --preset codewhale`.
+- New built-in `codewhale_error_detector` plugin — emits `agent_usage_recorded` (model + token counts) from codewhale's stream-json output. Transient-error classification is best-effort (mappable buckets only); auth failures surface via the existing monitor `oauth_fail` detector.
 ## [0.1.40] - 2026-05-31
 ### Security

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cli-agent-runner
-Version: 0.1.40
+Version: 0.1.41
 Summary: Restart-on-exit supervisor for autonomous CLI agents
 Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
 Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.1.40'
-__version_tuple__ = version_tuple = (0, 1, 40)
+__version__ = version = '0.1.41'
+__version_tuple__ = version_tuple = (0, 1, 41)
 __commit_id__ = commit_id = None

cli_agent_runner-0.1.41/agent_runner/builtin_plugins/codewhale.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""Built-in post_round_hook for codewhale CLI: usage events + transient classifier.
+Third built-in plugin (after claude, gemini). Parses codewhale's `exec
+--output-format stream-json` NDJSON stdout tail; emits agent_usage_recorded
+from the terminal metadata record. Transient-error classification is
+best-effort and emits ONLY when an error maps to an existing bucket (like
+gemini): codewhale's exec stdout surfaces a {"type":"error"} record, but the
+only observed case so far is auth failure (oauth_fail territory, not a
+transient bucket), so nothing maps yet -- usage-only today. 429/5xx mapping
+is added when a real rate-limit sample is captured.
+"""
+from __future__ import annotations
+import json
+import time
+from collections import deque
+from pathlib import Path
+from typing import Any
+from agent_runner.api import (
+    emit_agent_usage_recorded,
+    emit_transient_error_detected,
+)
+from agent_runner.builtin_plugins._constants import (
+    _5XX_STATUSES,
+    _BACK_OFF_DEFAULTS,
+    _RAW_CAP,
+    _TAIL_LINES,
+)
+from agent_runner.hooks import HookContext, register_post_round_hook
+class CodewhaleErrorDetector:
+    """Post-round hook that turns a codewhale round log into runner events.
+    After a round driven by the ``codewhale`` binary, the tail of the round log
+    is parsed and up to two events are emitted, in this order: a transient-error
+    event (when the log holds a classifiable error record) and a usage event
+    (when the log holds a metadata record).
+    """
+    name = "codewhale_error_detector"
+    def after_round(self, ctx: HookContext, result: Any) -> None:
+        """Emit events for one finished round; no-op for other agents or a missing log."""
+        # Rounds driven by any other agent binary are not ours to parse.
+        if ctx.agent_binary != "codewhale":
+            return
+        round_log = ctx.agent_log_path
+        if round_log is None or not round_log.exists():
+            return
+        findings = _parse_codewhale_log(round_log)
+        transient = findings.get("transient_error")
+        if transient:
+            emit_transient_error_detected(
+                ctx.log_dir, round_num=ctx.round_num, **transient
+            )
+        usage = findings.get("usage")
+        if usage:
+            # A round counts as successful only on a clean, non-timed-out exit.
+            succeeded = result.exit_code == 0 and not result.timed_out
+            emit_agent_usage_recorded(
+                ctx.log_dir,
+                round_num=ctx.round_num,
+                phase=ctx.phase or "",
+                success=succeeded,
+                **usage,
+            )
+def _parse_codewhale_log(log_path: Path) -> dict[str, Any]:
+    """Extract usage and any transient error from a codewhale round log.
+    Only the last ``_TAIL_LINES`` lines are scanned. Each line is parsed as an
+    independent JSON record; blank lines, non-JSON lines (codewhale prefixes
+    some stdout with terminal escapes) and non-object JSON values are skipped.
+    When several ``metadata`` or ``error`` records appear, the last one wins.
+    Returns a dict with up to two keys:
+    ``usage`` -- keyword arguments for ``emit_agent_usage_recorded``, present
+    when a non-empty metadata record was found;
+    ``transient_error`` -- keyword arguments for
+    ``emit_transient_error_detected``, present when an error record maps to a
+    transient bucket.
+    Malformed metadata never raises: a ``meta`` value that is not an object is
+    treated as empty, and token counts that are missing or not numeric count
+    as 0.
+    """
+    with log_path.open("r", encoding="utf-8", errors="replace") as f:
+        # A bounded deque keeps only the final _TAIL_LINES lines in memory.
+        tail = deque(f, maxlen=_TAIL_LINES)
+    metadata: dict[str, Any] | None = None
+    error_event: dict[str, Any] | None = None
+    for line in tail:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            event = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if not isinstance(event, dict):
+            continue
+        etype = event.get("type")
+        if etype == "metadata":
+            meta = event.get("meta")
+            # A non-object "meta" (string, list, null) carries no usable fields.
+            metadata = meta if isinstance(meta, dict) else {}
+        elif etype == "error":
+            error_event = event
+    out: dict[str, Any] = {}
+    if metadata:
+        out["usage"] = {
+            "agent": "codewhale",
+            "model": str(metadata.get("model", "unknown")),
+            "input_tokens": _coerce_token_count(metadata.get("input_tokens", 0)),
+            "output_tokens": _coerce_token_count(metadata.get("output_tokens", 0)),
+            "cached_tokens": 0,  # codewhale exec stdout exposes no cache counts
+            "cost_usd": None,  # codewhale exec stdout exposes no USD
+            "duration_ms": 0,  # not in exec metadata
+        }
+    if error_event is not None:
+        classification = _classify_codewhale_error(error_event)
+        if classification:
+            # Back-off window for this bucket, added to "now" for the reset time.
+            duration = _BACK_OFF_DEFAULTS[classification]
+            out["transient_error"] = {
+                "classification": classification,
+                "agent": "codewhale",
+                "reset_at_epoch": int(time.time() + duration),
+                "raw": str(error_event.get("error", "error"))[:_RAW_CAP],
+            }
+    return out
+def _coerce_token_count(value: Any) -> int:
+    """Return *value* as an int, or 0 when it is null or not numeric."""
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):
+        return 0
+def _classify_codewhale_error(error_event: dict[str, Any]) -> str | None:
+    """Map a codewhale {"type":"error"} record to a transient bucket, or None.
+    None means 'not a transient error' (e.g. auth failure -> handled by the
+    monitor's oauth_fail log-scan, not the transient classifier). codewhale's
+    error record currently carries only a free-text 'error' string with no
+    status code, so today every real record yields None. The numeric mapping
+    below is a dormant forward path for when a status field is observed: the
+    first truthy value of 'code' / 'status_code' is used; 429 maps to
+    rate_limit_model, any member of _5XX_STATUSES to api_transient_5xx, and
+    408 to api_timeout. A status given as a string of ASCII digits (e.g.
+    "429") is accepted; any other non-numeric status maps to None.
+    """
+    code = error_event.get("code") or error_event.get("status_code")
+    if isinstance(code, str):
+        digits = code.strip()
+        # JSON producers sometimes quote status codes; accept plain digits only.
+        if not (digits.isascii() and digits.isdigit()):
+            return None
+        code = int(digits)
+    elif not isinstance(code, (int, float)):
+        # Missing, null, or structured values (list/dict) cannot name a status.
+        return None
+    if code == 429:
+        return "rate_limit_model"
+    if code in _5XX_STATUSES:
+        return "api_transient_5xx"
+    if code == 408:
+        return "api_timeout"
+    return None
+register_post_round_hook(CodewhaleErrorDetector())

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/init_cmd.py RENAMED Viewed

@@ -2,15 +2,27 @@
 from __future__ import annotations
+import importlib.resources
 from agent_runner import api
 from agent_runner.cli.common import emit, fail, work_dir_from_args
+def _preset_names() -> list[str]:
+    """Return the sorted names of the scaffold presets shipped with the package.
+    A preset is any ``*.toml`` entry under ``agent_runner/presets``; its name is
+    the file name without the ``.toml`` suffix. Reading the directory instead of
+    a hardcoded list means a new preset only needs a new .toml file for the
+    ``--preset`` choices to pick it up.
+    """
+    suffix = ".toml"
+    names = []
+    for entry in importlib.resources.files("agent_runner.presets").iterdir():
+        filename = entry.name
+        if filename.endswith(suffix):
+            names.append(filename[: -len(suffix)])
+    names.sort()
+    return names
 def add_parser(sub, parent) -> None:
     p = sub.add_parser("init", parents=[parent], help="Scaffold agent-runner project files")
     p.add_argument(
         "--preset",
-        choices=["claude", "aider", "gemini"],
+        choices=_preset_names(),
         default="claude",
         help="Which agent CLI preset to scaffold (default: claude)",
     )

cli_agent_runner-0.1.41/agent_runner/presets/codewhale.toml ADDED Viewed

@@ -0,0 +1,30 @@
+# agent-runner.toml — generated by `agent-runner init --preset codewhale`.
+#
+# Prereqs:
+#   - codewhale installed (ships `codewhale` + `codewhale-tui`; both on PATH):
+#       npm i -g codewhale     (or cargo/brew per CodeWhale docs)
+#   - DEEPSEEK_API_KEY set on the supervisor host (or a key saved via
+#     `codewhale auth set`; resolution order is config > keyring > env)
+#   - work_dir is a git repo
+[agent]
+command = ["codewhale", "exec", "--auto", "--output-format", "stream-json"]
+prompt_arg_template = ["{prompt}"]
+name = "codewhale"
+[runtime]
+work_dir = "."
+log_dir = "~/.agent-runner/{project}/logs"
+round_timeout_s = 1800
+restart_delay_s = 3
+[prompt]
+file = "./prompts/main.md"
+inject_context = true
+[vcs]
+dirty_action = "stash"
+stash_idempotency_s = 5
+[monitor]
+auth_fail_hint = "Run `codewhale auth status` to inspect provider/credentials, or set DEEPSEEK_API_KEY on the supervisor host."

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/scaffold.py RENAMED Viewed

@@ -5,8 +5,8 @@ Writes three files into a git repo:
   prompts/main.md        — neutral 8-line placeholder
   .gitignore             — append "logs/" if missing
-Available presets ship as package data in `agent_runner/presets/*.toml`.
-Currently: `claude`, `aider`, `gemini`.
+Available presets ship as package data in `agent_runner/presets/*.toml`;
+`agent-runner init --preset <name>` discovers them from that directory.
 Optionally commits in one step (default true via the CLI).
 """

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/commands.md RENAMED Viewed

@@ -36,7 +36,7 @@ appends `logs/` to `.gitignore`. By default also creates a git commit.
 Flags:
-- `--preset {claude,aider,gemini}` — agent CLI preset to scaffold (default: `claude`)
+- `--preset {claude,aider,gemini,codewhale}` — agent CLI preset to scaffold (default: `claude`)
 - `--force` — overwrite an existing `agent-runner.toml`
 - `--no-commit` — skip the initial git commit

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/configuration.md RENAMED Viewed

@@ -103,10 +103,11 @@ working tree:
 `[agent.env]` is a flat `dict[str, str]` of environment variables injected into
 the agent subprocess **per round**. This is preset-supplied per CLI: e.g. the
 claude preset sets `DISABLE_AUTOUPDATER=1` to prevent mid-loop self-updates;
-the aider preset omits `[agent.env]` entirely. Override these values in your
-project's `agent-runner.toml` only when you need to deviate from the preset
-default. The runtime merges `[agent.env]` on top of the supervisor's own env;
-unset (empty string) does not unset an inherited variable.
+the aider and codewhale presets omit `[agent.env]` entirely (both resolve their
+API keys from the ambient environment or their own keyrings). Override these
+values in your project's `agent-runner.toml` only when you need to deviate from
+the preset default. The runtime merges `[agent.env]` on top of the supervisor's
+own env; unset (empty string) does not unset an inherited variable.
 ## `[monitor].auth_fail_hint` (preset-supplied)
@@ -117,6 +118,8 @@ guidance without authoring it themselves:
 - `--preset claude` → recommend `claude /login` / refresh `ANTHROPIC_API_KEY`.
 - `--preset aider` → verify provider env var (`OPENAI_API_KEY` /
   `ANTHROPIC_API_KEY` / `DEEPSEEK_API_KEY` / etc.); run `aider --models`.
+- `--preset codewhale` → run `codewhale auth status` to inspect provider
+  credentials, or set `DEEPSEEK_API_KEY` on the supervisor host.
 Override in your `agent-runner.toml` if you ship a custom CLI.

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/quickstart.md RENAMED Viewed

@@ -37,7 +37,7 @@ Edit `prompts/main.md` to describe what the agent should do per round.
 Edit `agent-runner.toml` if you need to change `round_timeout_s` or `[phases]`.
 The default preset (`--preset claude`) invokes `claude`. Other built-in
-presets: `--preset aider` and `--preset gemini`. To use any other CLI,
+presets: `--preset aider`, `--preset gemini`, and `--preset codewhale`. To use any other CLI,
 edit `agent.command` to your CLI's invocation and `agent.prompt_arg_template`
 to its prompt-argument syntax — for example:

cli_agent_runner-0.1.41/docs/recipes/codewhale.md ADDED Viewed

@@ -0,0 +1,98 @@
+# Running agent-runner with CodeWhale
+[CodeWhale](https://github.com/Hmbown/CodeWhale) is a DeepSeek-powered terminal
+agent. It runs one-shot via `codewhale exec --auto`, fitting agent-runner's
+per-round lifecycle naturally.
+## Prerequisites
+- `codewhale` installed (ships both `codewhale` and `codewhale-tui`; both must
+  be on PATH):
+  ```bash
+  npm i -g codewhale
+  ```
+  (or via cargo/brew — see the CodeWhale docs for alternative install methods)
+- DeepSeek API key available to codewhale via one of:
+  - `DEEPSEEK_API_KEY` environment variable on the supervisor host, **or**
+  - a key saved via `codewhale auth set` (resolution order: config > keyring > env)
+- A git repo as `work_dir` (required for VCS state tracking).
+## Scaffold
+```bash
+git init my-project && cd my-project
+agent-runner init --preset codewhale
+```
+This writes:
+- `agent-runner.toml` — codewhale preset (command, flags, auth hint).
+- `prompts/main.md` — neutral placeholder; replace with your task description.
+- `.gitignore` — adds `logs/` if missing.
+## CodeWhale preset (excerpt of `agent_runner/presets/codewhale.toml`)
+```toml
+[agent]
+command = ["codewhale", "exec", "--auto", "--output-format", "stream-json"]
+prompt_arg_template = ["{prompt}"]
+name = "codewhale"
+# [agent.env] omitted — DeepSeek key is ambient (env or codewhale keyring).
+[runtime]
+work_dir = "."
+log_dir = "~/.agent-runner/{project}/logs"
+round_timeout_s = 1800
+restart_delay_s = 3
+[prompt]
+file = "./prompts/main.md"
+inject_context = true
+[vcs]
+dirty_action = "stash"
+stash_idempotency_s = 5
+[monitor]
+auth_fail_hint = "Run `codewhale auth status` to inspect provider/credentials, or set DEEPSEEK_API_KEY on the supervisor host."
+```
+### Why each flag
+- `exec` — one-shot execution mode (non-interactive, no TUI).
+- `--auto` — non-interactive confirmation; **mandatory** for unattended supervisor
+  mode.
+- `--output-format stream-json` — emits NDJSON to stdout; required so the
+  `codewhale_error_detector` plugin can parse usage records. Without this flag
+  the plugin receives human-readable text and emits no `agent_usage_recorded`
+  events.
+### What's intentionally not configured
+- **No `[agent.env]`** — the DeepSeek key is resolved by codewhale from the
+  ambient environment or its own keyring. Set `DEEPSEEK_API_KEY` on the
+  supervisor host rather than in the TOML.
+## What the detector emits
+The built-in `codewhale_error_detector` plugin parses the round log tail after
+each round completes:
+- **`agent_usage_recorded`** — emitted from the `{"type":"metadata","meta":{...}}`
+  terminal record. Carries `model`, `input_tokens`, `output_tokens`. `cost_usd`
+  is always `None` (codewhale's stream-json output does not expose USD cost).
+- **`transient_error_detected`** — emitted only when a `{"type":"error"}` record
+  maps to an existing classification bucket (`rate_limit_model`, `api_transient_5xx`,
+  `api_timeout`). The only observed error so far is auth failure, which is **not**
+  a transient bucket — it surfaces via the monitor's `oauth_fail` detector instead.
+## Troubleshooting
+| Symptom | Probable cause |
+|---|---|
+| `codewhale: command not found` | codewhale not on PATH — `npm i -g codewhale` |
+| Round short-exits with non-zero exit code | likely auth failure; check `peek` and `~/.agent-runner/<project>/logs/rounds/R*.log` for the error record |
+| `oauth_fail` alert in `peek` | DeepSeek auth failure detected. Hint: "Run `codewhale auth status`…". Check key validity and re-export `DEEPSEEK_API_KEY`. |
+| No `agent_usage_recorded` events | `--output-format stream-json` may be missing from command; verify the preset was applied correctly |
+| `codewhale auth status` shows no key | Run `codewhale auth set` to save a key, or export `DEEPSEEK_API_KEY` before starting the supervisor |
+See also: [`docs/quickstart.md`](../quickstart.md), [`docs/configuration.md`](../configuration.md).

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/thesis.md RENAMED Viewed

@@ -88,7 +88,7 @@ during sustained upstream outage."
 Counter reset: any round that completes without firing a new
 `transient_error_detected` event clears all bucket counters back to zero.
-> **Example**: Gateway 2026-05-18 reported sustained 5xx + 529 from
+> **Example**: A 2026-05-18 field report described sustained 5xx + 529 from
 > Anthropic where our previous fixed 60s wait was too short — the next
 > round hit the same error, waited 60s again, and again. Rejected: adding
 > a config knob (`[runtime] transient_backoff_strategy = "fixed" |
@@ -159,6 +159,31 @@ That's prompt-engineering project policy. It varies per use case and evolves
 faster than a library version cycle. We are a runtime harness, not a usage
 methodology.
+### Not a remote-execution portal (agent-local, shell-remote)
+agent-runner assumes the agent and the supervisor run on the **same host**.
+The unattended model requires it: to survive a disconnected laptop and run
+24×7, the agent must live on the supervised host, not stream commands to it
+from elsewhere. We do not route an agent's tool calls to a remote shell (SSH,
+container, k8s pod). `monitor --host` provides remote *observation*, not remote
+*execution*.
+Tools like [zmx](https://zmx.sh) cover the complementary case — an
+interactive, attended agent that stays local while its shell runs remotely.
+That's a different niche (a human watching, full local MCP/skills, ephemeral
+sessions), and the two compose: a consumer can point `[agent].command` at an
+agent that itself routes through such a portal. But a portal adapter in core
+would be an anticipatory feature for a topology our model doesn't use.
+> **Example**: The 2026-04 zmx "ai portal" release (agent-local, shell-remote
+> via a session) prompted this entry. It validates our CLI-not-MCP stance
+> (its own prior-art notes call MCP servers a configuration pain), but adding
+> remote-execution routing to agent-runner is rejected until a consumer
+> presents a concrete unattended use case that needs it. Note for combined
+> deployments: command + output flowing through such a session is a secret
+> surface outside agent-runner's control (cf. the 0.1.40 event-log
+> containment) — the operator owns it.
 ---
 ## How to evaluate a feature request

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/pyproject.toml RENAMED Viewed

@@ -47,6 +47,7 @@ agent-runner = "agent_runner.cli:main"
 [project.entry-points."agent_runner.post_round_hooks"]
 claude_error_detector = "agent_runner.builtin_plugins.claude_rate_limit:ClaudeErrorDetector"
 gemini_error_detector = "agent_runner.builtin_plugins.gemini:GeminiErrorDetector"
+codewhale_error_detector = "agent_runner.builtin_plugins.codewhale:CodewhaleErrorDetector"
 [project.optional-dependencies]
 dev = [

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_scaffold_presets.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""End-to-end: `agent-runner init --preset {claude,aider,gemini}` produces valid scaffolds."""
+"""End-to-end: `agent-runner init --preset <name>` produces valid scaffolds (all presets)."""
 from __future__ import annotations
@@ -9,7 +9,7 @@ from pathlib import Path
 import pytest
-@pytest.mark.parametrize("preset_name", ["claude", "aider", "gemini"])
+@pytest.mark.parametrize("preset_name", ["claude", "aider", "gemini", "codewhale"])
 def test_given_preset_when_init_then_toml_is_loadable(tmp_git_repo: Path, preset_name: str) -> None:
     from agent_runner.api import init
     from agent_runner.config import load_config

{cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_doc_claims_match_ssot.py RENAMED Viewed

@@ -92,6 +92,17 @@ def test_doc_value_sets_match_ssot() -> None:
     if cls_doc != cls_ssot:
         failures.append(f"classification doc {cls_doc} != SSOT {cls_ssot}")
+    # --preset choices: commands.md "--preset {a,b,c}" must equal the derived SSOT.
+    # init_cmd derives choices from presets/*.toml; the hand-written doc list must track it.
+    from agent_runner.cli.init_cmd import _preset_names
+    preset_ssot = set(_preset_names())
+    cmds_text = (REPO / "docs/commands.md").read_text(encoding="utf-8")
+    pm = re.search(r"--preset \{([^}]+)\}", cmds_text)
+    preset_doc = set(pm.group(1).split(",")) if pm else set()
+    if preset_doc != preset_ssot:
+        failures.append(f"--preset doc {preset_doc} != SSOT {preset_ssot}")
     assert not failures, "doc value-set drift:\n" + "\n".join(failures)

cli_agent_runner-0.1.41/tests/unit/test_codewhale_plugin.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""Unit tests for CodewhaleErrorDetector (usage; classify-only-what-maps)."""
+from __future__ import annotations
+from unittest.mock import MagicMock, patch
+from tests._test_helpers import make_hook_context, write_round_log
+_MOD = "agent_runner.builtin_plugins.codewhale"
+def test_given_success_round_when_after_round_then_usage_emitted_from_metadata(tmp_path):
+    """A metadata record in a clean round yields one usage event and no transient error."""
+    from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
+    # Real captured codewhale exec stream-json terminal records.
+    write_round_log(
+        tmp_path,
+        1,
+        [
+            {"type": "content", "content": "working..."},
+            {"type": "tool_result", "id": "c1", "output": "ok", "status": "success"},
+            {
+                "type": "metadata",
+                "meta": {
+                    "model": "deepseek-v4-pro",
+                    "input_tokens": 66014,
+                    "output_tokens": 303,
+                    "session_id": "f029d9a9",
+                    "status": "completed",
+                },
+            },
+            {"type": "done"},
+        ],
+    )
+    result = MagicMock(exit_code=0, timed_out=False)
+    # Patch both emitters where the plugin module looks them up, so the calls
+    # are captured instead of reaching the real event log.
+    with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
+        with patch(f"{_MOD}.emit_transient_error_detected") as err_emit:
+            CodewhaleErrorDetector().after_round(
+                make_hook_context(tmp_path, agent_name="codewhale"), result=result
+            )
+    usage_emit.assert_called_once()
+    kw = usage_emit.call_args.kwargs
+    assert kw["agent"] == "codewhale"
+    assert kw["model"] == "deepseek-v4-pro"
+    assert kw["input_tokens"] == 66014
+    assert kw["output_tokens"] == 303
+    # Fields codewhale's exec output does not expose are reported as fixed defaults.
+    assert kw["cost_usd"] is None
+    assert kw["cached_tokens"] == 0
+    err_emit.assert_not_called()
+def test_given_non_codewhale_binary_when_after_round_then_no_emit(tmp_path):
+    """A round run by another agent emits no usage, even with a metadata record in the log."""
+    from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
+    write_round_log(
+        tmp_path,
+        1,
+        [
+            {
+                "type": "metadata",
+                "meta": {
+                    "model": "x",
+                    "input_tokens": 1,
+                    "output_tokens": 1,
+                    "status": "completed",
+                },
+            }
+        ],
+    )
+    result = MagicMock(exit_code=0, timed_out=False)
+    with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
+        # agent_name="claude" presumably sets ctx.agent_binary to a non-codewhale
+        # value (helper not visible here), which the detector's guard rejects.
+        CodewhaleErrorDetector().after_round(
+            make_hook_context(tmp_path, agent_name="claude"), result=result
+        )
+    usage_emit.assert_not_called()
+def test_given_auth_error_round_when_after_round_then_no_transient_error(tmp_path):
+    """Auth failure is NOT a transient bucket (it's oauth_fail territory) -> usage only."""
+    from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
+    write_round_log(
+        tmp_path,
+        1,
+        [
+            # Free-text error with no numeric status: nothing for the classifier to map.
+            {"type": "error", "error": "Authentication failed: invalid key"},
+            {
+                "type": "metadata",
+                "meta": {
+                    "model": "deepseek-v4-pro",
+                    "input_tokens": 0,
+                    "output_tokens": 0,
+                    "status": "failed",
+                },
+            },
+            {"type": "done"},
+        ],
+    )
+    # Non-zero exit models the failed round.
+    result = MagicMock(exit_code=1, timed_out=False)
+    with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
+        with patch(f"{_MOD}.emit_transient_error_detected") as err_emit:
+            CodewhaleErrorDetector().after_round(
+                make_hook_context(tmp_path, agent_name="codewhale"), result=result
+            )
+    err_emit.assert_not_called()  # auth error does not map to a transient bucket
+    usage_emit.assert_called_once()  # usage still emitted (status:failed round)
+def test_given_non_json_lines_when_after_round_then_tolerated(tmp_path):
+    """Real codewhale stdout has terminal-escape non-JSON lines; parser must skip them."""
+    from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
+    # Write raw lines manually (write_round_log only emits JSON dicts).
+    # Path must match make_hook_context default: tmp_path/rounds/R1-test.log
+    rounds_dir = tmp_path / "rounds"
+    rounds_dir.mkdir(parents=True, exist_ok=True)
+    meta_line = (
+        '{"type":"metadata","meta":{'
+        '"model":"deepseek-v4-pro","input_tokens":5,"output_tokens":2,'
+        '"status":"completed"}}'
+    )
+    # The first line fuses escape sequences with a JSON record, so it does not
+    # parse as JSON; "not json at all" is a second unparseable line. Only the
+    # metadata line should contribute to the emitted usage.
+    (rounds_dir / "R1-test.log").write_text(
+        "\x1b]9;4;1\x07\x1b]0;\U0001f433 CodeWhale\x07"
+        '{"type":"content","content":"hi"}\n' + meta_line + "\n"
+        "not json at all\n"
+        '{"type":"done"}\n',
+        encoding="utf-8",
+    )
+    result = MagicMock(exit_code=0, timed_out=False)
+    with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
+        CodewhaleErrorDetector().after_round(
+            make_hook_context(tmp_path, agent_name="codewhale"), result=result
+        )
+    usage_emit.assert_called_once()
+    assert usage_emit.call_args.kwargs["input_tokens"] == 5
+def test_classify_codewhale_error_maps_only_known_buckets():
+    """Lock the dormant forward-path: numeric status codes map to existing
+    buckets; everything else (incl. the captured free-text auth error) → None.
+    Guards against silent regression when a real rate-limit sample is wired in.
+    """
+    from agent_runner.builtin_plugins._constants import _BACK_OFF_DEFAULTS
+    from agent_runner.builtin_plugins.codewhale import _classify_codewhale_error
+    # Both accepted field names ("code" and "status_code") are exercised.
+    assert _classify_codewhale_error({"code": 429}) == "rate_limit_model"
+    assert _classify_codewhale_error({"status_code": 503}) == "api_transient_5xx"
+    assert _classify_codewhale_error({"code": 408}) == "api_timeout"
+    # free-text auth error (the only real captured shape) does not map
+    assert _classify_codewhale_error({"error": "Authentication failed: ..."}) is None
+    # a numeric status outside every bucket also yields None
+    assert _classify_codewhale_error({"code": 418}) is None
+    # every non-None result must be a real classification bucket (SSOT)
+    for ev in ({"code": 429}, {"status_code": 500}, {"code": 408}):
+        cls = _classify_codewhale_error(ev)
+        assert cls in _BACK_OFF_DEFAULTS

cli-agent-runner 0.1.40__tar.gz → 0.1.41__tar.gz

cli-agent-runner 0.1.40__tar.gz → 0.1.41__tar.gz