PyPI - cli-agent-runner - Versions diffs - 0.1.38__tar.gz → 0.1.39__tar.gz - Mend

cli-agent-runner 0.1.38__tar.gz → 0.1.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (225)

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+### Fixed
+- Grace-kill (`max_grace_after_result_s`) is no longer defeated by long-lived helper subprocesses (e.g. claude's persistent Bash-tool shell-snapshot). `[runtime] grace_kill_ignore_patterns` lists regexes for cmdlines to exclude from the liveness count; the claude preset ships a matching default.
+### Added
+- `[runtime] grace_kill_ignore_patterns: list[str]` — regex patterns; matching child cmdlines are excluded from the grace-kill liveness check.
+- `round_grace_extended` event payload gains `ignored_children` — cmdlines filtered by `grace_kill_ignore_patterns`.
+### Changed
+- Docs: `commands.md` documents `monitor --mode/--port` and `init --preset`; the Chinese verb list and `[monitor]` default values now point to the generated tables instead of restating them; runbook upgrade examples use a version placeholder.
+### Internal
+- New invariant `test_doc_claims_match_ssot` gates documented counts (detectors / defenses / verbs) and config value-sets (`dirty_action` / `context_injection_mode` / transient classification) against their code SSOT — count/enum doc drift now fails CI at the introducing commit.
+- Removed the unused `alert-kinds` docgen renderer; de-duplicated redundant defense-count and alert-kind guards to one canonical tripwire each.
 ## [0.1.38] - 2026-05-24
 ### Fixed

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cli-agent-runner
-Version: 0.1.38
+Version: 0.1.39
 Summary: Restart-on-exit supervisor for autonomous CLI agents
 Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
 Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/_docgen.py RENAMED Viewed

@@ -110,11 +110,6 @@ def render_defenses_table() -> str:
     return "\n".join(lines)
-def render_alert_kinds_list() -> str:
-    """Flat bullet list of all known alert kinds, alphabetised."""
-    return "\n".join(f"- `{k}`" for k in sorted(KNOWN_ALERT_KINDS))
 def render_detector_list() -> str:
     """Bullet list of detectors; auto-stop kinds flagged inline."""
     lines: list[str] = []
@@ -155,7 +150,6 @@ def render_verb_table() -> str:
 RENDERERS: dict[str, Callable[[], str]] = {
     "defenses-table": render_defenses_table,
-    "alert-kinds": render_alert_kinds_list,
     "detector-list": render_detector_list,
     "event-kinds": render_event_kinds_list,
     "config-schema": render_config_schema_table,

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/_emit.py RENAMED Viewed

@@ -258,10 +258,16 @@ def emit_round_grace_extended(
     round_num: int,
     grace_s: int,
     live_children: list[str],
+    ignored_children: list[str] | None = None,
 ) -> None:
     """Emit when the grace-after-result timer expired but the agent still had
     live worker processes (e.g. a backgrounded build), so the round was NOT
     killed; it continues until it finishes or hits round_timeout_s.
+    ignored_children: cmdlines that matched a grace_kill_ignore_patterns entry
+        and were excluded from the liveness count — useful for verifying
+        patterns are firing and for noticing when an upstream CLI changes
+        its helper path.
     """
     from agent_runner.events import ROUND_GRACE_EXTENDED, emit
@@ -271,6 +277,7 @@ def emit_round_grace_extended(
         round_num=round_num,
         grace_s=grace_s,
         live_children=live_children,
+        ignored_children=ignored_children or [],
     )

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.1.38'
-__version_tuple__ = version_tuple = (0, 1, 38)
+__version__ = version = '0.1.39'
+__version_tuple__ = version_tuple = (0, 1, 39)
 __commit_id__ = commit_id = None

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/agent_runtime.py RENAMED Viewed

@@ -11,6 +11,7 @@ Defenses encoded here:
 from __future__ import annotations
 import os
+import re
 import signal
 import subprocess  # noqa: TID251 — sanctioned subprocess caller
 import time
@@ -57,18 +58,26 @@ def _kill_pgroup(proc: subprocess.Popen) -> None:
         pass
-def _live_children(proc: subprocess.Popen, *, max_n: int = 5, max_len: int = 120) -> list[str]:
-    """Cmdlines of live (non-zombie) descendant processes of ``proc``.
-    Empty when ``proc`` has no live workers (a stuck agent that emitted
-    type=result then hung). Non-empty when the round backgrounded work (e.g. a
-    build) still running. Bounded so the resulting event stays small.
+def _live_children(
+    proc: subprocess.Popen,
+    *,
+    ignore_patterns: list[re.Pattern[str]] | None = None,
+    max_n: int = 5,
+    max_len: int = 120,
+) -> tuple[list[str], list[str]]:
+    """Cmdlines of live (non-zombie) descendants of ``proc``, split into
+    ``(live, ignored)``: ``live`` is what counts toward the grace-kill
+    liveness check; ``ignored`` matched an ``ignore_patterns`` entry and is
+    excluded (e.g. claude's persistent shell-snapshot helper). Both lists
+    are bounded by ``max_n``/``max_len`` to keep events small. ``ignore_patterns
+    is None`` → no filtering, ``ignored`` is empty, ``live`` matches 0.1.38.
     """
     try:
         parent = psutil.Process(proc.pid)
     except (psutil.NoSuchProcess, psutil.AccessDenied):
-        return []
-    out: list[str] = []
+        return [], []
+    live: list[str] = []
+    ignored: list[str] = []
     for child in parent.children(recursive=True):
         try:
             if child.status() == psutil.STATUS_ZOMBIE:
@@ -76,10 +85,16 @@ def _live_children(proc: subprocess.Popen, *, max_n: int = 5, max_len: int = 120
             line = " ".join(child.cmdline()) or child.name()
         except (psutil.NoSuchProcess, psutil.AccessDenied):
             continue
-        out.append(line[:max_len])
-        if len(out) >= max_n:
+        short = line[:max_len]
+        if ignore_patterns and any(p.search(line) for p in ignore_patterns):
+            if len(ignored) < max_n:
+                ignored.append(short)
+        else:
+            if len(live) < max_n:
+                live.append(short)
+        if len(live) >= max_n and len(ignored) >= max_n:
             break
-    return out
+    return live, ignored
 # Exact compact bytes — matches claude CLI's no-whitespace JSONL output.
@@ -99,7 +114,8 @@ def run(
     max_grace_after_result_s: int = 0,
     progress_callback: Callable[[dict], None] | None = None,
     progress_interval_s: int = 0,
-    on_grace_extended: Callable[[list[str]], None] | None = None,
+    on_grace_extended: Callable[[list[str], list[str]], None] | None = None,
+    grace_kill_ignore_patterns: list[re.Pattern[str]] | None = None,
 ) -> RunResult:
     """Spawn the agent subprocess and wait for exit or timeout.
@@ -116,6 +132,10 @@ def run(
     progress_interval_s seconds with a dict of log stats (log_size_kb,
     last_write_age_s, wall_age_s). Keeps agent_runtime event-free; callers
     build the callback to emit events.
+    grace_kill_ignore_patterns: pre-compiled regex patterns; child cmdlines
+    matching any pattern (re.search) are excluded from the liveness count
+    (persistent helpers that aren't real workers). None = no filtering.
     """
     argv = _build_argv(command, prompt_arg_template, prompt)
     env = {**os.environ, **env_extra}
@@ -157,13 +177,13 @@ def run(
                     except OSError:
                         pass  # log not flushed yet; check next tick
                 if result_seen_at is not None and now - result_seen_at > max_grace_after_result_s:
-                    children = _live_children(proc)
-                    if children:
+                    live, ignored = _live_children(proc, ignore_patterns=grace_kill_ignore_patterns)
+                    if live:
                         # Busy: a backgrounded worker is still running. Don't
                         # reap — defer to the wall-clock ceiling. Signal once.
                         if not grace_extended_emitted:
                             if on_grace_extended is not None:
-                                on_grace_extended(children)
+                                on_grace_extended(live, ignored)
                             grace_extended_emitted = True
                     else:
                         _kill_pgroup(proc)

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/config.py RENAMED Viewed

@@ -2,6 +2,7 @@
 from __future__ import annotations
+import re
 import tomllib
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -40,6 +41,12 @@ class RuntimeConfig:
     fresh_eyes_every_n: int | None = None  # None = disabled
     dry_run: bool = False
     max_grace_after_result_s: int = 0  # 0 = disabled
+    grace_kill_ignore_patterns: list[str] = field(default_factory=list)
+    """Regex patterns (re.search) tested against each child process's joined
+    cmdline. Matching children are excluded from the grace-kill liveness
+    check — for persistent helper subprocesses (e.g. claude's shell-snapshot
+    bash) that would otherwise defeat max_grace_after_result_s. Empty list
+    = no filtering (0.1.38 behavior preserved)."""
 @dataclass(frozen=True)
@@ -221,6 +228,25 @@ def _validate_remote_failure_tolerance(value: Any) -> int:
     return v
+def _validate_regex_list(value: Any, *, field: str) -> list[str]:
+    """Validate a list of regex pattern strings (each must compile). Returns the
+    raw strings unchanged; callers compile when they need ``re.Pattern`` objects."""
+    if not isinstance(value, list):
+        raise ValueError(f"{field}: expected a list of regex strings, got {type(value).__name__}")
+    out: list[str] = []
+    for p in value:
+        if not isinstance(p, str):
+            raise ValueError(
+                f"{field}: each pattern must be a string, got {type(p).__name__}: {p!r}"
+            )
+        try:
+            re.compile(p)
+        except re.error as e:
+            raise ValueError(f"{field}: invalid regex {p!r}: {e}") from e
+        out.append(p)
+    return out
 _PHASE_OVERRIDE_ALLOWED_FIELDS = frozenset(
     {
         "round_timeout_s",
@@ -392,6 +418,10 @@ def load_config(toml_path: Path) -> Config:
             runtime_d.get("max_grace_after_result_s", 0),
             field="runtime.max_grace_after_result_s",
         ),
+        grace_kill_ignore_patterns=_validate_regex_list(
+            runtime_d.get("grace_kill_ignore_patterns", []),
+            field="runtime.grace_kill_ignore_patterns",
+        ),
     )
     prompt_d = raw.get("prompt", {})
     mode = prompt_d.get("context_injection_mode", "prepend")

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/presets/claude.toml RENAMED Viewed

@@ -16,6 +16,7 @@ work_dir = "."
 log_dir = "~/.agent-runner/{project}/logs"
 round_timeout_s = 1800
 restart_delay_s = 3
+grace_kill_ignore_patterns = ['\.claude/shell-snapshots/snapshot-bash-']
 [prompt]
 file = "./prompts/main.md"

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/agent_runner/runner.py RENAMED Viewed

@@ -10,6 +10,7 @@ import hashlib
 import json
 import os
 import random
+import re
 import sys
 import time
 import traceback as tb_mod
@@ -466,12 +467,15 @@ def _run_one_round_inner(cfg: Config, *, phase_override: str | None = None) -> R
             **stats,
         )
-    def _grace_extended_emit(children: list[str]) -> None:
+    grace_kill_ignore_patterns = [re.compile(p) for p in cfg.runtime.grace_kill_ignore_patterns]
+    def _grace_extended_emit(live: list[str], ignored: list[str]) -> None:
         api.emit_round_grace_extended(
             log_dir,
             round_num=round_num,
             grace_s=cfg.runtime.max_grace_after_result_s,
-            live_children=children,
+            live_children=live,
+            ignored_children=ignored,
         )
     result = agent_runtime.run(
@@ -485,6 +489,7 @@ def _run_one_round_inner(cfg: Config, *, phase_override: str | None = None) -> R
         progress_callback=_progress_emit,
         progress_interval_s=cfg.monitor.round_progress_interval_s,
         on_grace_extended=_grace_extended_emit,
+        grace_kill_ignore_patterns=grace_kill_ignore_patterns,
     )
     events.emit(
         log_dir,

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/docs/commands.md RENAMED Viewed

@@ -34,8 +34,15 @@ are shared between `peek`, `watch`, and `monitor`.
 Scaffold a new project: writes `agent-runner.toml`, `prompts/main.md`, and
 appends `logs/` to `.gitignore`. By default also creates a git commit.
+Flags:
+- `--preset {claude,aider,gemini}` — agent CLI preset to scaffold (default: `claude`)
+- `--force` — overwrite an existing `agent-runner.toml`
+- `--no-commit` — skip the initial git commit
 ```bash
-agent-runner init                      # default: commit
+agent-runner init                      # default: claude preset, commit
+agent-runner init --preset aider       # aider preset
 agent-runner init --no-commit          # skip the commit
 agent-runner init --force              # overwrite an existing toml
 ```
@@ -133,7 +140,7 @@ agent-runner events --kind transient_error_backoff_capped --tail
 `peek` in a clear-and-refresh loop. Default 2s interval. Stop with Ctrl-C.
-### `agent-runner monitor [--host SSH-ALIAS] [--interval N] [--json]`
+### `agent-runner monitor [--host SSH-ALIAS] [--interval N] [--mode MODE] [--port PORT] [--json]`
 Anomaly-detection daemon. Runs the 12 detectors against the live state on every
 poll. Without `--host`, watches local logs at default 30s interval. With
@@ -143,15 +150,25 @@ When OAuth-fail or disk-critical detectors fire, monitor automatically issues a
 graceful stop (locally via `api.stop`; remotely via `ssh <host> 'agent-runner stop'`).
 Override with `[monitor]` config block (see configuration.md).
+Flags:
+- `--mode {anomaly,narrate,events,http}` — output mode (default: `anomaly`). `narrate`
+  streams a human-readable narrative; `events` streams raw event JSON; `http` serves
+  a local progress page.
+- `--port PORT` — HTTP port for `--mode http` (default: `8765`, local-only).
+- `--host SSH-ALIAS` — watch a remote agent-runner via ssh (anomaly mode only).
 ```bash
-agent-runner monitor                       # local
+agent-runner monitor                       # local anomaly mode
 agent-runner monitor --host pi             # remote
+agent-runner monitor --mode narrate        # streaming narrative
+agent-runner monitor --mode http --port 9000  # HTTP progress page on port 9000
 agent-runner monitor --json | jq -c        # pipe alerts to a downstream consumer
 ```
 ## 中文摘要
-16 个动词：`init / install / uninstall / start / stop / kill / cancel / restart / status / round / serve / upgrade / peek / watch / events / monitor`。
+16 个动词，完整列表见上方动词表（自动生成）。
 观察类（peek/watch/monitor）三视角对称，全部共用 `--round / --log / --events / --select / --json` 下钻参数。

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/docs/configuration.md RENAMED Viewed

@@ -47,6 +47,7 @@ running with newly-set `dirty_action = "auto_commit"` is undefined).
 | `fresh_eyes_every_n` | `int | None` | None |
 | `dry_run` | `bool` | False |
 | `max_grace_after_result_s` | `int` | 0 |
+| `grace_kill_ignore_patterns` | `list[str]` | [] |
 ### `[prompt]`
@@ -200,6 +201,10 @@ Unconfigured phases (and configs without `[phases]`) keep using the global
 ## `[monitor]` (optional, defaults shown)
+> Authoritative field-level defaults are in the generated schema table above
+> (`[monitor]` section). The snippet below shows only the fields most commonly
+> customised, with operational notes.
 ```toml
 [monitor]
 auto_stop_on = ["oauth_fail", "disk_critical"]

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/docs/long-running-agents.md RENAMED Viewed

@@ -140,7 +140,7 @@ token breakdown + cost (where the underlying CLI exposes it).
 ```
 Use as input to a cost-tracking detector or external billing reconciler.
-See `docs/migrations/0.1.28.md` for the current 12-field payload schema
+See `docs/migrations/0.1.28.md` for the current payload schema
 (includes `cache_creation_tokens`, `tool_call_count`, `phase`, `success`)
 plus a consumer dispatcher sketch. Aggregation (rollups, budget warnings)
 is the consumer's responsibility — agent-runner emits raw per-round

cli_agent_runner-0.1.39/docs/migrations/0.1.39.md ADDED Viewed

@@ -0,0 +1,74 @@
+# Migrating to 0.1.39
+## TL;DR
+```bash
+pip install --upgrade cli-agent-runner==0.1.39
+```
+**Claude users running 0.1.38**: add one line to `[runtime]` to unblock
+grace-kill against claude's persistent shell-snapshot helper (see below).
+New `agent-runner init --preset=claude` scaffolds get this automatically.
+**Everyone else**: no action.
+## Persistent-helper exclusion (the live fix)
+0.1.38's grace-kill liveness check was correctly conservative — it refused to
+reap a round with live worker children — but claude's `-p` mode keeps a
+persistent Bash-tool shell-snapshot subprocess alive for the whole session.
+That subprocess is not doing work; it's idle infrastructure. 0.1.38 saw it as
+a live worker and deferred every post-result hang to `round_timeout_s` instead
+of reaping at `max_grace_after_result_s`. This is the "persistent-helper
+caveat" 0.1.38's migration doc flagged.
+0.1.39 adds `[runtime] grace_kill_ignore_patterns` — a list of regex patterns;
+child cmdlines matching any pattern (via `re.search`) are excluded from the
+liveness count. `presets/claude.toml` ships a default pattern matching
+claude's shell-snapshot.
+### Existing claude operators — one line
+Add to your `[runtime]` block:
+```toml
+[runtime]
+grace_kill_ignore_patterns = ['\.claude/shell-snapshots/snapshot-bash-']
+```
+Or run `agent-runner init --preset=claude` in a scratch directory and diff
+the generated `agent-runner.toml` against yours.
+After the change, post-result hangs are reaped at `max_grace_after_result_s`.
+Without it, they continue to defer to `round_timeout_s` (the 0.1.38 behavior).
+### Verifying the pattern is firing
+The `round_grace_extended` event payload gains `ignored_children` listing
+cmdlines that matched a pattern. Use it to:
+- confirm the shell-snapshot is being filtered (`ignored_children` non-empty)
+- catch the day claude renames its helper (`live_children` shows a new
+  unfiltered persistent process)
+### Other presets
+`aider.toml` and `gemini.toml` ship no default patterns. Add operator-specific
+patterns to your own `agent-runner.toml` if needed.
+## SSOT consistency hardening (also in 0.1.39)
+A new invariant `test_doc_claims_match_ssot` gates documented counts
+(detectors / defenses / verbs) and config value-sets against code SSOT.
+`commands.md` documents `monitor --mode/--port` and `init --preset`.
+Redundant count guards collapsed to one canonical tripwire each. The unused
+`alert-kinds` docgen renderer was removed. No action required.
+## What did NOT change
+- The 0.1.38 grace-kill liveness semantics (still process-group-based;
+  patterns are an exclusion filter on top).
+- `round_grace_kill` (still fires only when the post-filter live set is empty).
+- `round_timeout_s` (still the hard ceiling).
+- `max_grace_after_result_s` (knob unchanged).
+- For non-claude deployments: zero behavior change.

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/docs/plugins.md RENAMED Viewed

@@ -170,6 +170,8 @@ Any exception raised by a hook is caught by the runner and emitted as a built-in
 }
 ```
+(Fields emitted by the `HOOK_FAILED` path in `runner.py` + `_summarize_error` in `hooks.py`.)
 The round itself continues — a broken plugin must not crash the supervisor.
 ### What `plugin_context_enrichers()` surfaces
@@ -293,7 +295,7 @@ event with `classification` ∈ {`rate_limit_account`, `rate_limit_model`,
 Per round (regardless of error state), also emits `agent_usage_recorded`
 with token/cost/duration data extracted from the claude result event —
-see `docs/migrations/0.1.28.md` for the full 12-field schema. The
+see `docs/migrations/0.1.28.md` for the full payload schema. The
 supervisor reads `transient_error_detected` on the next dispatch cycle
 and applies the configured `transient_error_action` (default `back_off`;
 `rate_limit_action` retained as a deprecated alias).
@@ -309,7 +311,7 @@ to ship equivalent detectors for other agent CLIs — the bundled
 ## Custom monitor detectors (§3.3)
 0.1.5 adds a fourth extension point — plugin authors can ship custom monitor
-detectors that run alongside the 11 builtins on every monitor poll.
+detectors that run alongside the 12 builtins on every monitor poll.
 ### Group + Protocol

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/docs/runbook.md RENAMED Viewed

@@ -153,7 +153,7 @@ RestartSec=5
 ### Path 1 — systemd --user service (installed via `agent-runner install`)
-    agent-runner upgrade --target 0.1.37
+    agent-runner upgrade --target <version>
 Does stop → pip → smoke → start, with auto-rollback on smoke failure.
@@ -167,7 +167,7 @@ package-only upgrade (pip + smoke + rollback), then prints the restart command.
 It never runs `sudo` and never starts a service it didn't install. Restart your
 supervisor yourself:
-    python3 -m pip install --user --break-system-packages --upgrade cli-agent-runner==0.1.37
+    python3 -m pip install --user --break-system-packages --upgrade cli-agent-runner==<version>
     agent-runner --version
     sudo systemctl restart <your-unit>
@@ -177,7 +177,7 @@ supervisor yourself:
 Use `--no-restart` to force package-only mode even on a systemd --user host
 (upgrade the package now, restart later):
-    agent-runner upgrade --target 0.1.37 --no-restart
+    agent-runner upgrade --target <version> --no-restart
 ### Manual rollback
@@ -584,6 +584,14 @@ work past `type=result`. Check the `live_children` field in the event to identif
 the process; consider restructuring the agent to emit `type=result` only when
 truly done.
+**Persistent-helper exclusion (0.1.39+):** when an agent CLI keeps long-lived
+helper subprocesses alive past `type=result` (claude does this with a Bash-tool
+shell-snapshot), they would otherwise count as "live workers" and defer every
+post-result hang to `round_timeout_s`. Set `[runtime] grace_kill_ignore_patterns
+= [<regex>, ...]` to exclude them; the `claude` preset ships a default. The
+`round_grace_extended` event's `ignored_children` field shows which cmdlines
+matched a pattern.
 ### Disk pressure
 **Symptom:** `[WARN] disk_warning` at >90%; `[CRIT] disk_critical` at >95% (auto-stops).

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/docs/thesis.md RENAMED Viewed

@@ -54,9 +54,10 @@ would produce constant false positives across diverse workloads.
 The `anomaly_repetitive_active` detector (added 0.1.32) is the live example:
 it fires when the claude plugin emits `anomaly_repetitive_tool` events
 above a fixed threshold within a window — a specific signature, not N-σ.
-`max_grace_after_result_s` (0.1.31) is another: kills the subprocess after
-a fixed grace following the `result` event — specific signature, not "is
-this subprocess behaving unusually".
+`max_grace_after_result_s` (0.1.31, refined 0.1.38) is another: a fixed
+grace after the `result` event, the subprocess is killed only if its
+process group has no live worker left — specific signature, not "is this
+subprocess behaving unusually".
 > **Example**: A 2026-05-18 proposal requested a "cost spike detector" that
 > fires when this round's cost is N× the rolling 7-day average. Rejected.

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/tests/integration/test_grace_kill_emission.py RENAMED Viewed

@@ -79,6 +79,28 @@ def test_grace_kill_emits_round_grace_kill_event(tmp_path: Path) -> None:
     assert len(timeout_events) == 0
+def _make_grace_config_with_patterns(
+    work_dir: Path, script_path: Path, grace_s: int, patterns: list[str]
+) -> Config:
+    log_dir = work_dir / "logs"
+    log_dir.mkdir(exist_ok=True)
+    prompt = work_dir / "p.md"
+    prompt.write_text("Test prompt. " * 50)
+    return Config(
+        agent=AgentConfig(command=[str(script_path)], prompt_arg_template=[]),
+        runtime=RuntimeConfig(
+            work_dir=work_dir,
+            log_dir=log_dir,
+            round_timeout_s=10,
+            max_grace_after_result_s=grace_s,
+            grace_kill_ignore_patterns=patterns,
+        ),
+        prompt=PromptConfig(file=prompt, inject_context=False),
+        vcs=VcsConfig(),
+        phases=PhasesConfig(),
+    )
 def test_round_grace_extended_emitted_when_worker_alive(tmp_path: Path) -> None:
     """Full runner flow: subprocess emits result then backgrounds a long child;
     round_grace_extended event fires (not round_grace_kill); wall timeout reaps."""
@@ -109,3 +131,44 @@ def test_round_grace_extended_emitted_when_worker_alive(tmp_path: Path) -> None:
     # round_grace_kill must NOT appear (round was busy, not idle)
     grace_kill_events = [e for e in events if e.get("event") == "round_grace_kill"]
     assert len(grace_kill_events) == 0
+def test_round_grace_extended_carries_ignored_children(tmp_path: Path) -> None:
+    """With grace_kill_ignore_patterns set, persistent helpers appear under
+    ignored_children, not live_children — even when a real worker is also alive."""
+    _init_git(tmp_path)
+    script = tmp_path / "agent.sh"
+    # Emit result, then background both a snapshot-like helper and a 'real' sleep.
+    # exec -a renames the subprocess's argv[0] so the pattern can match it.
+    script.write_text(
+        "#!/bin/bash\n"
+        'echo \'{"type":"result","is_error":false}\'\n'
+        "exec -a snapshot-bash-test sleep 30 &\n"
+        "sleep 30 &\n"
+        "wait\n",
+        encoding="utf-8",
+    )
+    script.chmod(0o755)
+    cfg = _make_grace_config_with_patterns(
+        tmp_path, script, grace_s=1, patterns=["snapshot-bash-test"]
+    )
+    result = run_one_round(cfg)
+    assert result.killed_for_grace is False  # real worker kept it alive
+    assert result.timed_out is True  # wall-clock reaped it
+    events_list = read_events_for_current_month(cfg.runtime.log_dir)
+    extended_events = [e for e in events_list if e.get("event") == "round_grace_extended"]
+    assert len(extended_events) == 1
+    ev = extended_events[0]
+    # The plain sleep goes to live_children (real worker)
+    assert any("sleep" in c for c in ev["live_children"])
+    # The exec -a snapshot-bash-test process goes to ignored_children
+    assert any("snapshot-bash-test" in c for c in ev["ignored_children"])
+    # round_grace_kill must NOT appear (real worker still alive)
+    grace_kill_events = [e for e in events_list if e.get("event") == "round_grace_kill"]
+    assert len(grace_kill_events) == 0

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/tests/invariants/test_architecture.py RENAMED Viewed

@@ -118,21 +118,8 @@ def test_given_api_types_when_inspected_then_all_frozen_dataclasses() -> None:
         assert cls.__dataclass_params__.frozen, f"{name} not frozen"
-def test_given_known_alert_kinds_when_inspected_then_matches_twelve_detectors() -> None:
+def test_given_known_alert_kinds_when_inspected_then_well_formed() -> None:
     from agent_runner.monitor import KNOWN_ALERT_KINDS
-    expected = {
-        "timeout_rate",
-        "hung",
-        "orphan_chain",
-        "disk_warning",
-        "disk_critical",
-        "mem_pressure",
-        "smoke_fail_rate",
-        "oauth_fail",
-        "network_fail",
-        "rate_limit_active",
-        "anomaly_repetitive_active",
-        "supervisor_stale",
-    }
-    assert KNOWN_ALERT_KINDS == expected
+    assert len(KNOWN_ALERT_KINDS) == 12
+    assert all(re.fullmatch(r"[a-z][a-z0-9_]*", k) for k in KNOWN_ALERT_KINDS)

{cli_agent_runner-0.1.38 → cli_agent_runner-0.1.39}/tests/invariants/test_catalogs.py RENAMED Viewed

@@ -26,7 +26,6 @@ def test_given_defenses_catalog_when_loaded_then_each_entry_has_required_fields(
         phases=None,
     )
     cat = catalog(cfg)
-    assert len(cat) == 11
     for d in cat:
         assert d.name and isinstance(d.name, str)
         assert d.current_state in {"active", "degraded", "off"}

cli-agent-runner 0.1.38__tar.gz → 0.1.39__tar.gz

cli-agent-runner 0.1.38__tar.gz → 0.1.39__tar.gz