cli-agent-runner 0.1.40__tar.gz → 0.1.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/CHANGELOG.md +21 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/PKG-INFO +5 -5
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/README.md +4 -4
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/README.zh.md +5 -5
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_emit.py +23 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_version.py +2 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/api.py +56 -1
- cli_agent_runner-0.1.42/agent_runner/builtin_plugins/codewhale.py +133 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/init_cmd.py +13 -1
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/serve_cmd.py +26 -5
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/defenses.py +12 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/events.py +2 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/monitor.py +0 -25
- cli_agent_runner-0.1.42/agent_runner/presets/codewhale.toml +30 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/runner.py +5 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/scaffold.py +2 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/vcs_state.py +51 -3
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/architecture.md +7 -5
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/commands.md +2 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/configuration.md +7 -4
- cli_agent_runner-0.1.42/docs/migrations/0.1.42.md +58 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/quickstart.md +1 -1
- cli_agent_runner-0.1.42/docs/recipes/codewhale.md +98 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/thesis.md +38 -8
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/pyproject.toml +1 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/_test_helpers.py +8 -3
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_bounded_run.py +10 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_fresh_eyes_signal.py +2 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_scaffold_presets.py +2 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_substrate_fingerprint.py +5 -1
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_architecture.py +4 -1
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_doc_claims_match_ssot.py +11 -0
- cli_agent_runner-0.1.42/tests/unit/test_codewhale_plugin.py +155 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_defenses.py +2 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detectors.py +1 -18
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_presets.py +30 -2
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_runner.py +2 -2
- cli_agent_runner-0.1.42/tests/unit/test_serve_config_broken.py +33 -0
- cli_agent_runner-0.1.42/tests/unit/test_serve_crash_loop.py +128 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_vcs_state.py +69 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.codecov.yml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/workflows/ci.yml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/workflows/release.yml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.gitignore +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.vulture-whitelist.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/CODE_OF_CONDUCT.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/CONTRIBUTING.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/LICENSE +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/SECURITY.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_docgen.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_redact.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_registry.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_substrate.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_throttle.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/agent_runtime.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/api_types.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/_constants.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/claude_rate_limit.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/gemini.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/__main__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/common.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/events_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/install_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/monitor_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/peek_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/round_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/service_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/upgrade_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/config.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/context_store.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/detector_helpers.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/hooks.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/http_progress.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/lifecycle.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/metrics.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/aider.toml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/claude.toml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/gemini.toml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/prompt_loader.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/round_log.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/round_view.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/service_unit.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/startup_check.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/build.sh +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/example-agent-runner.toml +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/launchd.plist.tmpl +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/run-loop.sh +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/systemd.service.tmpl +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/README.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/events.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/long-running-agents.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/marketing/README.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/marketing/promo-cn.html +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.16.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.17.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.19.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.20.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.21.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.22.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.23.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.24.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.25.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.26.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.27.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.28.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.29.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.30.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.31.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.32.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.33.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.34.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.35.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.36.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.37.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.38.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.39.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.40.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/plugins.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/recipes/aider.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/runbook.md +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/conftest.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/contract/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/contract/test_public_api_surface.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/conftest.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_graceful_stop.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_install_systemd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_context_enricher_namespacing.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_grace_kill_emission.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_install_dry_run.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_monitor_seeded.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_plugin_detector_loaded.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_plugin_owned_paths.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_plugin_real_flow.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_serve_loop.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_transient_error_backoff.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_atomic_write_enforced.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_catalogs.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_classification_ssot.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_docs_generated.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_entry_points_resolve.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_event_kind_registry.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_event_kinds_ssot.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_events_doc_contract.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_layer_2_loop_size.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_module_boundaries.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_module_sizes.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_no_ai_signatures.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_peek_schema_version.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_round_result_stable.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_upstream_schema_canary.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/parser.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/test_parser.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/test_quickstart.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/__init__.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_agent_runtime.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_agent_runtime_grace.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_agent_runtime_progress.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_assemble_prompt.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_events_stream.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_install.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_observation.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_read_round_num.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_resolve_phase.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_service.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_types.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_auto_stop_gating.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_claude_error_detector.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_common.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_init_install.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_monitor_http.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_service_peek_monitor.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_upgrade.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_fresh_eyes.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_max_rounds.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_stop_file.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_transient_error_action.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_context_store.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_detector_helpers.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_detector_protocol.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_docgen.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_events.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_events_cmd.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_fresh_eyes_trigger.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_gemini_plugin.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_hook_failure_isolation.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_hooks.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_http_progress.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_init_entry_points.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_lifecycle.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_metrics.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_assembly.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detect_supervisor_stale.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_peek_argparse.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_peek_select.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_prompt_loader.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_redact.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_round_log_helpers.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_round_view.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_runner_throttle.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_scaffold.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_cmd_bounded.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_round_log.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_sentinel.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_startup_hooks.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_service_unit.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_startup_check.py +0 -0
- {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_substrate.py +0 -0
|
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.42] - 2026-06-25
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- `crash_loop` defense — serve stops after 5 consecutive *unknown* short crashes (non-zero exit, <60s, no classified transient), escalating the restart delay and recording the failure reason. Ends the respawn-forever crash loop; recoverable-slow failures (rate-limit / quota / 5xx / timeout) still ride the transient-error backoff unchanged.
|
|
12
|
+
- `config_broken` defense — a permanent startup-battery failure now halts serve (distinct no-retry exit code `78`) instead of respawning a broken config every round.
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- `vcs.dirty_action` no longer sweeps the runner's own `log_dir` bookkeeping when `log_dir` is inside `work_dir`: `auto_commit` excludes it from the commit (no more phantom `git_head` advance on a zero-work round) and `stash` excludes it from `git stash push -u` (logs no longer vanish). `.evolving/` and agent work are unaffected.
|
|
16
|
+
|
|
17
|
+
### Removed
|
|
18
|
+
- The inert `smoke_fail_rate` monitor alert (could never fire — superseded by the always-on `config_broken` stop). Monitor now ships 11 detectors.
|
|
19
|
+
|
|
20
|
+
### Docs
|
|
21
|
+
- `thesis.md`: the stuck-loop defense is described honestly as a notify-level, opt-in-to-auto-stop monitor detector (`anomaly_repetitive_active`), not a default hard-stop; fixed the `stuck_loop_detected` naming drift.
|
|
22
|
+
|
|
23
|
+
## [0.1.41] - 2026-06-07
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- New `codewhale` preset — supervise Hmbown/CodeWhale (DeepSeek terminal agent) via `codewhale exec --auto --output-format stream-json`. `agent-runner init --preset codewhale`.
|
|
27
|
+
- New built-in `codewhale_error_detector` plugin — emits `agent_usage_recorded` (model + token counts) from codewhale's stream-json output. Transient-error classification is best-effort (mappable buckets only); auth failures surface via the existing monitor `oauth_fail` detector.
|
|
28
|
+
|
|
8
29
|
## [0.1.40] - 2026-05-31
|
|
9
30
|
|
|
10
31
|
### Security
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cli-agent-runner
|
|
3
|
-
Version: 0.1.40
|
|
3
|
+
Version: 0.1.42
|
|
4
4
|
Summary: Restart-on-exit supervisor for autonomous CLI agents
|
|
5
5
|
Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
|
|
6
6
|
Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
|
|
@@ -49,7 +49,7 @@ full disks, runaway memory.
|
|
|
49
49
|
|
|
50
50
|
```
|
|
51
51
|
┌──────────────────────────────────────────┐
|
|
52
|
-
│ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
|
|
52
|
+
│ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
|
|
53
53
|
├──────────────────────────────────────────┤
|
|
54
54
|
│ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
|
|
55
55
|
├──────────────────────────────────────────┤
|
|
@@ -86,14 +86,14 @@ Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
|
|
|
86
86
|
|---|---|
|
|
87
87
|
| `init` / `install` / `uninstall` | `peek` — state snapshot |
|
|
88
88
|
| `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
|
|
89
|
-
| `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
|
|
89
|
+
| `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
|
|
90
90
|
| `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
|
|
91
91
|
|
|
92
92
|
Verb reference: [`docs/commands.md`](docs/commands.md).
|
|
93
93
|
|
|
94
94
|
## Defenses (built in)
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
12 named defenses, structured as data — see `agent-runner peek --select defenses`.
|
|
97
97
|
Each carries the historical incident it codifies and the invariant test that
|
|
98
98
|
guards it. Highlights:
|
|
99
99
|
|
|
@@ -106,7 +106,7 @@ guards it. Highlights:
|
|
|
106
106
|
|
|
107
107
|
Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
|
|
108
108
|
|
|
109
|
-
## Monitor: 12 detectors
|
|
109
|
+
## Monitor: 11 detectors
|
|
110
110
|
|
|
111
111
|
Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
|
|
112
112
|
`mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
|
|
@@ -12,7 +12,7 @@ full disks, runaway memory.
|
|
|
12
12
|
|
|
13
13
|
```
|
|
14
14
|
┌──────────────────────────────────────────┐
|
|
15
|
-
│ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
|
|
15
|
+
│ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
|
|
16
16
|
├──────────────────────────────────────────┤
|
|
17
17
|
│ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
|
|
18
18
|
├──────────────────────────────────────────┤
|
|
@@ -49,14 +49,14 @@ Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
|
|
|
49
49
|
|---|---|
|
|
50
50
|
| `init` / `install` / `uninstall` | `peek` — state snapshot |
|
|
51
51
|
| `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
|
|
52
|
-
| `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
|
|
52
|
+
| `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
|
|
53
53
|
| `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
|
|
54
54
|
|
|
55
55
|
Verb reference: [`docs/commands.md`](docs/commands.md).
|
|
56
56
|
|
|
57
57
|
## Defenses (built in)
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
12 named defenses, structured as data — see `agent-runner peek --select defenses`.
|
|
60
60
|
Each carries the historical incident it codifies and the invariant test that
|
|
61
61
|
guards it. Highlights:
|
|
62
62
|
|
|
@@ -69,7 +69,7 @@ guards it. Highlights:
|
|
|
69
69
|
|
|
70
70
|
Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
|
|
71
71
|
|
|
72
|
-
## Monitor: 12 detectors
|
|
72
|
+
## Monitor: 11 detectors
|
|
73
73
|
|
|
74
74
|
Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
|
|
75
75
|
`mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
把任意 CLI agent(Claude Code、自研 agent、任何长跑命令)包装成可被
|
|
8
8
|
systemd / launchd 拉起、能被远程观测的服务。**每轮跑完进程退出**,外层
|
|
9
|
-
supervisor 重启 —— 这是核心模式。中间穿插 11 条防御,避开 production 上
|
|
9
|
+
supervisor 重启 —— 这是核心模式。中间穿插 12 条防御,避开 production 上
|
|
10
10
|
最容易翻车的几条路:
|
|
11
11
|
|
|
12
12
|
- 轮卡死、Tool 调用空转 → 硬墙 timeout
|
|
@@ -20,7 +20,7 @@ supervisor 重启 —— 这是核心模式。中间穿插 11 条防御,避开
|
|
|
20
20
|
|
|
21
21
|
```
|
|
22
22
|
┌──────────────────────────────────────────┐
|
|
23
|
-
│ Layer 3:Witness(monitor) │ 12 个检测器 + 自动停服
|
|
23
|
+
│ Layer 3:Witness(monitor) │ 11 个检测器 + 自动停服
|
|
24
24
|
├──────────────────────────────────────────┤
|
|
25
25
|
│ Layer 2:Loop(serve,~120 LOC 薄壳) │ 捕获信号,循环拉起 round
|
|
26
26
|
├──────────────────────────────────────────┤
|
|
@@ -63,7 +63,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
|
|
|
63
63
|
|---|---|
|
|
64
64
|
| `init` / `install` / `uninstall` | `peek` —— 项目状态快照 |
|
|
65
65
|
| `start` / `stop` / `kill` / `cancel` | `watch` —— peek 在刷新循环里 |
|
|
66
|
-
| `restart` / `status` | `monitor` —— 12 个检测器 + 告警 + 自动停服 |
|
|
66
|
+
| `restart` / `status` | `monitor` —— 11 个检测器 + 告警 + 自动停服 |
|
|
67
67
|
| `round` / `serve` / `upgrade` | `events` —— 查询 / 流式订阅 events.jsonl |
|
|
68
68
|
|
|
69
69
|
**停服三动词**有清晰的语义分层:
|
|
@@ -73,7 +73,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
|
|
|
73
73
|
|
|
74
74
|
动词参考:[`docs/commands.md`](docs/commands.md)。
|
|
75
75
|
|
|
76
|
-
## 内置防御(11 条)
|
|
76
|
+
## 内置防御(12 条)
|
|
77
77
|
|
|
78
78
|
防御以数据形式定义在 `agent_runner/defenses.py`,可通过
|
|
79
79
|
`agent-runner peek --select defenses` 直接拿到。每条防御自带:
|
|
@@ -95,7 +95,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
|
|
|
95
95
|
|
|
96
96
|
完整列表 + 历史出处:[`docs/architecture.md`](docs/architecture.md)。
|
|
97
97
|
|
|
98
|
-
## Monitor:12 个检测器
|
|
98
|
+
## Monitor:11 个检测器
|
|
99
99
|
|
|
100
100
|
**只告警**(warning 级,服务继续跑):
|
|
101
101
|
`timeout_rate` / `hung` / `orphan_chain` / `disk_warning` /
|
|
@@ -45,6 +45,29 @@ def emit_max_rounds_reached(log_dir: Path, *, rounds_completed: int, max_rounds:
|
|
|
45
45
|
emit(log_dir, MAX_ROUNDS_REACHED, rounds_completed=rounds_completed, max_rounds=max_rounds)
|
|
46
46
|
|
|
47
47
|
|
|
48
|
+
def emit_config_broken(log_dir: Path, *, reason: str) -> None:
|
|
49
|
+
"""Emit config_broken (serve stopped on a permanent startup-battery failure)."""
|
|
50
|
+
from agent_runner.events import CONFIG_BROKEN, emit
|
|
51
|
+
|
|
52
|
+
emit(log_dir, CONFIG_BROKEN, reason=reason)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def emit_crash_loop(log_dir: Path, *, consecutive: int, exit_code: int, log_path: Path) -> None:
|
|
56
|
+
"""Emit crash_loop (serve stopped after consecutive unknown short crashes).
|
|
57
|
+
|
|
58
|
+
Captures the failure reason — a redacted tail of the round log — so a
|
|
59
|
+
recurring unknown crash can later be classified into a transient bucket.
|
|
60
|
+
"""
|
|
61
|
+
from agent_runner._redact import redact_secrets
|
|
62
|
+
from agent_runner.events import CRASH_LOOP, emit
|
|
63
|
+
|
|
64
|
+
try:
|
|
65
|
+
reason = redact_secrets(log_path.read_text(errors="replace")[-2000:])
|
|
66
|
+
except OSError:
|
|
67
|
+
reason = ""
|
|
68
|
+
emit(log_dir, CRASH_LOOP, consecutive=consecutive, exit_code=exit_code, reason=reason)
|
|
69
|
+
|
|
70
|
+
|
|
48
71
|
def emit_stop_file_detected(
|
|
49
72
|
log_dir: Path, *, stop_file: Path, content: str, rounds_completed: int
|
|
50
73
|
) -> None:
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.40'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 40)
|
|
21
|
+
__version__ = version = '0.1.42'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 42)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -18,7 +18,7 @@ import sysconfig
|
|
|
18
18
|
import time
|
|
19
19
|
from collections.abc import Iterator
|
|
20
20
|
from pathlib import Path
|
|
21
|
-
from typing import Any
|
|
21
|
+
from typing import Any, Literal
|
|
22
22
|
|
|
23
23
|
from agent_runner import events, lifecycle
|
|
24
24
|
from agent_runner.api_types import (
|
|
@@ -45,6 +45,59 @@ from agent_runner.service_unit import (
|
|
|
45
45
|
serve_unit_filename,
|
|
46
46
|
)
|
|
47
47
|
|
|
48
|
+
# Exit code for a permanent (no-retry) startup-battery failure. A broken config
# does not self-heal between rounds, so serve STOPS rather than respawning it
# forever. 78 = EX_CONFIG (sysexits) — avoids argparse's 2 and the generic 1.
# Lives here (not runner.py) so serve_cmd can import it from the sanctioned api
# facade without coupling to runner (runner imports api, not the reverse).
PERMANENT_CONFIG_EXIT = 78

# Crash-loop circuit breaker (b12). The serve loop escalates the restart delay
# on consecutive UNKNOWN short crashes (non-zero exit, short duration, no
# classified transient) and STOPS after CRASH_LOOP_THRESHOLD of them — the Run 6
# ~100-empty-rounds scar. Recoverable-slow failures (rate limit / 5h quota / 5xx
# / timeout) are already handled by the transient-error throttle and never reach
# this path. A clean (exit 0), long, or classified-transient round resets the run.
CRASH_LOOP_THRESHOLD = 5
CRASH_LOOP_SHORT_EXIT_S = 60  # mirrors monitor.SHORT_EXIT_THRESHOLD_S
CRASH_LOOP_MAX_DELAY_S = 1800  # cap the escalating restart delay (30 min)


def post_round_decision(
    *,
    returncode: int,
    duration_s: float,
    throttle_active: bool,
    consecutive: int,
    restart_delay_s: int,
) -> tuple[Literal["config_broken", "crash_loop", "continue"], int, int]:
    """Restart policy after one round — keeps the serve loop a thin dispatcher.

    Args:
        returncode: Exit code of the round that just finished.
        duration_s: Wall-clock duration of that round, in seconds.
        throttle_active: True when the transient-error throttle has already
            classified the failure; such a round is never an "unknown" crash.
        consecutive: Unknown short crashes seen in a row before this round.
        restart_delay_s: Configured base delay between rounds, in seconds.

    Returns ``(action, delay_s, consecutive)`` where action is:
      - ``"config_broken"`` — permanent startup failure (b18): stop.
      - ``"crash_loop"`` — CRASH_LOOP_THRESHOLD consecutive unknown short crashes
        (b12): stop. An unknown short crash is a non-zero, fast exit with no
        classified transient (rate-limit/5xx/timeout are handled by the throttle).
      - ``"continue"`` — sleep ``delay_s`` then run the next round.

    A clean (exit 0), long, or transient round resets ``consecutive`` to 0; an
    unknown short crash escalates the delay (restart × 2ⁿ, capped at
    CRASH_LOOP_MAX_DELAY_S) until the stop. The escalated delay never drops
    below the ordinary failed-round delay (restart × 2), so a large configured
    ``restart_delay_s`` cannot make a crashing agent restart *sooner* than a
    merely failing one.
    """
    if returncode == PERMANENT_CONFIG_EXIT:
        return ("config_broken", 0, consecutive)

    failed_round_delay = restart_delay_s * 2
    unknown_short_crash = (
        returncode != 0 and duration_s < CRASH_LOOP_SHORT_EXIT_S and not throttle_active
    )
    if not unknown_short_crash:
        # Clean, long, or already-classified round: the crash streak is over.
        delay = restart_delay_s if returncode == 0 else failed_round_delay
        return ("continue", delay, 0)

    consecutive += 1
    if consecutive >= CRASH_LOOP_THRESHOLD:
        return ("crash_loop", 0, consecutive)
    escalated = min(restart_delay_s * 2**consecutive, CRASH_LOOP_MAX_DELAY_S)
    # The cap bounds escalation only; it must not undercut the failed-round delay.
    return ("continue", max(escalated, failed_round_delay), consecutive)
|
|
99
|
+
|
|
100
|
+
|
|
48
101
|
_PROJECT_NAME_RE = re.compile(r"^[A-Za-z0-9._-]+$")
|
|
49
102
|
|
|
50
103
|
_LINGER_HINT = (
|
|
@@ -730,6 +783,8 @@ def check_self_terminated_sentinel(log_dir: Path) -> bool:
|
|
|
730
783
|
from agent_runner._emit import ( # noqa: E402,F401 — intentional bottom re-export
|
|
731
784
|
emit_agent_usage_recorded,
|
|
732
785
|
emit_anomaly_repetitive_tool,
|
|
786
|
+
emit_config_broken,
|
|
787
|
+
emit_crash_loop,
|
|
733
788
|
emit_fresh_eyes_round_triggered,
|
|
734
789
|
emit_max_rounds_reached,
|
|
735
790
|
emit_rate_limit_stop,
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Built-in post_round_hook for codewhale CLI: usage events + transient classifier.
|
|
2
|
+
|
|
3
|
+
Third built-in plugin (after claude, gemini). Parses codewhale's `exec
|
|
4
|
+
--output-format stream-json` NDJSON stdout tail; emits agent_usage_recorded
|
|
5
|
+
from the terminal metadata record. Transient-error classification is
|
|
6
|
+
best-effort and emits ONLY when an error maps to an existing bucket (like
|
|
7
|
+
gemini): codewhale's exec stdout surfaces a {"type":"error"} record, but the
|
|
8
|
+
only observed case so far is auth failure (oauth_fail territory, not a
|
|
9
|
+
transient bucket), so nothing maps yet -- usage-only today. 429/5xx mapping
|
|
10
|
+
is added when a real rate-limit sample is captured.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import time
|
|
17
|
+
from collections import deque
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from agent_runner.api import (
|
|
22
|
+
emit_agent_usage_recorded,
|
|
23
|
+
emit_transient_error_detected,
|
|
24
|
+
)
|
|
25
|
+
from agent_runner.builtin_plugins._constants import (
|
|
26
|
+
_5XX_STATUSES,
|
|
27
|
+
_BACK_OFF_DEFAULTS,
|
|
28
|
+
_RAW_CAP,
|
|
29
|
+
_TAIL_LINES,
|
|
30
|
+
)
|
|
31
|
+
from agent_runner.hooks import HookContext, register_post_round_hook
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class CodewhaleErrorDetector:
    """Post-round hook: turn a codewhale round log into usage / transient-error events."""

    name = "codewhale_error_detector"

    def after_round(self, ctx: HookContext, result: Any) -> None:
        """Inspect the finished round's log and emit whatever events it yields.

        Does nothing unless the round ran the ``codewhale`` binary and its log
        file is known and present. A classified transient error is emitted
        first, then the usage record; the usage record is flagged successful
        only for a zero exit code that did not time out.
        """
        if ctx.agent_binary != "codewhale":
            return
        round_log = ctx.agent_log_path
        if round_log is None:
            return
        if not round_log.exists():
            return

        findings = _parse_codewhale_log(round_log)

        transient = findings.get("transient_error")
        if transient:
            emit_transient_error_detected(ctx.log_dir, round_num=ctx.round_num, **transient)

        usage = findings.get("usage")
        if usage:
            emit_agent_usage_recorded(
                ctx.log_dir,
                round_num=ctx.round_num,
                phase=ctx.phase or "",
                success=(result.exit_code == 0 and not result.timed_out),
                **usage,
            )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _coerce_token_count(value: Any) -> int:
    """Best-effort ``int()`` of a token count; 0 when the value is null or malformed."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def _parse_codewhale_log(log_path: Path) -> dict[str, Any]:
    """Scan last _TAIL_LINES of codewhale NDJSON; extract usage from the metadata
    record; classify any {"type":"error"} that maps to a transient bucket.

    Tolerates non-JSON lines (codewhale prefixes some stdout with terminal
    escapes) via per-line try/except, and tolerates a malformed metadata
    record: a non-object ``meta`` payload is treated as empty and a null or
    non-numeric token count is reported as 0 instead of raising.

    Returns a dict with up to two keys:
      - ``"usage"`` — present when the last metadata record seen carried a
        non-empty ``meta`` object.
      - ``"transient_error"`` — present when the last error record seen maps
        to a transient bucket.
    """
    with log_path.open("r", encoding="utf-8", errors="replace") as f:
        tail = deque(f, maxlen=_TAIL_LINES)
    metadata: dict[str, Any] | None = None
    error_event: dict[str, Any] | None = None
    for line in tail:
        line = line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            continue
        etype = event.get("type")
        if etype == "metadata":
            # Last metadata record wins. Anything other than a JSON object is
            # treated as "no metadata" rather than crashing on .get() below.
            meta = event.get("meta")
            metadata = meta if isinstance(meta, dict) else {}
        elif etype == "error":
            error_event = event

    out: dict[str, Any] = {}

    if metadata:
        out["usage"] = {
            "agent": "codewhale",
            "model": str(metadata.get("model", "unknown")),
            "input_tokens": _coerce_token_count(metadata.get("input_tokens", 0)),
            "output_tokens": _coerce_token_count(metadata.get("output_tokens", 0)),
            "cached_tokens": 0,  # codewhale exec stdout exposes no cache counts
            "cost_usd": None,  # codewhale exec stdout exposes no USD
            "duration_ms": 0,  # not in exec metadata
        }

    if error_event is not None:
        classification = _classify_codewhale_error(error_event)
        if classification:
            duration = _BACK_OFF_DEFAULTS[classification]
            out["transient_error"] = {
                "classification": classification,
                "agent": "codewhale",
                "reset_at_epoch": int(time.time() + duration),
                "raw": str(error_event.get("error", "error"))[:_RAW_CAP],
            }
    return out
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _classify_codewhale_error(error_event: dict[str, Any]) -> str | None:
|
|
114
|
+
"""Map a codewhale {"type":"error"} record to a transient bucket, or None.
|
|
115
|
+
|
|
116
|
+
None means 'not a transient error' (e.g. auth failure -> handled by the
|
|
117
|
+
monitor's oauth_fail log-scan, not the transient classifier). codewhale's
|
|
118
|
+
error record currently carries only a free-text 'error' string with no
|
|
119
|
+
status code; until a real rate-limit/5xx sample is captured we cannot map
|
|
120
|
+
to rate_limit_model / api_transient_5xx / api_timeout, so we return None.
|
|
121
|
+
A future revision keys on a numeric status field once observed.
|
|
122
|
+
"""
|
|
123
|
+
code = error_event.get("code") or error_event.get("status_code")
|
|
124
|
+
if code == 429:
|
|
125
|
+
return "rate_limit_model"
|
|
126
|
+
if code in _5XX_STATUSES:
|
|
127
|
+
return "api_transient_5xx"
|
|
128
|
+
if code == 408:
|
|
129
|
+
return "api_timeout"
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
register_post_round_hook(CodewhaleErrorDetector())
|
|
@@ -2,15 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import importlib.resources
|
|
6
|
+
|
|
5
7
|
from agent_runner import api
|
|
6
8
|
from agent_runner.cli.common import emit, fail, work_dir_from_args
|
|
7
9
|
|
|
8
10
|
|
|
11
|
+
def _preset_names() -> list[str]:
    """List the scaffold presets shipped as ``agent_runner/presets/*.toml``.

    The names are read from the package data rather than hardcoded, so a new
    preset is just a new .toml file: the ``--preset`` choices follow the
    directory contents. Returns the file names minus the ``.toml`` suffix,
    sorted.
    """
    suffix = ".toml"
    names = []
    for entry in importlib.resources.files("agent_runner.presets").iterdir():
        filename = entry.name
        if filename.endswith(suffix):
            names.append(filename[: -len(suffix)])
    names.sort()
    return names
|
|
19
|
+
|
|
20
|
+
|
|
9
21
|
def add_parser(sub, parent) -> None:
|
|
10
22
|
p = sub.add_parser("init", parents=[parent], help="Scaffold agent-runner project files")
|
|
11
23
|
p.add_argument(
|
|
12
24
|
"--preset",
|
|
13
|
-
choices=
|
|
25
|
+
choices=_preset_names(),
|
|
14
26
|
default="claude",
|
|
15
27
|
help="Which agent CLI preset to scaffold (default: claude)",
|
|
16
28
|
)
|
|
@@ -23,12 +23,15 @@ from agent_runner._throttle import _check_throttle_state
|
|
|
23
23
|
from agent_runner._throttle import reset_counters as _reset_counters
|
|
24
24
|
from agent_runner.api import (
|
|
25
25
|
check_self_terminated_sentinel,
|
|
26
|
+
emit_config_broken,
|
|
27
|
+
emit_crash_loop,
|
|
26
28
|
emit_fresh_eyes_round_triggered,
|
|
27
29
|
emit_max_rounds_reached,
|
|
28
30
|
emit_rate_limit_stop,
|
|
29
31
|
emit_round_substrate_after,
|
|
30
32
|
emit_round_substrate_before,
|
|
31
33
|
emit_stop_file_detected,
|
|
34
|
+
post_round_decision,
|
|
32
35
|
)
|
|
33
36
|
from agent_runner.cli.common import cfg_from_args
|
|
34
37
|
from agent_runner.hooks import run_serve_startup_hooks
|
|
@@ -135,6 +138,7 @@ def cmd(args) -> int:
|
|
|
135
138
|
stop_file = cfg.runtime.stop_file # cache: same pattern as effective_max_rounds
|
|
136
139
|
work_dir = cfg.runtime.work_dir
|
|
137
140
|
rounds_completed = 0
|
|
141
|
+
consecutive_crashes = 0 # b12: consecutive UNKNOWN short crashes (crash-loop breaker)
|
|
138
142
|
|
|
139
143
|
try:
|
|
140
144
|
pid_file.write(os.getpid())
|
|
@@ -197,6 +201,7 @@ def cmd(args) -> int:
|
|
|
197
201
|
every_n=cfg.runtime.fresh_eyes_every_n,
|
|
198
202
|
)
|
|
199
203
|
round_log_path = log_dir / f"round-{round_num}.log"
|
|
204
|
+
round_started = time.monotonic()
|
|
200
205
|
with round_log_path.open("w") as f:
|
|
201
206
|
r = subprocess.run(
|
|
202
207
|
[
|
|
@@ -211,6 +216,7 @@ def cmd(args) -> int:
|
|
|
211
216
|
stdout=f,
|
|
212
217
|
stderr=subprocess.STDOUT,
|
|
213
218
|
)
|
|
219
|
+
round_duration_s = time.monotonic() - round_started
|
|
214
220
|
atomic_relink(log_dir / ROUND_CURRENT_LINK, round_log_path)
|
|
215
221
|
git_head_after = compute_git_head(work_dir)
|
|
216
222
|
paths_hash_after = compute_paths_hash(work_dir, cfg.runtime.substrate_fingerprint_paths)
|
|
@@ -221,13 +227,28 @@ def cmd(args) -> int:
|
|
|
221
227
|
paths_hash=paths_hash_after,
|
|
222
228
|
)
|
|
223
229
|
rounds_completed += 1
|
|
230
|
+
# Restart policy (config_broken / crash_loop / continue) lives in the
|
|
231
|
+
# tested api.post_round_decision helper so this loop stays thin.
|
|
232
|
+
action, delay, consecutive_crashes = post_round_decision(
|
|
233
|
+
returncode=r.returncode,
|
|
234
|
+
duration_s=round_duration_s,
|
|
235
|
+
throttle_active=_check_throttle_state(log_dir) is not None,
|
|
236
|
+
consecutive=consecutive_crashes,
|
|
237
|
+
restart_delay_s=cfg.runtime.restart_delay_s,
|
|
238
|
+
)
|
|
239
|
+
if action == "config_broken":
|
|
240
|
+
emit_config_broken(log_dir, reason="startup battery permanent failure")
|
|
241
|
+
break
|
|
242
|
+
if action == "crash_loop":
|
|
243
|
+
emit_crash_loop(
|
|
244
|
+
log_dir,
|
|
245
|
+
consecutive=consecutive_crashes,
|
|
246
|
+
exit_code=r.returncode,
|
|
247
|
+
log_path=round_log_path,
|
|
248
|
+
)
|
|
249
|
+
break
|
|
224
250
|
if args.once or stop["requested"]:
|
|
225
251
|
break
|
|
226
|
-
delay = (
|
|
227
|
-
cfg.runtime.restart_delay_s
|
|
228
|
-
if r.returncode == 0
|
|
229
|
-
else cfg.runtime.restart_delay_s * 2
|
|
230
|
-
)
|
|
231
252
|
time.sleep(delay)
|
|
232
253
|
finally:
|
|
233
254
|
pid_file.unlink()
|
|
@@ -83,8 +83,18 @@ def catalog(cfg: Config) -> list[Defense]:
|
|
|
83
83
|
Defense(
|
|
84
84
|
name="startup_smoke_check",
|
|
85
85
|
value="6 checks (config / log_dir / agent_cli / git / prompt_file / prompt_smoke)",
|
|
86
|
-
codifies=
|
|
87
|
-
|
|
86
|
+
codifies=(
|
|
87
|
+
"R721 + #446 — _common.md frontmatter caused 4h/123-round silent burn; "
|
|
88
|
+
"now halts serve (config_broken) instead of respawning a broken config"
|
|
89
|
+
),
|
|
90
|
+
guarded_by=Path("tests/unit/test_serve_config_broken.py"),
|
|
91
|
+
current_state="active",
|
|
92
|
+
),
|
|
93
|
+
Defense(
|
|
94
|
+
name="crash_loop_breaker",
|
|
95
|
+
value="stop after 5 consecutive short crashes; exp-escalating delay",
|
|
96
|
+
codifies="Run 6 — crashing agent respawned ~100 empty rounds at a fixed 2x delay",
|
|
97
|
+
guarded_by=Path("tests/unit/test_serve_crash_loop.py"),
|
|
88
98
|
current_state="active",
|
|
89
99
|
),
|
|
90
100
|
Defense(
|
|
@@ -32,6 +32,8 @@ ANOMALY_REPETITIVE_TOOL = "anomaly_repetitive_tool"
|
|
|
32
32
|
AGENT_NETWORK_BLIP = "agent_network_blip"
|
|
33
33
|
AGENT_SPAWN = "agent_spawn"
|
|
34
34
|
AGENT_USAGE_RECORDED = "agent_usage_recorded"
|
|
35
|
+
CONFIG_BROKEN = "config_broken"
|
|
36
|
+
CRASH_LOOP = "crash_loop"
|
|
35
37
|
DIRTY_COMMIT_FAILED = "dirty_commit_failed"
|
|
36
38
|
DIRTY_DETECTED = "dirty_detected"
|
|
37
39
|
FRESH_EYES_ROUND_TRIGGERED = "fresh_eyes_round_triggered"
|
|
@@ -49,7 +49,6 @@ KNOWN_ALERT_KINDS: frozenset[str] = frozenset(
|
|
|
49
49
|
"disk_warning",
|
|
50
50
|
"disk_critical",
|
|
51
51
|
"mem_pressure",
|
|
52
|
-
"smoke_fail_rate",
|
|
53
52
|
"oauth_fail",
|
|
54
53
|
"network_fail",
|
|
55
54
|
"rate_limit_active",
|
|
@@ -265,29 +264,6 @@ def detect_mem_pressure(metrics: list[dict[str, Any]], *, threshold_mb: int = 20
|
|
|
265
264
|
)
|
|
266
265
|
|
|
267
266
|
|
|
268
|
-
def detect_smoke_fail_rate(
|
|
269
|
-
events: list[dict[str, Any]], *, window: int = 10, threshold: float = 0.1
|
|
270
|
-
) -> Alert | None:
|
|
271
|
-
ends = [e for e in events if e.get("event") == "round_end"]
|
|
272
|
-
if len(ends) < window:
|
|
273
|
-
return None
|
|
274
|
-
recent_round_nums = [e.get("round_num") for e in ends[-window:]]
|
|
275
|
-
fails = sum(
|
|
276
|
-
1
|
|
277
|
-
for e in events
|
|
278
|
-
if e.get("event") == "smoke_check_failed" and e.get("round_num") in recent_round_nums
|
|
279
|
-
)
|
|
280
|
-
rate = fails / window
|
|
281
|
-
if rate < threshold:
|
|
282
|
-
return None
|
|
283
|
-
return _alert(
|
|
284
|
-
"smoke_fail_rate",
|
|
285
|
-
"warning",
|
|
286
|
-
f"{fails}/{window} recent rounds had smoke_check_failed",
|
|
287
|
-
{"rate": rate, "threshold": threshold, "hint": "Inspect events.jsonl for failure reasons"},
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
|
|
291
267
|
def detect_oauth_fail(
|
|
292
268
|
events: list[dict[str, Any]],
|
|
293
269
|
log_tails: dict[int, str],
|
|
@@ -603,7 +579,6 @@ def run_all_detectors(
|
|
|
603
579
|
),
|
|
604
580
|
detect_disk_critical(metrics, threshold_pct=disk_critical_pct),
|
|
605
581
|
detect_mem_pressure(metrics, threshold_mb=mem_avail_min_mb),
|
|
606
|
-
detect_smoke_fail_rate(events),
|
|
607
582
|
detect_oauth_fail(events, log_tails, patterns=compiled_auth_pats, hint=auth_fail_hint),
|
|
608
583
|
detect_network_fail(events, log_tails),
|
|
609
584
|
detect_rate_limit_active(events, now=now.timestamp()),
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# agent-runner.toml — generated by `agent-runner init --preset codewhale`.
|
|
2
|
+
#
|
|
3
|
+
# Prereqs:
|
|
4
|
+
# - codewhale installed (ships `codewhale` + `codewhale-tui`; both on PATH):
|
|
5
|
+
# npm i -g codewhale (or cargo/brew per CodeWhale docs)
|
|
6
|
+
# - DEEPSEEK_API_KEY set on the supervisor host (or a key saved via
|
|
7
|
+
# `codewhale auth set`; resolution order is config > keyring > env)
|
|
8
|
+
# - work_dir is a git repo
|
|
9
|
+
|
|
10
|
+
[agent]
|
|
11
|
+
command = ["codewhale", "exec", "--auto", "--output-format", "stream-json"]
|
|
12
|
+
prompt_arg_template = ["{prompt}"]
|
|
13
|
+
name = "codewhale"
|
|
14
|
+
|
|
15
|
+
[runtime]
|
|
16
|
+
work_dir = "."
|
|
17
|
+
log_dir = "~/.agent-runner/{project}/logs"
|
|
18
|
+
round_timeout_s = 1800
|
|
19
|
+
restart_delay_s = 3
|
|
20
|
+
|
|
21
|
+
[prompt]
|
|
22
|
+
file = "./prompts/main.md"
|
|
23
|
+
inject_context = true
|
|
24
|
+
|
|
25
|
+
[vcs]
|
|
26
|
+
dirty_action = "stash"
|
|
27
|
+
stash_idempotency_s = 5
|
|
28
|
+
|
|
29
|
+
[monitor]
|
|
30
|
+
auth_fail_hint = "Run `codewhale auth status` to inspect provider/credentials, or set DEEPSEEK_API_KEY on the supervisor host."
|
|
@@ -369,7 +369,7 @@ def run_one_round(cfg: Config, *, phase_override: str | None = None) -> RoundRes
|
|
|
369
369
|
file=sys.stderr,
|
|
370
370
|
)
|
|
371
371
|
events.emit(log_dir, "smoke_check_failed", reason=f"{r.name}: {r.reason}")
|
|
372
|
-
sys.exit(
|
|
372
|
+
sys.exit(api.PERMANENT_CONFIG_EXIT)
|
|
373
373
|
|
|
374
374
|
# Concurrency lock (per-project)
|
|
375
375
|
lock_path = log_dir / "agent-runner.lock"
|
|
@@ -521,6 +521,7 @@ def _run_one_round_inner(cfg: Config, *, phase_override: str | None = None) -> R
|
|
|
521
521
|
round_num=round_num,
|
|
522
522
|
phase=phase,
|
|
523
523
|
idempotency_s=cfg.vcs.stash_idempotency_s,
|
|
524
|
+
log_dir=cfg.runtime.log_dir,
|
|
524
525
|
)
|
|
525
526
|
if ref is not None:
|
|
526
527
|
context_store.write_orphan_state(
|
|
@@ -546,7 +547,9 @@ def _run_one_round_inner(cfg: Config, *, phase_override: str | None = None) -> R
|
|
|
546
547
|
# Leave tree dirty for next round; dirty_detected already emitted
|
|
547
548
|
pass
|
|
548
549
|
elif action == "auto_commit":
|
|
549
|
-
err = vcs_state.try_auto_commit(
|
|
550
|
+
err = vcs_state.try_auto_commit(
|
|
551
|
+
cfg.runtime.work_dir, round_num, phase, log_dir=cfg.runtime.log_dir
|
|
552
|
+
)
|
|
550
553
|
if err is not None:
|
|
551
554
|
events.emit(
|
|
552
555
|
log_dir,
|
|
@@ -5,8 +5,8 @@ Writes three files into a git repo:
|
|
|
5
5
|
prompts/main.md — neutral 8-line placeholder
|
|
6
6
|
.gitignore — append "logs/" if missing
|
|
7
7
|
|
|
8
|
-
Available presets ship as package data in `agent_runner/presets/*.toml
|
|
9
|
-
|
|
8
|
+
Available presets ship as package data in `agent_runner/presets/*.toml`;
|
|
9
|
+
`agent-runner init --preset <name>` discovers them from that directory.
|
|
10
10
|
|
|
11
11
|
Optionally commits in one step (default true via the CLI).
|
|
12
12
|
"""
|