cli-agent-runner 0.1.32__tar.gz → 0.1.33__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.gitignore +3 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CHANGELOG.md +10 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CONTRIBUTING.md +3 -8
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/PKG-INFO +1 -1
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_emit.py +34 -9
- cli_agent_runner-0.1.33/agent_runner/_throttle.py +133 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_version.py +2 -2
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/_constants.py +18 -2
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/serve_cmd.py +5 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/runner.py +20 -5
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/architecture.md +1 -1
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/commands.md +2 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/long-running-agents.md +5 -4
- cli_agent_runner-0.1.33/docs/migrations/0.1.33.md +88 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/plugins.md +12 -3
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/thesis.md +38 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_architecture.py +1 -1
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_claude_error_detector.py +16 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_events.py +61 -0
- cli_agent_runner-0.1.33/tests/unit/test_runner_throttle.py +339 -0
- cli_agent_runner-0.1.32/.githooks/commit-msg +0 -33
- cli_agent_runner-0.1.32/agent_runner/_throttle.py +0 -63
- cli_agent_runner-0.1.32/tests/unit/test_runner_throttle.py +0 -125
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.codecov.yml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/workflows/ci.yml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/workflows/release.yml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.vulture-whitelist.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CODE_OF_CONDUCT.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/LICENSE +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/README.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/README.zh.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/SECURITY.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_docgen.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_registry.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_substrate.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/agent_runtime.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/api.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/api_types.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/claude_rate_limit.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/gemini.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/__main__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/common.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/init_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/install_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/monitor_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/peek_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/round_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/service_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/upgrade_cmd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/config.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/context_store.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/defenses.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/detector_helpers.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/events.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/hooks.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/http_progress.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/lifecycle.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/metrics.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/monitor.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/aider.toml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/claude.toml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/gemini.toml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/prompt_loader.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/round_log.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/round_view.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/scaffold.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/service_unit.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/startup_check.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/vcs_state.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/build.sh +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/example-agent-runner.toml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/launchd.plist.tmpl +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/run-loop.sh +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/systemd.service.tmpl +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/README.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/configuration.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/events.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/marketing/README.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/marketing/promo-cn.html +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.16.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.17.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.19.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.20.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.21.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.22.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.23.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.24.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.25.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.26.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.27.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.28.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.29.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.30.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.31.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.32.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/quickstart.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/recipes/aider.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/runbook.md +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/pyproject.toml +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/_test_helpers.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/conftest.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/contract/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/contract/test_public_api_surface.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/conftest.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_graceful_stop.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_install_systemd.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_bounded_run.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_context_enricher_namespacing.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_fresh_eyes_signal.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_grace_kill_emission.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_install_dry_run.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_monitor_seeded.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_plugin_detector_loaded.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_plugin_owned_paths.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_plugin_real_flow.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_scaffold_presets.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_serve_loop.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_substrate_fingerprint.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_transient_error_backoff.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_atomic_write_enforced.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_catalogs.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_classification_ssot.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_docs_generated.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_entry_points_resolve.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_event_kind_registry.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_event_kinds_ssot.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_events_doc_contract.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_layer_2_loop_size.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_module_boundaries.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_module_sizes.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_no_ai_signatures.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_peek_schema_version.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_round_result_stable.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_upstream_schema_canary.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/parser.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/test_parser.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/test_quickstart.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/__init__.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_agent_runtime.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_agent_runtime_grace.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_agent_runtime_progress.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_assemble_prompt.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_events_stream.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_install.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_observation.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_read_round_num.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_resolve_phase.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_service.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_types.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_auto_stop_gating.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_common.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_init_install.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_monitor_http.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_service_peek_monitor.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_upgrade.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_fresh_eyes.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_max_rounds.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_stop_file.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_transient_error_action.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_context_store.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_defenses.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_detector_helpers.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_detector_protocol.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_docgen.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_fresh_eyes_trigger.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_gemini_plugin.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_hook_failure_isolation.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_hooks.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_http_progress.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_init_entry_points.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_lifecycle.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_metrics.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_assembly.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_detectors.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_peek_argparse.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_peek_select.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_presets.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_prompt_loader.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_round_log_helpers.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_round_view.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_runner.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_scaffold.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_cmd_bounded.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_round_log.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_sentinel.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_startup_hooks.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_service_unit.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_startup_check.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_substrate.py +0 -0
- {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_vcs_state.py +0 -0
|
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.33] - 2026-05-19
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `_5XX_STATUSES` includes 529 (Anthropic's "overloaded") — now classified as `api_transient_5xx`.
|
|
14
|
+
- Exp backoff for estimated-class transient errors (`rate_limit_model` / `api_transient_5xx` / `api_timeout`): consecutive failures multiply the wait `2^N` capped at 32× and 30 minutes absolute. Server-authoritative `rate_limit_account` unchanged.
|
|
15
|
+
- `transient_error_backoff_capped` event gains `original_reset_at_epoch`, `applied_reset_at_epoch`, `consecutive_count`, `capped_by_absolute_max` fields for backoff-curve observability.
|
|
16
|
+
- `docs/thesis.md` names the server-authoritative vs estimated reset principle.
|
|
17
|
+
|
|
18
|
+
See `docs/migrations/0.1.33.md`.
|
|
19
|
+
|
|
10
20
|
## [0.1.32] - 2026-05-18
|
|
11
21
|
|
|
12
22
|
### Added
|
|
@@ -9,7 +9,6 @@ git clone https://github.com/wan9yu/cli-agent-runner.git
|
|
|
9
9
|
cd cli-agent-runner
|
|
10
10
|
python3 -m venv .venv && source .venv/bin/activate
|
|
11
11
|
pip install -e ".[dev]"
|
|
12
|
-
git config core.hooksPath .githooks # enables the commit-msg lint hook
|
|
13
12
|
./build.sh check
|
|
14
13
|
```
|
|
15
14
|
|
|
@@ -17,13 +16,6 @@ git config core.hooksPath .githooks # enables the commit-msg lint hook
|
|
|
17
16
|
+ integration tests, the literate quickstart, and the docs CI gate. It's
|
|
18
17
|
what GitHub Actions runs on every push and PR.
|
|
19
18
|
|
|
20
|
-
`git config core.hooksPath .githooks` activates the in-repo
|
|
21
|
-
[`.githooks/commit-msg`](.githooks/commit-msg) hook which rejects commit
|
|
22
|
-
messages containing `Co-Authored-By:` trailers, robot emojis, or other
|
|
23
|
-
AI-tool attribution patterns. The same check runs in CI (`lint-commits`
|
|
24
|
-
job) and as a pytest invariant (`tests/invariants/test_no_ai_signatures.py`)
|
|
25
|
-
— defense in depth.
|
|
26
|
-
|
|
27
19
|
## Workflow
|
|
28
20
|
|
|
29
21
|
1. Open an issue first for non-trivial changes — saves wasted work on both sides.
|
|
@@ -33,6 +25,9 @@ job) and as a pytest invariant (`tests/invariants/test_no_ai_signatures.py`)
|
|
|
33
25
|
5. Run `./build.sh check` locally before pushing.
|
|
34
26
|
6. Conventional Commits: `feat:` / `fix:` / `docs:` / `refactor:` / `test:` /
|
|
35
27
|
`chore:` / `ci:` / `build:` / `perf:`. Subjects in English, imperative mood.
|
|
28
|
+
CI (`lint-commits` job) and `tests/invariants/test_no_ai_signatures.py`
|
|
29
|
+
reject auto-generated trailers and robot signatures — keep messages
|
|
30
|
+
human-authored.
|
|
36
31
|
|
|
37
32
|
## Architecture / docs
|
|
38
33
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cli-agent-runner
|
|
3
|
-
Version: 0.1.32
|
|
3
|
+
Version: 0.1.33
|
|
4
4
|
Summary: Restart-on-exit supervisor for autonomous CLI agents
|
|
5
5
|
Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
|
|
6
6
|
Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
|
|
@@ -281,15 +281,40 @@ def emit_transient_error_backoff_capped(
|
|
|
281
281
|
agent: str,
|
|
282
282
|
requested_sleep_s: int,
|
|
283
283
|
applied_sleep_s: int,
|
|
284
|
+
original_reset_at_epoch: int | None = None,
|
|
285
|
+
applied_reset_at_epoch: int | None = None,
|
|
286
|
+
consecutive_count: int | None = None,
|
|
287
|
+
capped_by_absolute_max: bool | None = None,
|
|
284
288
|
) -> None:
|
|
285
|
-
"""Emit
|
|
289
|
+
"""Emit when supervisor adjusts the plugin-emitted transient back-off.
|
|
290
|
+
|
|
291
|
+
Fires in two cases:
|
|
292
|
+
1. **Exp backoff applied** (0.1.33+): estimated-class transient errors
|
|
293
|
+
(`rate_limit_model` / `api_transient_5xx` / `api_timeout`) doubled
|
|
294
|
+
on consecutive failures. ``consecutive_count`` > 1, multiplier > 1×.
|
|
295
|
+
2. **Defensive cap hit** (0.1.20+): malformed `reset_at_epoch` or the
|
|
296
|
+
30-min absolute cap clipped the wait. ``capped_by_absolute_max`` True.
|
|
297
|
+
|
|
298
|
+
Fields ``original_reset_at_epoch`` / ``applied_reset_at_epoch`` /
|
|
299
|
+
``consecutive_count`` / ``capped_by_absolute_max`` are 0.1.33+. Older
|
|
300
|
+
callers that pass only the first 4 kwargs continue to work; the new
|
|
301
|
+
fields are omitted from the payload when None.
|
|
302
|
+
"""
|
|
286
303
|
from agent_runner.events import TRANSIENT_ERROR_BACKOFF_CAPPED, emit
|
|
287
304
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
305
|
+
kwargs: dict = {
|
|
306
|
+
"classification": classification,
|
|
307
|
+
"agent": agent,
|
|
308
|
+
"requested_sleep_s": requested_sleep_s,
|
|
309
|
+
"applied_sleep_s": applied_sleep_s,
|
|
310
|
+
}
|
|
311
|
+
if original_reset_at_epoch is not None:
|
|
312
|
+
kwargs["original_reset_at_epoch"] = original_reset_at_epoch
|
|
313
|
+
if applied_reset_at_epoch is not None:
|
|
314
|
+
kwargs["applied_reset_at_epoch"] = applied_reset_at_epoch
|
|
315
|
+
if consecutive_count is not None:
|
|
316
|
+
kwargs["consecutive_count"] = consecutive_count
|
|
317
|
+
if capped_by_absolute_max is not None:
|
|
318
|
+
kwargs["capped_by_absolute_max"] = capped_by_absolute_max
|
|
319
|
+
|
|
320
|
+
emit(log_dir, TRANSIENT_ERROR_BACKOFF_CAPPED, **kwargs)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Throttle state helpers — read events.jsonl tail for transient error state.
|
|
2
|
+
|
|
3
|
+
Internal module. Callers: runner.py (serve loop back-off), api.py (peek).
|
|
4
|
+
Separated from runner.py to satisfy the ouroboros defense: runner.py writes
|
|
5
|
+
events.jsonl but must never read it back (§3 module boundary invariant).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import time
|
|
12
|
+
from collections import deque
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from agent_runner.api_types import TransientErrorState
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _coerce_int(value: Any, default: int = 0) -> int:
    """Return ``int(value)``, or *default* when the value cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


def _check_throttle_state(log_dir: Path) -> TransientErrorState | None:
    """Report the currently active transient-error throttle, if any.

    Reads the last 100 lines of the lexicographically last
    ``events-*.jsonl`` file in *log_dir* and walks them newest-first:

    * a ``transient_error_recovered`` event means the throttle is over;
    * a ``transient_error_detected`` event is the candidate throttle.

    Because the state is rebuilt from the log file rather than held in
    memory, the answer is the same after a process restart.

    Malformed input never raises: blank lines, lines that are not valid JSON,
    and JSON values that are not objects are skipped, and a missing or
    non-numeric ``reset_at_epoch`` is treated as ``0`` (already expired).

    Args:
        log_dir: Directory containing the ``events-*.jsonl`` files.

    Returns:
        A ``TransientErrorState`` when the newest relevant event is a
        detection whose ``reset_at_epoch`` is still in the future; otherwise
        ``None`` (no event files, no detection in the tail, a later recovery,
        or a reset time that has already passed).
    """
    candidates = sorted(log_dir.glob("events-*.jsonl"))
    if not candidates:
        return None

    # Explicit encoding keeps the read independent of the platform locale;
    # undecodable bytes are replaced so one corrupt line cannot abort the scan.
    with candidates[-1].open(encoding="utf-8", errors="replace") as f:
        tail = deque(f, maxlen=100)

    latest_detected: dict[str, Any] | None = None
    for raw in reversed(tail):
        line = raw.strip()
        if not line:
            continue
        try:
            ev = json.loads(line)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if not isinstance(ev, dict):
            # Valid JSON but not an event object (e.g. a bare number or list).
            continue
        kind = ev.get("event")
        if kind == "transient_error_recovered":
            return None
        if kind == "transient_error_detected":
            latest_detected = ev
            break

    if latest_detected is None:
        return None

    reset_at = _coerce_int(latest_detected.get("reset_at_epoch", 0))
    if reset_at <= time.time():
        return None  # Reset already passed without recovery emit; treat as recovered

    return TransientErrorState(
        reset_at_epoch=reset_at,
        classification=str(latest_detected.get("classification", "rate_limit_account")),
        agent=str(latest_detected.get("agent", "unknown")),
        since_round=_coerce_int(latest_detected.get("round_num", 0)),
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Module-level supervisor state: classification bucket -> number of
# consecutive failures recorded so far. Emptied by reset_counters(); as plain
# in-process state it also starts out empty after a serve restart.
_consecutive_failures: dict[str, int] = {}


def compute_adjusted_reset_at(
    *,
    classification: str,
    original_reset_at_epoch: int,
    agent: str,
    log_dir: Path,
) -> tuple[int, int, bool]:
    """Apply exponential back-off to estimated-class transient errors.

    Pass-through cases (the original epoch is returned verbatim, the counter
    is not touched, and no event is emitted):

    * ``rate_limit_account`` -- the reset time is server-authoritative.
    * any classification with no entry in ``_BACK_OFF_DEFAULTS`` -- there is
      no base duration to scale, so the caller-supplied reset is honoured
      instead of raising ``KeyError``.

    For classifications present in ``_BACK_OFF_DEFAULTS``: with ``n`` prior
    consecutive failures recorded for the bucket, the wait is
    ``base * 2 ** min(n, _EXP_CAP)`` clipped to ``_ABSOLUTE_CAP_S`` and is
    measured from the current time (``original_reset_at_epoch`` is only
    reported in the event, not used in the computation). The bucket counter
    is then incremented. A ``transient_error_backoff_capped`` event is
    emitted when the multiplier exceeds 1 or the absolute cap clipped the
    wait.

    Args:
        classification: Transient-error class; also the counter bucket key.
        original_reset_at_epoch: Reset time proposed by the caller.
        agent: Agent name, forwarded to the emitted event.
        log_dir: Log directory, forwarded to the event emitter.

    Returns:
        ``(applied_reset_at_epoch, consecutive_count, capped_by_absolute_max)``.
        ``consecutive_count`` is ``0`` for the pass-through cases.
    """
    if classification == "rate_limit_account":
        # Server-authoritative: respect resetsAt verbatim, no counter touch.
        # Checked before the lazy imports so this path does no import work.
        return (original_reset_at_epoch, 0, False)

    from agent_runner._emit import emit_transient_error_backoff_capped
    from agent_runner.builtin_plugins._constants import (
        _ABSOLUTE_CAP_S,
        _BACK_OFF_DEFAULTS,
        _EXP_CAP,
    )

    base = _BACK_OFF_DEFAULTS.get(classification)
    if base is None:
        # Unknown class: nothing to scale, so fall back to the caller's reset.
        return (original_reset_at_epoch, 0, False)

    # Estimated class: apply exp backoff.
    n = _consecutive_failures.get(classification, 0)
    multiplier = 2 ** min(n, _EXP_CAP)
    extended_duration = base * multiplier
    capped_by_absolute_max = extended_duration > _ABSOLUTE_CAP_S
    applied_duration = min(extended_duration, _ABSOLUTE_CAP_S)
    applied_reset_at = int(time.time()) + applied_duration

    new_count = n + 1
    _consecutive_failures[classification] = new_count

    # Emit observability event when supervisor adjusted the wait.
    if multiplier > 1 or capped_by_absolute_max:
        emit_transient_error_backoff_capped(
            log_dir,
            classification=classification,
            agent=agent,
            requested_sleep_s=int(base),
            applied_sleep_s=applied_duration,
            original_reset_at_epoch=original_reset_at_epoch,
            applied_reset_at_epoch=applied_reset_at,
            consecutive_count=new_count,
            capped_by_absolute_max=capped_by_absolute_max,
        )

    return (applied_reset_at, new_count, capped_by_absolute_max)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def reset_counters() -> None:
    """Forget every per-classification consecutive-failure count.

    Empties the module-level ``_consecutive_failures`` mapping in place, so
    the next estimated-class failure is scaled from a count of zero again.
    Intended to be called by the serve loop when no throttle is active.
    """
    _consecutive_failures.clear()
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.32'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 32)
|
|
21
|
+
__version__ = version = '0.1.33'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 33)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/_constants.py
RENAMED
|
@@ -21,9 +21,11 @@ _BACK_OFF_DEFAULTS: dict[str, int] = {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
# 5xx codes treated as transient (retry-worthy server errors per RFC 9110):
|
|
24
|
-
# 500=unexpected, 502=bad gateway, 503=unavailable, 504=gateway timeout
|
|
24
|
+
# 500=unexpected, 502=bad gateway, 503=unavailable, 504=gateway timeout,
|
|
25
|
+
# 529=overloaded (Anthropic's non-RFC code emitted during sustained capacity
|
|
26
|
+
# issues; treated as transient per Anthropic SDK behavior).
|
|
25
27
|
# Excluded: 501 (not implemented = permanent), 505 (HTTP version mismatch).
|
|
26
|
-
_5XX_STATUSES: frozenset[int] = frozenset({500, 502, 503, 504})
|
|
28
|
+
_5XX_STATUSES: frozenset[int] = frozenset({500, 502, 503, 504, 529})
|
|
27
29
|
|
|
28
30
|
_CLASSIFICATIONS: frozenset[str] = frozenset(
|
|
29
31
|
{
|
|
@@ -38,3 +40,17 @@ _CLASSIFICATIONS: frozenset[str] = frozenset(
|
|
|
38
40
|
rate_limit_account uses server-provided resetsAt (excluded from
|
|
39
41
|
_BACK_OFF_DEFAULTS table); others use defaults from that table.
|
|
40
42
|
"""
|
|
43
|
+
|
|
44
|
+
_EXP_CAP: int = 5
|
|
45
|
+
"""Maximum exponent for transient-error consecutive backoff: 2^5 = 32×.
|
|
46
|
+
|
|
47
|
+
Beyond this, the multiplier plateaus. Combined with _ABSOLUTE_CAP_S, this
|
|
48
|
+
prevents runaway wait times during sustained outages (max wait = 30min).
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
_ABSOLUTE_CAP_S: int = 1800
|
|
52
|
+
"""Absolute upper bound on supervisor-applied transient back-off (30 min).
|
|
53
|
+
|
|
54
|
+
Applies after exp multiplier — even if base × 2^5 exceeds this, the wait
|
|
55
|
+
is clipped here. Defends against an indefinitely-stuck supervisor.
|
|
56
|
+
"""
|
|
@@ -20,6 +20,7 @@ from pathlib import Path
|
|
|
20
20
|
|
|
21
21
|
from agent_runner._substrate import compute_git_head, compute_paths_hash
|
|
22
22
|
from agent_runner._throttle import _check_throttle_state
|
|
23
|
+
from agent_runner._throttle import reset_counters as _reset_counters
|
|
23
24
|
from agent_runner.api import (
|
|
24
25
|
check_self_terminated_sentinel,
|
|
25
26
|
emit_fresh_eyes_round_triggered,
|
|
@@ -151,6 +152,10 @@ def cmd(args) -> int:
|
|
|
151
152
|
elif action == "stop":
|
|
152
153
|
emit_rate_limit_stop(log_dir)
|
|
153
154
|
break
|
|
155
|
+
else:
|
|
156
|
+
# No active throttle this round — supervisor counters can reset.
|
|
157
|
+
# Next failure (if any) restarts the exp backoff curve from 1×.
|
|
158
|
+
_reset_counters()
|
|
154
159
|
if stop_file is not None and stop_file.exists():
|
|
155
160
|
try:
|
|
156
161
|
content = stop_file.read_text(encoding="utf-8", errors="replace")[:200]
|
|
@@ -44,17 +44,32 @@ _BACK_OFF_JITTER_MAX_S = 30
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def _apply_back_off(log_dir: Path, throttle: TransientErrorState) -> None:
|
|
47
|
-
"""Sleep until
|
|
47
|
+
"""Sleep until adjusted reset_at + jitter; emit recovered (and capped if applicable).
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
For estimated-class classifications (rate_limit_model / api_transient_5xx /
|
|
50
|
+
api_timeout), applies exp backoff on consecutive failures via
|
|
51
|
+
`_throttle.compute_adjusted_reset_at`. For server-authoritative
|
|
52
|
+
rate_limit_account, the original reset_at_epoch is used verbatim.
|
|
53
|
+
|
|
54
|
+
Defensive 8h cap retained as last-line defense against malformed reset
|
|
55
|
+
epochs (e.g. an external/manual event with a far-future reset_at).
|
|
50
56
|
"""
|
|
57
|
+
from agent_runner import _throttle
|
|
58
|
+
|
|
59
|
+
adjusted_reset_at, _consecutive_count, _capped = _throttle.compute_adjusted_reset_at(
|
|
60
|
+
classification=throttle.classification,
|
|
61
|
+
original_reset_at_epoch=throttle.reset_at_epoch,
|
|
62
|
+
agent=throttle.agent,
|
|
63
|
+
log_dir=log_dir,
|
|
64
|
+
)
|
|
65
|
+
|
|
51
66
|
now = time.time()
|
|
52
67
|
requested = (
|
|
53
|
-
|
|
54
|
-
- now
|
|
55
|
-
+ random.uniform(_BACK_OFF_JITTER_MIN_S, _BACK_OFF_JITTER_MAX_S)
|
|
68
|
+
adjusted_reset_at - now + random.uniform(_BACK_OFF_JITTER_MIN_S, _BACK_OFF_JITTER_MAX_S)
|
|
56
69
|
)
|
|
57
70
|
if requested > _BACK_OFF_CAP_S:
|
|
71
|
+
# Defensive: malformed reset epoch (e.g. manual event with far-future ts).
|
|
72
|
+
# Exp backoff layer caps at 30min, so legitimate flow never hits this.
|
|
58
73
|
api.emit_transient_error_backoff_capped(
|
|
59
74
|
log_dir,
|
|
60
75
|
classification=throttle.classification,
|
|
@@ -65,7 +65,7 @@ surfacing everywhere.
|
|
|
65
65
|
| `event_kind_registry` | Prevent events.emit() typos / unregistered kinds slipping past CI | `tests/invariants/test_event_kind_registry.py` |
|
|
66
66
|
<!-- /gen:defenses-table -->
|
|
67
67
|
|
|
68
|
-
## Monitor:
|
|
68
|
+
## Monitor: 11 detectors
|
|
69
69
|
|
|
70
70
|
Three categories by `auto_action`:
|
|
71
71
|
|
|
@@ -87,6 +87,8 @@ agent-runner peek
|
|
|
87
87
|
agent-runner peek --json
|
|
88
88
|
agent-runner peek --select system.disk_used_pct
|
|
89
89
|
agent-runner peek --select defenses
|
|
90
|
+
agent-runner peek --select events.agent_usage_recorded --window 5 # 0.1.32+: native event-kind query
|
|
91
|
+
agent-runner peek --select events.transient_error_detected --window 20
|
|
90
92
|
agent-runner peek --round 42 --log # drill into round 42, include log tail
|
|
91
93
|
agent-runner peek --events 50 # last 50 events
|
|
92
94
|
```
|
|
@@ -216,10 +216,11 @@ the underlying problem is unbounded lineage on a shared resource.
|
|
|
216
216
|
event family is now `transient_error_detected` with a `classification`
|
|
217
217
|
field (`rate_limit_account`, `rate_limit_model`, `api_transient_5xx`,
|
|
218
218
|
`api_timeout`). The same back-off mechanism covers all 4 classifications.
|
|
219
|
-
The legacy `rate_limit_rejected`
|
|
220
|
-
`
|
|
221
|
-
|
|
222
|
-
`docs/migrations/0.1.
|
|
219
|
+
The legacy `rate_limit_rejected` aliases were removed in 0.1.29 — subscribe
|
|
220
|
+
to `transient_error_detected` (filter by `classification == "rate_limit_account"`
|
|
221
|
+
if you only want 5h-quota events). See `docs/migrations/0.1.27.md` for the
|
|
222
|
+
consumer dispatch recipe and `docs/migrations/0.1.29.md` for alias-removal
|
|
223
|
+
migration recipes.
|
|
223
224
|
|
|
224
225
|
## Writing post_round_hook plugins
|
|
225
226
|
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# 0.1.33 — Transient-error exp backoff + 529 classification
|
|
2
|
+
|
|
3
|
+
**Date**: 2026-05-19
|
|
4
|
+
|
|
5
|
+
## What changed
|
|
6
|
+
|
|
7
|
+
Two improvements to how agent-runner handles transient errors from claude.ai:
|
|
8
|
+
|
|
9
|
+
1. **Exp backoff for estimated-class transient errors**. When a round fires the same
|
|
10
|
+
estimated-class transient (`rate_limit_model` / `api_transient_5xx` / `api_timeout`)
|
|
11
|
+
after waiting our previous estimate, the supervisor doubles the next wait. Curve:
|
|
12
|
+
`2^N` capped at 32× and 30 minutes absolute. Counter resets when a round completes
|
|
13
|
+
without firing a new transient. Defaults upgraded transparently — no config knobs,
|
|
14
|
+
no consumer action.
|
|
15
|
+
|
|
16
|
+
2. **529 status code now classified as `api_transient_5xx`**. Anthropic's "overloaded"
|
|
17
|
+
response (HTTP 529, non-RFC) is correctly handled as a transient — supervisor
|
|
18
|
+
throttles instead of immediately re-dispatching. Previously fell through to
|
|
19
|
+
"unknown error, no transient detection" and hammered the upstream.
|
|
20
|
+
|
|
21
|
+
Server-authoritative `rate_limit_account` (Anthropic's `resetsAt`) is unchanged —
|
|
22
|
+
the server's exact unblock time is respected verbatim, no exp backoff applied.
|
|
23
|
+
|
|
24
|
+
## Backoff curve reference (estimated classes)
|
|
25
|
+
|
|
26
|
+
| Bucket | Base | 1× | 2× | 4× | 8× | 16× | 32× (cap) | Absolute cap |
|
|
27
|
+
|---|---|---|---|---|---|---|---|---|
|
|
28
|
+
| `rate_limit_model` | 60s | 60s | 120s | 240s | 480s | 960s | **1800s** | 30 min |
|
|
29
|
+
| `api_transient_5xx` | 60s | 60s | 120s | 240s | 480s | 960s | **1800s** | 30 min |
|
|
30
|
+
| `api_timeout` | 30s | 30s | 60s | 120s | 240s | 480s | 960s | 30 min |
|
|
31
|
+
|
|
32
|
+
Multiplier = `2^min(consecutive_count - 1, 5)`. From the 6th consecutive failure onward the
|
|
33
|
+
multiplier plateaus at 32×. Effective wait may also be clipped by the 30-minute
|
|
34
|
+
absolute cap (e.g. `rate_limit_model` after 6 consecutive failures: `60 × 32 = 1920s`
|
|
35
|
+
clipped to `1800s`).
|
|
36
|
+
|
|
37
|
+
## Observing the backoff curve
|
|
38
|
+
|
|
39
|
+
The `transient_error_backoff_capped` event now fires whenever the supervisor adjusts
|
|
40
|
+
the plugin-emitted wait — including the exp-backoff case (was previously
|
|
41
|
+
only-on-defensive-8h-cap).
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
agent-runner peek --select events.transient_error_backoff_capped --window 20
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The payload includes:
|
|
48
|
+
|
|
49
|
+
- `original_reset_at_epoch` — what the plugin emitted (base × 1×)
|
|
50
|
+
- `applied_reset_at_epoch` — what the supervisor will actually sleep to
|
|
51
|
+
- `consecutive_count` — how many times this bucket fired in a row
|
|
52
|
+
- `capped_by_absolute_max` — whether the 30-min ceiling kicked in
|
|
53
|
+
|
|
54
|
+
## Server-authoritative class (unchanged)
|
|
55
|
+
|
|
56
|
+
`rate_limit_account` events with Anthropic's `resetsAt` epoch are still respected
|
|
57
|
+
exactly. The exp backoff machinery never increments the counter for this bucket
|
|
58
|
+
and never emits `transient_error_backoff_capped`. The reasoning: server knows
|
|
59
|
+
when the 5-hour quota resets; second-guessing it would be counter-productive.
|
|
60
|
+
|
|
61
|
+
## Tuning
|
|
62
|
+
|
|
63
|
+
No config knobs added. The curve parameters (base, multiplier, exp cap, absolute
|
|
64
|
+
cap) are hardcoded to sensible defaults. If your scenario needs different values,
|
|
65
|
+
open an issue with the specific case — we'll evaluate against
|
|
66
|
+
`docs/thesis.md` ("Not a remediation framework — defaults are right").
|
|
67
|
+
|
|
68
|
+
## 529 callout
|
|
69
|
+
|
|
70
|
+
Anthropic returns HTTP 529 ("overloaded") during sustained capacity issues. This
|
|
71
|
+
is not in the RFC 9110 5xx set but Anthropic's SDK treats it as transient. Adding
|
|
72
|
+
it to `_5XX_STATUSES` keeps our classification consistent with upstream behavior.
|
|
73
|
+
|
|
74
|
+
## No consumer action required
|
|
75
|
+
|
|
76
|
+
All changes are default-on or additive:
|
|
77
|
+
|
|
78
|
+
- Existing TOML keeps working unchanged.
|
|
79
|
+
- Existing event subscribers see a more populated `transient_error_backoff_capped`
|
|
80
|
+
payload (additive fields) — old fields retained.
|
|
81
|
+
- Plugins (`claude_error_detector`, `gemini_error_detector`) unchanged.
|
|
82
|
+
- CLI surface unchanged.
|
|
83
|
+
|
|
84
|
+
If your code reads `transient_error_backoff_capped` payload, the four new fields
|
|
85
|
+
(`original_reset_at_epoch`, `applied_reset_at_epoch`, `consecutive_count`,
|
|
86
|
+
`capped_by_absolute_max`) are absent for events emitted before 0.1.33 (or by the
|
|
87
|
+
defensive 8h-cap path which still uses only the old payload shape). Defensive
|
|
88
|
+
parsing recommended.
|
|
@@ -95,9 +95,18 @@ class HookContext:
|
|
|
95
95
|
project: str
|
|
96
96
|
round_num: int
|
|
97
97
|
phase: str | None
|
|
98
|
-
agent_name: str | None
|
|
98
|
+
agent_name: str | None # cosmetic name from [agent].name TOML
|
|
99
|
+
agent_binary: str | None # 0.1.30+: basename of agent.command[0]
|
|
100
|
+
# plus dry_run, anomaly_repetitive_*, agent_log_path — see source for full set
|
|
99
101
|
```
|
|
100
102
|
|
|
103
|
+
For capability detection (e.g. "is this round running claude?"), plugins
|
|
104
|
+
should check `ctx.agent_binary == "claude"`, NOT `ctx.agent_name`. The
|
|
105
|
+
former is the actual binary basename; the latter is user-cosmetic and
|
|
106
|
+
may be overridden in `[agent] name = "..."` (this was a real bug fixed
|
|
107
|
+
in 0.1.30 — strict `agent_name` check silently suppressed events when
|
|
108
|
+
operators set custom names).
|
|
109
|
+
|
|
101
110
|
`PostRoundHook` additionally receives a `RoundResult` (`from agent_runner.api_types import RoundResult`).
|
|
102
111
|
Its field set is stable across 0.1.x (additions only).
|
|
103
112
|
|
|
@@ -289,7 +298,7 @@ and applies the configured `transient_error_action` (default `back_off`;
|
|
|
289
298
|
No configuration required to enable the detector; it activates for any
|
|
290
299
|
project using claude as the agent CLI.
|
|
291
300
|
|
|
292
|
-
Non-claude agents: the detector returns early when `ctx.
|
|
301
|
+
Non-claude agents: the detector returns early when `ctx.agent_binary != "claude"`.
|
|
293
302
|
Third-party plugin authors may use the same `register_post_round_hook` API
|
|
294
303
|
to ship equivalent detectors for other agent CLIs — the bundled
|
|
295
304
|
`gemini_error_detector` is a working reference.
|
|
@@ -297,7 +306,7 @@ to ship equivalent detectors for other agent CLIs — the bundled
|
|
|
297
306
|
## Custom monitor detectors (§3.3)
|
|
298
307
|
|
|
299
308
|
0.1.5 adds a fourth extension point — plugin authors can ship custom monitor
|
|
300
|
-
detectors that run alongside the
|
|
309
|
+
detectors that run alongside the 11 builtins on every monitor poll.
|
|
301
310
|
|
|
302
311
|
### Group + Protocol
|
|
303
312
|
|
|
@@ -51,12 +51,50 @@ then silence = hung agent. Not generic anomaly. Per-project variance in
|
|
|
51
51
|
token usage and round duration is large enough that rolling-baseline alerting
|
|
52
52
|
would produce constant false positives across diverse workloads.
|
|
53
53
|
|
|
54
|
+
The `anomaly_repetitive_active` detector (added 0.1.32) is the live example:
|
|
55
|
+
it fires when the claude plugin emits `anomaly_repetitive_tool` events
|
|
56
|
+
above a fixed threshold within a window — a specific signature, not N-σ.
|
|
57
|
+
`max_grace_after_result_s` (0.1.31) is another: kills the subprocess after
|
|
58
|
+
a fixed grace following the `result` event — specific signature, not "is
|
|
59
|
+
this subprocess behaving unusually".
|
|
60
|
+
|
|
54
61
|
> **Example**: A 2026-05-18 proposal requested a "cost spike detector" that
|
|
55
62
|
> fires when this round's cost is N× the rolling 7-day average. Rejected.
|
|
56
63
|
> The rolling baseline itself requires aggregation we don't own, and the
|
|
57
64
|
> threshold N is project-specific. A consumer can compute this from the flat
|
|
58
65
|
> events file.
|
|
59
66
|
|
|
67
|
+
### How we handle transient errors: server-authoritative vs estimated
|
|
68
|
+
|
|
69
|
+
`transient_error_detected` events carry a `reset_at_epoch` field telling
|
|
70
|
+
the supervisor when to retry. Two cases with different policies:
|
|
71
|
+
|
|
72
|
+
- **Server-authoritative**: Anthropic's `rate_limit_event.resetsAt` is an
|
|
73
|
+
exact unblock time. We respect it verbatim — no backoff multipliers, no
|
|
74
|
+
caps applied (other than the defensive 8h guard against malformed epochs). Server knows best.
|
|
75
|
+
- **Estimated**: For other classifications (`rate_limit_model`,
|
|
76
|
+
`api_transient_5xx`, `api_timeout`), the plugin emits a default guess
|
|
77
|
+
(`_BACK_OFF_DEFAULTS[bucket]`). Guesses can be wrong; if a round fires
|
|
78
|
+
the same bucket again after waiting our guess, we increase the wait
|
|
79
|
+
exponentially (`2^N`, capped at 32× and 30 minutes absolute).
|
|
80
|
+
|
|
81
|
+
This split keeps the policy simple: trust the server when it talks, and
|
|
82
|
+
back off our own estimates when they prove insufficient. It is **not**
|
|
83
|
+
N-σ novelty detection (which we reject — see the section above); it
|
|
84
|
+
codifies the specific scar of "fixed-per-bucket backoff insufficient
|
|
85
|
+
during sustained upstream outage."
|
|
86
|
+
|
|
87
|
+
Counter reset: any round that completes without firing a new
|
|
88
|
+
`transient_error_detected` event clears all bucket counters back to zero.
|
|
89
|
+
|
|
90
|
+
> **Example**: Gateway 2026-05-18 reported sustained 5xx + 529 from
|
|
91
|
+
> Anthropic where our previous fixed 60s wait was too short — the next
|
|
92
|
+
> round hit the same error, waited 60s again, and again. Rejected: adding
|
|
93
|
+
> a config knob (`[runtime] transient_backoff_strategy = "fixed" |
|
|
94
|
+
> "exp"`). Instead: upgraded the default policy to exp backoff
|
|
95
|
+
> transparently, since "the default was wrong" is the right framing — not
|
|
96
|
+
> "the operator should pick between two strategies."
|
|
97
|
+
|
|
60
98
|
### Not an analytics database
|
|
61
99
|
|
|
62
100
|
No `--select`-able query language beyond simple peek selectors. No event
|
|
@@ -44,7 +44,7 @@ ALLOWED_SERVE_FROM = [
|
|
|
44
44
|
"agent_runner.round_log",
|
|
45
45
|
{"ROUND_CURRENT_LINK", "atomic_relink", "next_round_num", "prune_old_round_logs"},
|
|
46
46
|
),
|
|
47
|
-
("agent_runner._throttle", {"_check_throttle_state"}),
|
|
47
|
+
("agent_runner._throttle", {"_check_throttle_state", "reset_counters"}),
|
|
48
48
|
("agent_runner.runner", {"_apply_back_off"}),
|
|
49
49
|
]
|
|
50
50
|
|
{cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_claude_error_detector.py
RENAMED
|
@@ -613,3 +613,19 @@ def test_given_non_claude_binary_when_after_round_then_no_event(tmp_path):
|
|
|
613
613
|
with patch(f"{_MOD}.emit_agent_usage_recorded") as emit:
|
|
614
614
|
ClaudeErrorDetector().after_round(ctx, result)
|
|
615
615
|
emit.assert_not_called()
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def test_given_claude_log_with_529_overloaded_when_classified_then_api_transient_5xx(tmp_path):
|
|
619
|
+
"""Anthropic's "overloaded" status (529) should classify as api_transient_5xx,
|
|
620
|
+
not fall through as unknown error. Real scar — gateway hits this during
|
|
621
|
+
sustained Anthropic capacity issues.
|
|
622
|
+
"""
|
|
623
|
+
from agent_runner.builtin_plugins.claude_rate_limit import _parse_claude_log
|
|
624
|
+
|
|
625
|
+
log = tmp_path / "round-1.log"
|
|
626
|
+
log.write_text(
|
|
627
|
+
'{"type":"result","is_error":true,"api_error_status":529,"result":"Overloaded"}\n',
|
|
628
|
+
encoding="utf-8",
|
|
629
|
+
)
|
|
630
|
+
parsed = _parse_claude_log(log)
|
|
631
|
+
assert parsed["transient_error"]["classification"] == "api_transient_5xx"
|