cli-agent-runner 0.1.26__tar.gz → 0.1.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CHANGELOG.md +30 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/PKG-INFO +1 -1
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_emit.py +16 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_version.py +2 -2
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/api_types.py +0 -4
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/claude_rate_limit.py +24 -6
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/gemini.py +23 -2
- cli_agent_runner-0.1.28/docs/migrations/0.1.27.md +169 -0
- cli_agent_runner-0.1.28/docs/migrations/0.1.28.md +176 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/_test_helpers.py +8 -2
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_types.py +9 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_claude_error_detector.py +147 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_gemini_plugin.py +94 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.codecov.yml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.githooks/commit-msg +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/workflows/ci.yml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/workflows/release.yml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.gitignore +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.vulture-whitelist.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CODE_OF_CONDUCT.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CONTRIBUTING.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/LICENSE +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/README.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/README.zh.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/SECURITY.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_docgen.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_registry.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_substrate.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_throttle.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/agent_runtime.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/api.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/_constants.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/__main__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/common.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/init_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/install_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/monitor_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/peek_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/round_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/serve_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/service_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/upgrade_cmd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/config.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/context_store.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/defenses.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/detector_helpers.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/events.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/hooks.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/http_progress.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/lifecycle.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/metrics.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/monitor.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/aider.toml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/claude.toml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/gemini.toml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/prompt_loader.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/round_log.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/round_view.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/runner.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/scaffold.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/service_unit.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/startup_check.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/vcs_state.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/build.sh +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/example-agent-runner.toml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/launchd.plist.tmpl +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/run-loop.sh +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/systemd.service.tmpl +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/README.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/architecture.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/commands.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/configuration.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/events.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/long-running-agents.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/marketing/README.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/marketing/promo-cn.html +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.16.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.17.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.19.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.20.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.21.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.22.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.23.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.24.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.25.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.26.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/plugins.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/quickstart.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/recipes/aider.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/runbook.md +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/pyproject.toml +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/conftest.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/contract/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/contract/test_public_api_surface.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/conftest.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_graceful_stop.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_install_systemd.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_bounded_run.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_context_enricher_namespacing.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_fresh_eyes_signal.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_install_dry_run.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_monitor_seeded.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_plugin_detector_loaded.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_plugin_owned_paths.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_plugin_real_flow.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_scaffold_presets.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_serve_loop.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_substrate_fingerprint.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_transient_error_backoff.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_architecture.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_atomic_write_enforced.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_catalogs.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_docs_generated.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_event_kind_registry.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_events_doc_contract.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_module_boundaries.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_module_sizes.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_no_ai_signatures.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_peek_schema_version.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_round_result_stable.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/parser.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/test_parser.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/test_quickstart.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/__init__.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_agent_runtime.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_assemble_prompt.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_events_stream.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_install.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_observation.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_read_round_num.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_resolve_phase.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_service.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_auto_stop_gating.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_common.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_init_install.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_monitor_http.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_service_peek_monitor.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_upgrade.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_fresh_eyes.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_max_rounds.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_rate_limit_action.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_stop_file.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_transient_error_action.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_context_store.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_defenses.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_detector_helpers.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_detector_protocol.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_docgen.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_events.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_fresh_eyes_trigger.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_hook_failure_isolation.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_hooks.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_http_progress.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_init_entry_points.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_lifecycle.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_metrics.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_assembly.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_detectors.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_peek_argparse.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_presets.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_prompt_loader.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_round_log_helpers.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_round_view.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_runner.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_runner_throttle.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_scaffold.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_cmd_bounded.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_round_log.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_sentinel.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_startup_hooks.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_service_unit.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_startup_check.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_substrate.py +0 -0
- {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_vcs_state.py +0 -0
|
@@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.28] - 2026-05-17
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `agent_usage_recorded` event: new fields `cache_creation_tokens` (claude only, 0 for gemini),
|
|
14
|
+
`tool_call_count`, `phase`, `success`. Enables full cost reconciliation and phase/status
|
|
15
|
+
segmentation by consumers.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- gemini `models_breakdown` per-model entries no longer include raw `input` / `cached` keys;
|
|
19
|
+
canonical `input_tokens` / `cached_tokens` only. Consumers reading raw keys must migrate.
|
|
20
|
+
|
|
21
|
+
### Removed
|
|
22
|
+
- `agent_runner.api_types.ThrottleState` dead alias (0.1.23 back-compat; deprecation window
|
|
23
|
+
passed; switch to `TransientErrorState`).
|
|
24
|
+
|
|
25
|
+
See `docs/migrations/0.1.28.md`.
|
|
26
|
+
|
|
27
|
+
## [0.1.27] - 2026-05-17
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
- claude plugin: rate_limit_event with rateLimitType=null no longer misclassified as account
|
|
31
|
+
5h quota; falls through to api_error_status-based bucket (e.g. infra 429 → rate_limit_model).
|
|
32
|
+
Affects supervisors consuming transient_error_detected.
|
|
33
|
+
|
|
34
|
+
### Added
|
|
35
|
+
- docs/migrations/0.1.27.md: supervisor usage guide for transient_error_detected event
|
|
36
|
+
(4-bucket dispatch table + back-off recipe).
|
|
37
|
+
|
|
38
|
+
See `docs/migrations/0.1.27.md`.
|
|
39
|
+
|
|
10
40
|
## [0.1.26] - 2026-05-17
|
|
11
41
|
|
|
12
42
|
- Fix claude `agent_usage_recorded` `model` field (was always
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cli-agent-runner
|
|
3
|
-
Version: 0.1.26
|
|
3
|
+
Version: 0.1.28
|
|
4
4
|
Summary: Restart-on-exit supervisor for autonomous CLI agents
|
|
5
5
|
Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
|
|
6
6
|
Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
|
|
@@ -216,6 +216,10 @@ def emit_agent_usage_recorded(
|
|
|
216
216
|
cost_usd: float | None,
|
|
217
217
|
duration_ms: int,
|
|
218
218
|
models_breakdown: dict[str, dict[str, int]] | None = None,
|
|
219
|
+
cache_creation_tokens: int = 0,
|
|
220
|
+
tool_call_count: int = 0,
|
|
221
|
+
phase: str = "",
|
|
222
|
+
success: bool = True,
|
|
219
223
|
) -> None:
|
|
220
224
|
"""Emit per-round usage record from a CLI plugin.
|
|
221
225
|
|
|
@@ -229,6 +233,14 @@ def emit_agent_usage_recorded(
|
|
|
229
233
|
(gemini has no cost field; claude exposes total_cost_usd).
|
|
230
234
|
- ``models_breakdown``: only populated when a round used multiple models
|
|
231
235
|
(gemini multi-model rounds). None for claude (always single-model).
|
|
236
|
+
- ``cache_creation_tokens``: claude only — ``usage.cache_creation_input_tokens``,
|
|
237
|
+
independent count from ``cached_tokens`` (cache_read). Billed at ~25% premium
|
|
238
|
+
over fresh input per Anthropic pricing. Gemini has no equivalent → 0.
|
|
239
|
+
- ``tool_call_count``: number of tool invocations the agent made in the round.
|
|
240
|
+
Claude: count of ``tool_use`` content blocks across all assistant events.
|
|
241
|
+
Gemini: ``stats.tool_calls``.
|
|
242
|
+
- ``phase``: phase label from HookContext (e.g. "planning"); empty string when None.
|
|
243
|
+
- ``success``: True when exit_code == 0 and not timed_out.
|
|
232
244
|
"""
|
|
233
245
|
from agent_runner.events import AGENT_USAGE_RECORDED, emit
|
|
234
246
|
|
|
@@ -244,6 +256,10 @@ def emit_agent_usage_recorded(
|
|
|
244
256
|
cost_usd=cost_usd,
|
|
245
257
|
duration_ms=duration_ms,
|
|
246
258
|
models_breakdown=models_breakdown,
|
|
259
|
+
cache_creation_tokens=cache_creation_tokens,
|
|
260
|
+
tool_call_count=tool_call_count,
|
|
261
|
+
phase=phase,
|
|
262
|
+
success=success,
|
|
247
263
|
)
|
|
248
264
|
|
|
249
265
|
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.26'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 26)
|
|
21
|
+
__version__ = version = '0.1.28'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 28)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -64,7 +64,13 @@ class ClaudeErrorDetector:
|
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
if parsed.get("usage"):
|
|
67
|
-
emit_agent_usage_recorded(ctx.log_dir, round_num=ctx.round_num, **parsed["usage"])
|
|
67
|
+
emit_agent_usage_recorded(
|
|
68
|
+
ctx.log_dir,
|
|
69
|
+
round_num=ctx.round_num,
|
|
70
|
+
phase=ctx.phase or "",
|
|
71
|
+
success=(result.exit_code == 0 and not result.timed_out),
|
|
72
|
+
**parsed["usage"],
|
|
73
|
+
)
|
|
68
74
|
|
|
69
75
|
|
|
70
76
|
def _parse_claude_log(log_path: Path) -> dict[str, Any]:
|
|
@@ -77,6 +83,7 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
|
|
|
77
83
|
rate_limit_info: dict | None = None
|
|
78
84
|
result_event: dict | None = None
|
|
79
85
|
assistant_model: str | None = None
|
|
86
|
+
tool_call_count = 0
|
|
80
87
|
for line in tail:
|
|
81
88
|
line = line.strip()
|
|
82
89
|
if not line:
|
|
@@ -97,6 +104,11 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
|
|
|
97
104
|
model_val = msg.get("model") if isinstance(msg, dict) else None
|
|
98
105
|
if model_val:
|
|
99
106
|
assistant_model = str(model_val)
|
|
107
|
+
content = msg.get("content", []) if isinstance(msg, dict) else []
|
|
108
|
+
if isinstance(content, list):
|
|
109
|
+
tool_call_count += sum(
|
|
110
|
+
1 for c in content if isinstance(c, dict) and c.get("type") == "tool_use"
|
|
111
|
+
)
|
|
100
112
|
|
|
101
113
|
out: dict[str, Any] = {}
|
|
102
114
|
|
|
@@ -105,7 +117,9 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
|
|
|
105
117
|
out["transient_error"] = error_payload
|
|
106
118
|
|
|
107
119
|
if result_event is not None:
|
|
108
|
-
usage_payload = _extract_usage(result_event, model=assistant_model)
|
|
120
|
+
usage_payload = _extract_usage(
|
|
121
|
+
result_event, model=assistant_model, tool_call_count=tool_call_count
|
|
122
|
+
)
|
|
109
123
|
if usage_payload is not None:
|
|
110
124
|
out["usage"] = usage_payload
|
|
111
125
|
|
|
@@ -118,13 +132,15 @@ def _classify_transient_error(
|
|
|
118
132
|
"""Refactored from prior _scan_log_for_transient_error 0.1.23 logic; same shape, same
|
|
119
133
|
priority (rate_limit_event.rejected > 429 > 5xx > 408).
|
|
120
134
|
"""
|
|
121
|
-
if rate_limit_info is not None:
|
|
135
|
+
if rate_limit_info is not None and rate_limit_info.get("rateLimitType") == "five_hour":
|
|
122
136
|
return {
|
|
123
137
|
"classification": "rate_limit_account",
|
|
124
138
|
"agent": "claude",
|
|
125
139
|
"reset_at_epoch": int(rate_limit_info.get("resetsAt", time.time() + 300)),
|
|
126
140
|
"raw": str((result_event or {}).get("result", ""))[:_RAW_CAP],
|
|
127
141
|
}
|
|
142
|
+
# rate_limit_event with null/other rateLimitType falls through to status-based
|
|
143
|
+
# classification below.
|
|
128
144
|
if result_event is None or result_event.get("is_error") is not True:
|
|
129
145
|
return None
|
|
130
146
|
status = result_event.get("api_error_status")
|
|
@@ -138,7 +154,7 @@ def _classify_transient_error(
|
|
|
138
154
|
return None
|
|
139
155
|
|
|
140
156
|
|
|
141
|
-
def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
|
|
157
|
+
def _extract_usage(result_event: dict, *, model: str | None, tool_call_count: int) -> dict | None:
|
|
142
158
|
"""Extract usage payload from claude result event.
|
|
143
159
|
|
|
144
160
|
Returns None if no usage field present.
|
|
@@ -149,8 +165,8 @@ def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
|
|
|
149
165
|
(they're independent counts). Earlier 0.1.24 simplify pass incorrectly
|
|
150
166
|
subtracted cached from input; 0.1.26 reverts to the correct direct read.
|
|
151
167
|
- ``cached_tokens`` is cache reads only (``cache_read_input_tokens``).
|
|
152
|
-
|
|
153
|
-
|
|
168
|
+
- ``cache_creation_tokens`` is ``cache_creation_input_tokens`` (write cost,
|
|
169
|
+
billed at ~25% premium over fresh input per Anthropic pricing).
|
|
154
170
|
- ``models_breakdown`` always None for claude (single-model per round);
|
|
155
171
|
only populated by gemini multi-model rounds.
|
|
156
172
|
- ``model`` from caller — ``_parse_claude_log`` tracks the latest
|
|
@@ -166,9 +182,11 @@ def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
|
|
|
166
182
|
"input_tokens": int(usage.get("input_tokens", 0)),
|
|
167
183
|
"output_tokens": int(usage.get("output_tokens", 0)),
|
|
168
184
|
"cached_tokens": int(usage.get("cache_read_input_tokens", 0)),
|
|
185
|
+
"cache_creation_tokens": int(usage.get("cache_creation_input_tokens", 0)),
|
|
169
186
|
"cost_usd": result_event.get("total_cost_usd"),
|
|
170
187
|
"duration_ms": int(result_event.get("duration_ms", 0)),
|
|
171
188
|
"models_breakdown": None,
|
|
189
|
+
"tool_call_count": tool_call_count,
|
|
172
190
|
}
|
|
173
191
|
|
|
174
192
|
|
|
@@ -42,7 +42,13 @@ class GeminiErrorDetector:
|
|
|
42
42
|
te = parsed["transient_error"]
|
|
43
43
|
emit_transient_error_detected(ctx.log_dir, round_num=ctx.round_num, **te)
|
|
44
44
|
if parsed.get("usage"):
|
|
45
|
-
emit_agent_usage_recorded(ctx.log_dir, round_num=ctx.round_num, **parsed["usage"])
|
|
45
|
+
emit_agent_usage_recorded(
|
|
46
|
+
ctx.log_dir,
|
|
47
|
+
round_num=ctx.round_num,
|
|
48
|
+
phase=ctx.phase or "",
|
|
49
|
+
success=(result.exit_code == 0 and not result.timed_out),
|
|
50
|
+
**parsed["usage"],
|
|
51
|
+
)
|
|
46
52
|
|
|
47
53
|
|
|
48
54
|
def _parse_gemini_log(log_path: Path) -> dict[str, Any]:
|
|
@@ -104,15 +110,30 @@ def _extract_usage(stats: dict[str, Any]) -> dict[str, Any]:
|
|
|
104
110
|
primary_model = (
|
|
105
111
|
max(models, key=lambda m: models[m].get("total_tokens", 0)) if models else "unknown"
|
|
106
112
|
)
|
|
113
|
+
breakdown = (
|
|
114
|
+
{
|
|
115
|
+
name: {
|
|
116
|
+
"total_tokens": int(m.get("total_tokens", 0)),
|
|
117
|
+
"input_tokens": int(m.get("input_tokens", m.get("input", 0))),
|
|
118
|
+
"output_tokens": int(m.get("output_tokens", 0)),
|
|
119
|
+
"cached_tokens": int(m.get("cached", 0)),
|
|
120
|
+
}
|
|
121
|
+
for name, m in models.items()
|
|
122
|
+
}
|
|
123
|
+
if len(models) > 1
|
|
124
|
+
else None
|
|
125
|
+
)
|
|
107
126
|
return {
|
|
108
127
|
"agent": "gemini",
|
|
109
128
|
"model": primary_model,
|
|
110
129
|
"input_tokens": int(stats.get("input", 0)),
|
|
111
130
|
"output_tokens": int(stats.get("output_tokens", 0)),
|
|
112
131
|
"cached_tokens": int(stats.get("cached", 0)),
|
|
132
|
+
"cache_creation_tokens": 0, # gemini has no cache-creation concept
|
|
113
133
|
"cost_usd": None, # gemini doesn't expose USD
|
|
114
134
|
"duration_ms": int(stats.get("duration_ms", 0)),
|
|
115
|
-
"models_breakdown":
|
|
135
|
+
"models_breakdown": breakdown,
|
|
136
|
+
"tool_call_count": int(stats.get("tool_calls", 0)),
|
|
116
137
|
}
|
|
117
138
|
|
|
118
139
|
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# 0.1.27 — Rate-limit classifier fix + supervisor usage guide
|
|
2
|
+
|
|
3
|
+
## What changed
|
|
4
|
+
|
|
5
|
+
The claude built-in plugin (`agent_runner.builtin_plugins.claude_rate_limit`) previously
|
|
6
|
+
misclassified any `rate_limit_event` with `status="rejected"` as `rate_limit_account`
|
|
7
|
+
(account-level 5-hour quota exhaustion), regardless of `rateLimitType`. As of 0.1.27, the
|
|
8
|
+
`rate_limit_account` branch requires `rateLimitType == "five_hour"`; other `rate_limit_event`
|
|
9
|
+
values (e.g. `rateLimitType: null` for claude.ai infrastructure throttling) fall through to
|
|
10
|
+
status-code-based classification.
|
|
11
|
+
|
|
12
|
+
Concretely: a claude.ai 429 with `rateLimitType: null` is now correctly emitted as
|
|
13
|
+
`transient_error_detected` with `classification: "rate_limit_model"` and a 60-second default
|
|
14
|
+
`reset_at_epoch`, instead of `rate_limit_account` with a synthetic 5-minute fallback epoch.
|
|
15
|
+
|
|
16
|
+
No event schema changes. No new event kinds. No new public API.
|
|
17
|
+
|
|
18
|
+
## Supervisor usage — consuming `transient_error_detected`
|
|
19
|
+
|
|
20
|
+
External supervisors should subscribe to the `transient_error_detected` event family (added in
|
|
21
|
+
0.1.23). The event carries a `classification` discriminator with one of 4 values:
|
|
22
|
+
|
|
23
|
+
| classification | Trigger | reset_at_epoch semantics | Suggested supervisor action |
|
|
24
|
+
|--------------------|------------------------------------------------------|-------------------------------------|-------------------------------------------|
|
|
25
|
+
| rate_limit_account | rate_limit_event.rateLimitType == "five_hour" | Server-provided `resetsAt` (exact) | Sleep until reset_at_epoch (multi-hour) |
|
|
26
|
+
| rate_limit_model | api_error_status == 429 (and not five_hour) | now + 60s default (no server hint) | Sleep until reset_at_epoch or exp-backoff |
|
|
27
|
+
| api_transient_5xx | api_error_status in {500, 502, 503, 504} | now + 60s default | Sleep until reset_at_epoch or exp-backoff |
|
|
28
|
+
| api_timeout | api_error_status == 408 | now + 30s default | Sleep until reset_at_epoch or exp-backoff |
|
|
29
|
+
|
|
30
|
+
### Event payload shape
|
|
31
|
+
|
|
32
|
+
Each line in `events-YYYY-MM.jsonl` looks like:
|
|
33
|
+
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"ts": "2026-05-17T02:13:44.123Z",
|
|
37
|
+
"event": "transient_error_detected",
|
|
38
|
+
"classification": "rate_limit_model",
|
|
39
|
+
"agent": "claude",
|
|
40
|
+
"reset_at_epoch": 1747450424,
|
|
41
|
+
"round_num": 7,
|
|
42
|
+
"raw": "API Error: Server is temporarily limiting requests (not your usage limit) · Rate limited"
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Fields:
|
|
47
|
+
|
|
48
|
+
- `ts` — ISO 8601 UTC timestamp of event emission.
|
|
49
|
+
- `event` — always `"transient_error_detected"`.
|
|
50
|
+
- `classification` — one of the 4 buckets above.
|
|
51
|
+
- `agent` — `"claude"` (gemini uses the same schema via its own plugin).
|
|
52
|
+
- `reset_at_epoch` — Unix epoch seconds; supervisor sleeps until this time. For
|
|
53
|
+
`rate_limit_account` this is the server-provided exact unblock time (or `now + 300s` if the event carries no `resetsAt`); for all other buckets
|
|
54
|
+
it is `now + default_seconds` at the moment of emission.
|
|
55
|
+
- `round_num` — which agent round triggered the error.
|
|
56
|
+
- `raw` — first 200 chars of the result text (useful for logging/alerting).
|
|
57
|
+
|
|
58
|
+
### Dispatch recipe (Python)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import time
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def handle_transient_error(event: dict) -> None:
|
|
65
|
+
"""React to a transient_error_detected event from agent-runner."""
|
|
66
|
+
bucket = event["classification"]
|
|
67
|
+
reset_at = event["reset_at_epoch"]
|
|
68
|
+
now = time.time()
|
|
69
|
+
wait_s = max(reset_at - now, 0)
|
|
70
|
+
|
|
71
|
+
if bucket == "rate_limit_account":
|
|
72
|
+
# Server-provided exact unblock time; respect it (multi-hour wait typical).
|
|
73
|
+
time.sleep(wait_s)
|
|
74
|
+
elif bucket == "rate_limit_model":
|
|
75
|
+
# Infra-level 429; 60s default. Apply your own exp-backoff curve if desired.
|
|
76
|
+
time.sleep(wait_s) # or: time.sleep(exp_backoff_with_cap(attempts, cap=300))
|
|
77
|
+
elif bucket == "api_transient_5xx":
|
|
78
|
+
# Transient server error; 60s default.
|
|
79
|
+
time.sleep(wait_s)
|
|
80
|
+
elif bucket == "api_timeout":
|
|
81
|
+
# Request timed out; 30s default.
|
|
82
|
+
time.sleep(wait_s)
|
|
83
|
+
else:
|
|
84
|
+
# Unknown future bucket — safe fallback.
|
|
85
|
+
time.sleep(max(wait_s, 30))
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Default back-off vs. your own curve
|
|
89
|
+
|
|
90
|
+
Agent-runner's defaults (`rate_limit_model` and `api_transient_5xx` → 60s, `api_timeout` → 30s)
|
|
91
|
+
are a conservative baseline — a flat one-shot sleep. Supervisors that track consecutive failures
|
|
92
|
+
may apply an exponential curve with a cap (e.g. 30s → 60s → 120s → 300s max) for
|
|
93
|
+
`rate_limit_model` and `api_transient_5xx`. For `rate_limit_account`, always respect
|
|
94
|
+
`reset_at_epoch` verbatim — the server provides the exact unblock time.
|
|
95
|
+
|
|
96
|
+
## Migration from legacy `rate_limit_rejected` event
|
|
97
|
+
|
|
98
|
+
Consumers that still listen to `rate_limit_rejected` (added in 0.1.20) continue to receive it
|
|
99
|
+
for `rate_limit_account` events only. It is emitted as a back-compat dual-emit alongside
|
|
100
|
+
`transient_error_detected`. New consumers should subscribe to `transient_error_detected` for
|
|
101
|
+
full 4-bucket coverage; `rate_limit_rejected` only fires for the `rate_limit_account` (`five_hour`) bucket and carries
|
|
102
|
+
no `classification` field.
|
|
103
|
+
|
|
104
|
+
## Verification
|
|
105
|
+
|
|
106
|
+
Write the incident JSONL into a temporary file and call `_parse_claude_log` directly:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
mkdir -p /tmp/verify-0.1.27
|
|
110
|
+
cat > /tmp/verify-0.1.27/round-1.log <<'EOF'
|
|
111
|
+
{"type":"rate_limit_event","rate_limit_info":{"status":"rejected","rateLimitType":null}}
|
|
112
|
+
{"type":"assistant","message":{"model":"claude-opus-4-7","content":[{"type":"text","text":"API Error: rate limited"}]}}
|
|
113
|
+
{"type":"result","is_error":true,"api_error_status":429,"result":"API Error: rate limited","usage":{"input_tokens":100,"output_tokens":10,"cache_read_input_tokens":0},"duration_ms":1000,"total_cost_usd":0.01}
|
|
114
|
+
EOF
|
|
115
|
+
|
|
116
|
+
.venv/bin/python -c "
|
|
117
|
+
from pathlib import Path
|
|
118
|
+
from agent_runner.builtin_plugins.claude_rate_limit import _parse_claude_log
|
|
119
|
+
import json
|
|
120
|
+
print(json.dumps(_parse_claude_log(Path('/tmp/verify-0.1.27/round-1.log')), indent=2, default=str))
|
|
121
|
+
"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Expected output:
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"transient_error": {
|
|
129
|
+
"classification": "rate_limit_model",
|
|
130
|
+
"agent": "claude",
|
|
131
|
+
"reset_at_epoch": 1747450484,
|
|
132
|
+
"raw": "API Error: rate limited"
|
|
133
|
+
},
|
|
134
|
+
"usage": {
|
|
135
|
+
"agent": "claude",
|
|
136
|
+
"model": "claude-opus-4-7",
|
|
137
|
+
"input_tokens": 100,
|
|
138
|
+
"output_tokens": 10,
|
|
139
|
+
"cached_tokens": 0,
|
|
140
|
+
"cost_usd": 0.01,
|
|
141
|
+
"duration_ms": 1000
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
`classification` must be `"rate_limit_model"` and `reset_at_epoch` must be approximately
|
|
147
|
+
`now + 60`. Clean up with `rm -rf /tmp/verify-0.1.27`.
|
|
148
|
+
|
|
149
|
+
## Impact summary
|
|
150
|
+
|
|
151
|
+
Supervisors that dispatch semantically on `classification == "rate_limit_account"` (e.g. "this
|
|
152
|
+
is the 5-hour quota — wait until reset") will no longer trigger that path for infrastructure
|
|
153
|
+
429s with `rateLimitType: null`. Instead, a `rate_limit_model` event fires with a 60-second
|
|
154
|
+
`reset_at_epoch`.
|
|
155
|
+
|
|
156
|
+
Supervisors that dispatch only on `reset_at_epoch` (ignoring classification) will see a shorter
|
|
157
|
+
wait (60s instead of ~300s) for infra 429s — a net improvement.
|
|
158
|
+
|
|
159
|
+
Supervisors subscribed to the legacy `rate_limit_rejected` event are unaffected: that event
|
|
160
|
+
only fires for genuine `rate_limit_account` (five_hour) events, which continue to work as
|
|
161
|
+
before.
|
|
162
|
+
|
|
163
|
+
## What did NOT change
|
|
164
|
+
|
|
165
|
+
- `transient_error_detected` event schema — field names, field types unchanged.
|
|
166
|
+
- `_BACK_OFF_DEFAULTS` table — unchanged.
|
|
167
|
+
- Legacy `rate_limit_rejected` back-compat emission for `rate_limit_account` — unchanged.
|
|
168
|
+
- Other plugins (gemini) — no equivalent `rate_limit_event` semantics; not touched.
|
|
169
|
+
- Public API surface — no new functions, no new event kinds, no signature changes.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# Migration Guide — 0.1.28
|
|
2
|
+
|
|
3
|
+
## What changed
|
|
4
|
+
|
|
5
|
+
Three areas changed in 0.1.28. First, `agent_usage_recorded` events now carry four additional
|
|
6
|
+
fields: `cache_creation_tokens`, `tool_call_count`, `phase`, and `success`. These are populated
|
|
7
|
+
by the built-in claude and gemini plugins; third-party plugins inheriting from the same
|
|
8
|
+
`emit_agent_usage_recorded` call get the new fields for free with safe defaults. Second, the
|
|
9
|
+
gemini plugin's `models_breakdown` per-model dict no longer passes through the raw `input` and
|
|
10
|
+
`cached` keys from the gemini JSONL; only canonical `input_tokens` and `cached_tokens` are
|
|
11
|
+
present. Consumers iterating `models_breakdown` entries need a one-line rename. Third, the
|
|
12
|
+
`ThrottleState` back-compat alias (introduced 0.1.23 when the class was renamed to
|
|
13
|
+
`TransientErrorState`) is removed; any import of `ThrottleState` now raises `ImportError`.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## New fields reference
|
|
18
|
+
|
|
19
|
+
| Field | Type | Semantics | claude value | gemini value |
|
|
20
|
+
|---|---|---|---|---|
|
|
21
|
+
| `cache_creation_tokens` | `int` | Tokens written into the prompt cache (billed at ~25 % premium over fresh input per Anthropic pricing). Independent count from `cached_tokens` (reads). | `usage.cache_creation_input_tokens` | `0` (no creation concept) |
|
|
22
|
+
| `tool_call_count` | `int` | Number of tool invocations the agent made in the round. | Count of `tool_use` content blocks across all assistant events in the round JSONL. | `stats.tool_calls` |
|
|
23
|
+
| `phase` | `str` | Phase label from `HookContext.phase`; empty string when the round has no phase. | `ctx.phase or ""` | `ctx.phase or ""` |
|
|
24
|
+
| `success` | `bool` | `True` when `exit_code == 0` and `timed_out` is `False`. | `result.exit_code == 0 and not result.timed_out` | same |
|
|
25
|
+
|
|
26
|
+
All four fields have safe defaults in `emit_agent_usage_recorded` (`0`, `0`, `""`, `True`),
|
|
27
|
+
so third-party plugins that call the function without the new kwargs continue to work without
|
|
28
|
+
modification.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Updated event payload example
|
|
33
|
+
|
|
34
|
+
### Claude variant
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"kind": "agent_usage_recorded",
|
|
39
|
+
"agent": "claude",
|
|
40
|
+
"model": "claude-opus-4-7",
|
|
41
|
+
"round_num": 3,
|
|
42
|
+
"input_tokens": 4200,
|
|
43
|
+
"output_tokens": 312,
|
|
44
|
+
"cached_tokens": 18900,
|
|
45
|
+
"cache_creation_tokens": 12223,
|
|
46
|
+
"cost_usd": 0.0812,
|
|
47
|
+
"duration_ms": 14470,
|
|
48
|
+
"models_breakdown": null,
|
|
49
|
+
"tool_call_count": 2,
|
|
50
|
+
"phase": "dev",
|
|
51
|
+
"success": true
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Gemini variant
|
|
56
|
+
|
|
57
|
+
```json
|
|
58
|
+
{
|
|
59
|
+
"kind": "agent_usage_recorded",
|
|
60
|
+
"agent": "gemini",
|
|
61
|
+
"model": "gemini-3-flash-preview",
|
|
62
|
+
"round_num": 1,
|
|
63
|
+
"input_tokens": 4614,
|
|
64
|
+
"output_tokens": 91,
|
|
65
|
+
"cached_tokens": 15119,
|
|
66
|
+
"cache_creation_tokens": 0,
|
|
67
|
+
"cost_usd": null,
|
|
68
|
+
"duration_ms": 5337,
|
|
69
|
+
"models_breakdown": null,
|
|
70
|
+
"tool_call_count": 1,
|
|
71
|
+
"phase": "",
|
|
72
|
+
"success": true
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Cost reconciliation recipe
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
def total_token_cost(event: dict) -> float | None:
|
|
82
|
+
"""Return estimated USD cost for a round.
|
|
83
|
+
|
|
84
|
+
For claude: use the recorded cost_usd (includes cache read + write billing).
|
|
85
|
+
For gemini: cost_usd is null; billing requires provider pricing sheet.
|
|
86
|
+
"""
|
|
87
|
+
if event.get("cost_usd") is not None:
|
|
88
|
+
return event["cost_usd"]
|
|
89
|
+
# gemini: no USD field; caller must apply provider pricing
|
|
90
|
+
# approximate: input_tokens * input_rate + output_tokens * output_rate + cached_tokens * cache_rate
|
|
91
|
+
return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def audit_cache_efficiency(event: dict) -> dict:
|
|
95
|
+
"""Breakdown of cache hit vs creation vs fresh input for a claude round."""
|
|
96
|
+
return {
|
|
97
|
+
"fresh_input": event["input_tokens"],
|
|
98
|
+
"cache_read": event["cached_tokens"],
|
|
99
|
+
"cache_write": event["cache_creation_tokens"],
|
|
100
|
+
"total_throughput": (
|
|
101
|
+
event["input_tokens"] + event["cached_tokens"] + event["cache_creation_tokens"]
|
|
102
|
+
),
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## `models_breakdown` migration
|
|
109
|
+
|
|
110
|
+
Pre-0.1.28, gemini multi-model rounds passed raw gemini stat keys directly into each
|
|
111
|
+
`models_breakdown` entry. Those entries contained both `input` (raw) and `input_tokens`
|
|
112
|
+
(canonical) with the same value, plus `cached` (raw) while the canonical `cached_tokens` key was missing entirely.
|
|
113
|
+
|
|
114
|
+
0.1.28 normalises to canonical keys only:
|
|
115
|
+
|
|
116
|
+
| Pre-0.1.28 key | Removed? | Canonical replacement |
|
|
117
|
+
|---|---|---|
|
|
118
|
+
| `input` | yes | `input_tokens` |
|
|
119
|
+
| `cached` | yes | `cached_tokens` |
|
|
120
|
+
| `input_tokens` | kept | — |
|
|
121
|
+
| `output_tokens` | kept | — |
|
|
122
|
+
| `total_tokens` | kept | — |
|
|
123
|
+
|
|
124
|
+
Migration for any consumer iterating breakdown entries:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
# Before
|
|
128
|
+
entry["input"] # raw gemini field
|
|
129
|
+
entry["cached"] # raw gemini field
|
|
130
|
+
|
|
131
|
+
# After
|
|
132
|
+
entry["input_tokens"] # canonical
|
|
133
|
+
entry["cached_tokens"] # canonical
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## `ThrottleState` import migration
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
# Before (raises ImportError on 0.1.28+)
|
|
142
|
+
from agent_runner.api_types import ThrottleState
|
|
143
|
+
|
|
144
|
+
# After
|
|
145
|
+
from agent_runner.api_types import TransientErrorState
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
`ThrottleState` was renamed to `TransientErrorState` in 0.1.23 when the `classification` field
|
|
149
|
+
was added. The alias was retained through 0.1.27. It is now removed.
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Verification recipe
|
|
154
|
+
|
|
155
|
+
Run one round and inspect the emitted event:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
cd /path/to/your/project
|
|
159
|
+
agent-runner serve --max-rounds 1
|
|
160
|
+
|
|
161
|
+
# Find the most recent monthly events file (events-YYYY-MM.jsonl)
|
|
162
|
+
EVENTS=$(ls logs/events-*.jsonl | tail -1)
|
|
163
|
+
|
|
164
|
+
# Print the usage event with pretty JSON
|
|
165
|
+
grep '"kind":"agent_usage_recorded"' "$EVENTS" | python3 -m json.tool
|
|
166
|
+
|
|
167
|
+
# Confirm all new fields are present
|
|
168
|
+
grep '"kind":"agent_usage_recorded"' "$EVENTS" | python3 -c "
|
|
169
|
+
import json, sys
|
|
170
|
+
evt = json.loads(sys.stdin.read())
|
|
171
|
+
for field in ['cache_creation_tokens', 'tool_call_count', 'phase', 'success']:
|
|
172
|
+
assert field in evt, f'missing field: {field}'
|
|
173
|
+
print(f'{field}: {evt[field]}')
|
|
174
|
+
print('All new fields present.')
|
|
175
|
+
"
|
|
176
|
+
```
|
|
@@ -109,7 +109,13 @@ def read_events_for_current_month(log_dir: Path) -> list[dict]:
|
|
|
109
109
|
return [json.loads(line) for line in events_path.read_text().splitlines() if line.strip()]
|
|
110
110
|
|
|
111
111
|
|
|
112
|
-
def make_hook_context(
|
|
112
|
+
def make_hook_context(
|
|
113
|
+
tmp_path: Path,
|
|
114
|
+
*,
|
|
115
|
+
agent_name: str = "claude",
|
|
116
|
+
round_num: int = 1,
|
|
117
|
+
phase: str | None = None,
|
|
118
|
+
):
|
|
113
119
|
"""Build a minimal HookContext for plugin testing.
|
|
114
120
|
|
|
115
121
|
agent_log_path is populated to match where runner.py writes the
|
|
@@ -126,7 +132,7 @@ def make_hook_context(tmp_path: Path, *, agent_name: str = "claude", round_num:
|
|
|
126
132
|
log_dir=tmp_path,
|
|
127
133
|
project="testproj",
|
|
128
134
|
round_num=round_num,
|
|
129
|
-
phase=
|
|
135
|
+
phase=phase,
|
|
130
136
|
agent_name=agent_name,
|
|
131
137
|
agent_log_path=agent_log_path,
|
|
132
138
|
)
|
|
@@ -114,3 +114,12 @@ def test_given_state_default_when_constructed_then_recent_hook_failures_empty()
|
|
|
114
114
|
service=ServiceStatus(mode=ServiceMode.NONE, active=False),
|
|
115
115
|
)
|
|
116
116
|
assert state.recent_hook_failures == []
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_throttle_state_removed() -> None:
|
|
120
|
+
"""ThrottleState alias was deprecated 0.1.23, removed 0.1.28.
|
|
121
|
+
|
|
122
|
+
Consumers should switch to TransientErrorState.
|
|
123
|
+
"""
|
|
124
|
+
with pytest.raises(ImportError):
|
|
125
|
+
from agent_runner.api_types import ThrottleState # noqa: F401
|