cli-agent-runner 0.1.32__tar.gz → 0.1.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.gitignore +3 -0
  2. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CHANGELOG.md +10 -0
  3. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CONTRIBUTING.md +3 -8
  4. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/PKG-INFO +1 -1
  5. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_emit.py +34 -9
  6. cli_agent_runner-0.1.33/agent_runner/_throttle.py +133 -0
  7. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_version.py +2 -2
  8. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/_constants.py +18 -2
  9. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/serve_cmd.py +5 -0
  10. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/runner.py +20 -5
  11. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/architecture.md +1 -1
  12. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/commands.md +2 -0
  13. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/long-running-agents.md +5 -4
  14. cli_agent_runner-0.1.33/docs/migrations/0.1.33.md +88 -0
  15. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/plugins.md +12 -3
  16. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/thesis.md +38 -0
  17. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_architecture.py +1 -1
  18. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_claude_error_detector.py +16 -0
  19. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_events.py +61 -0
  20. cli_agent_runner-0.1.33/tests/unit/test_runner_throttle.py +339 -0
  21. cli_agent_runner-0.1.32/.githooks/commit-msg +0 -33
  22. cli_agent_runner-0.1.32/agent_runner/_throttle.py +0 -63
  23. cli_agent_runner-0.1.32/tests/unit/test_runner_throttle.py +0 -125
  24. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.codecov.yml +0 -0
  25. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  26. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  27. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  28. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  29. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/workflows/ci.yml +0 -0
  30. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.github/workflows/release.yml +0 -0
  31. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/.vulture-whitelist.py +0 -0
  32. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/CODE_OF_CONDUCT.md +0 -0
  33. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/LICENSE +0 -0
  34. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/README.md +0 -0
  35. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/README.zh.md +0 -0
  36. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/SECURITY.md +0 -0
  37. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/__init__.py +0 -0
  38. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_docgen.py +0 -0
  39. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_registry.py +0 -0
  40. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/_substrate.py +0 -0
  41. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/agent_runtime.py +0 -0
  42. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/api.py +0 -0
  43. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/api_types.py +0 -0
  44. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/__init__.py +0 -0
  45. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/claude_rate_limit.py +0 -0
  46. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/builtin_plugins/gemini.py +0 -0
  47. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/__init__.py +0 -0
  48. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/__main__.py +0 -0
  49. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/common.py +0 -0
  50. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/init_cmd.py +0 -0
  51. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/install_cmd.py +0 -0
  52. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/monitor_cmd.py +0 -0
  53. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/peek_cmd.py +0 -0
  54. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/round_cmd.py +0 -0
  55. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/service_cmd.py +0 -0
  56. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/cli/upgrade_cmd.py +0 -0
  57. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/config.py +0 -0
  58. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/context_store.py +0 -0
  59. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/defenses.py +0 -0
  60. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/detector_helpers.py +0 -0
  61. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/events.py +0 -0
  62. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/hooks.py +0 -0
  63. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/http_progress.py +0 -0
  64. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/lifecycle.py +0 -0
  65. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/metrics.py +0 -0
  66. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/monitor.py +0 -0
  67. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/__init__.py +0 -0
  68. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/aider.toml +0 -0
  69. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/claude.toml +0 -0
  70. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/presets/gemini.toml +0 -0
  71. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/prompt_loader.py +0 -0
  72. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/round_log.py +0 -0
  73. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/round_view.py +0 -0
  74. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/scaffold.py +0 -0
  75. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/service_unit.py +0 -0
  76. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/startup_check.py +0 -0
  77. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/agent_runner/vcs_state.py +0 -0
  78. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/build.sh +0 -0
  79. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/example-agent-runner.toml +0 -0
  80. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/launchd.plist.tmpl +0 -0
  81. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/run-loop.sh +0 -0
  82. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/deploy/systemd.service.tmpl +0 -0
  83. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/README.md +0 -0
  84. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/configuration.md +0 -0
  85. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/events.md +0 -0
  86. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/marketing/README.md +0 -0
  87. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/marketing/promo-cn.html +0 -0
  88. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.16.md +0 -0
  89. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.17.md +0 -0
  90. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.19.md +0 -0
  91. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.20.md +0 -0
  92. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.21.md +0 -0
  93. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.22.md +0 -0
  94. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.23.md +0 -0
  95. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.24.md +0 -0
  96. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.25.md +0 -0
  97. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.26.md +0 -0
  98. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.27.md +0 -0
  99. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.28.md +0 -0
  100. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.29.md +0 -0
  101. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.30.md +0 -0
  102. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.31.md +0 -0
  103. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/migrations/0.1.32.md +0 -0
  104. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/quickstart.md +0 -0
  105. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/recipes/aider.md +0 -0
  106. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/docs/runbook.md +0 -0
  107. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/pyproject.toml +0 -0
  108. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/__init__.py +0 -0
  109. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/_test_helpers.py +0 -0
  110. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/conftest.py +0 -0
  111. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/contract/__init__.py +0 -0
  112. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/contract/test_public_api_surface.py +0 -0
  113. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/__init__.py +0 -0
  114. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/conftest.py +0 -0
  115. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_graceful_stop.py +0 -0
  116. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_install_systemd.py +0 -0
  117. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_monitor_remote.py +0 -0
  118. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
  119. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
  120. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
  121. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
  122. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/__init__.py +0 -0
  123. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_bounded_run.py +0 -0
  124. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_context_enricher_namespacing.py +0 -0
  125. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_fresh_eyes_signal.py +0 -0
  126. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_grace_kill_emission.py +0 -0
  127. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_install_dry_run.py +0 -0
  128. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_monitor_seeded.py +0 -0
  129. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_plugin_detector_loaded.py +0 -0
  130. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_plugin_owned_paths.py +0 -0
  131. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_plugin_real_flow.py +0 -0
  132. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
  133. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_scaffold_presets.py +0 -0
  134. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_serve_loop.py +0 -0
  135. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_substrate_fingerprint.py +0 -0
  136. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/integration/test_transient_error_backoff.py +0 -0
  137. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/__init__.py +0 -0
  138. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_atomic_write_enforced.py +0 -0
  139. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_catalogs.py +0 -0
  140. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_classification_ssot.py +0 -0
  141. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_docs_generated.py +0 -0
  142. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_entry_points_resolve.py +0 -0
  143. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_event_kind_registry.py +0 -0
  144. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_event_kinds_ssot.py +0 -0
  145. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_events_doc_contract.py +0 -0
  146. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_layer_2_loop_size.py +0 -0
  147. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_module_boundaries.py +0 -0
  148. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_module_sizes.py +0 -0
  149. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_no_ai_signatures.py +0 -0
  150. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
  151. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_peek_schema_version.py +0 -0
  152. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
  153. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_round_result_stable.py +0 -0
  154. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
  155. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/invariants/test_upstream_schema_canary.py +0 -0
  156. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/__init__.py +0 -0
  157. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/parser.py +0 -0
  158. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/test_parser.py +0 -0
  159. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/literate/test_quickstart.py +0 -0
  160. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/__init__.py +0 -0
  161. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_agent_runtime.py +0 -0
  162. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_agent_runtime_grace.py +0 -0
  163. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_agent_runtime_progress.py +0 -0
  164. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_assemble_prompt.py +0 -0
  165. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_events_stream.py +0 -0
  166. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_install.py +0 -0
  167. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_observation.py +0 -0
  168. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_read_round_num.py +0 -0
  169. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_resolve_phase.py +0 -0
  170. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_service.py +0 -0
  171. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_api_types.py +0 -0
  172. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_auto_stop_gating.py +0 -0
  173. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli.py +0 -0
  174. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_common.py +0 -0
  175. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_init_install.py +0 -0
  176. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_monitor_http.py +0 -0
  177. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_service_peek_monitor.py +0 -0
  178. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_cli_upgrade.py +0 -0
  179. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config.py +0 -0
  180. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_fresh_eyes.py +0 -0
  181. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_max_rounds.py +0 -0
  182. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_stop_file.py +0 -0
  183. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
  184. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_config_transient_error_action.py +0 -0
  185. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_context_store.py +0 -0
  186. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_defenses.py +0 -0
  187. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_detector_helpers.py +0 -0
  188. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_detector_protocol.py +0 -0
  189. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_docgen.py +0 -0
  190. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_fresh_eyes_trigger.py +0 -0
  191. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_gemini_plugin.py +0 -0
  192. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_hook_failure_isolation.py +0 -0
  193. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_hooks.py +0 -0
  194. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_http_progress.py +0 -0
  195. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_init_entry_points.py +0 -0
  196. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_lifecycle.py +0 -0
  197. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_metrics.py +0 -0
  198. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_assembly.py +0 -0
  199. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
  200. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
  201. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_detectors.py +0 -0
  202. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_monitor_remote.py +0 -0
  203. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_peek_argparse.py +0 -0
  204. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_peek_select.py +0 -0
  205. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_presets.py +0 -0
  206. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_prompt_loader.py +0 -0
  207. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_round_log_helpers.py +0 -0
  208. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_round_view.py +0 -0
  209. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_runner.py +0 -0
  210. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_scaffold.py +0 -0
  211. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_cmd_bounded.py +0 -0
  212. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_round_log.py +0 -0
  213. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_sentinel.py +0 -0
  214. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_serve_startup_hooks.py +0 -0
  215. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_service_unit.py +0 -0
  216. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_startup_check.py +0 -0
  217. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_substrate.py +0 -0
  218. {cli_agent_runner-0.1.32 → cli_agent_runner-0.1.33}/tests/unit/test_vcs_state.py +0 -0
@@ -1,6 +1,9 @@
1
1
  # Internal working notes (specs, plans, drafts) — not for public repo.
2
2
  docs/internal/
3
3
 
4
+ # Local git hooks — opt-in per clone; CI lint-commits is the authoritative gate.
5
+ .githooks/
6
+
4
7
  # Python
5
8
  __pycache__/
6
9
  *.py[cod]
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.33] - 2026-05-19
11
+
12
+ ### Added
13
+ - `_5XX_STATUSES` includes 529 (Anthropic's "overloaded") — now classified as `api_transient_5xx`.
14
+ - Exponential backoff for estimated-class transient errors (`rate_limit_model` / `api_transient_5xx` / `api_timeout`): consecutive failures multiply the wait by `2^N`, capped at a 32× multiplier and at 30 minutes absolute. Server-authoritative `rate_limit_account` handling is unchanged.
15
+ - `transient_error_backoff_capped` event gains `original_reset_at_epoch`, `applied_reset_at_epoch`, `consecutive_count`, `capped_by_absolute_max` fields for backoff-curve observability.
16
+ - `docs/thesis.md` names the server-authoritative vs estimated reset principle.
17
+
18
+ See `docs/migrations/0.1.33.md`.
19
+
10
20
  ## [0.1.32] - 2026-05-18
11
21
 
12
22
  ### Added
@@ -9,7 +9,6 @@ git clone https://github.com/wan9yu/cli-agent-runner.git
9
9
  cd cli-agent-runner
10
10
  python3 -m venv .venv && source .venv/bin/activate
11
11
  pip install -e ".[dev]"
12
- git config core.hooksPath .githooks # enables the commit-msg lint hook
13
12
  ./build.sh check
14
13
  ```
15
14
 
@@ -17,13 +16,6 @@ git config core.hooksPath .githooks # enables the commit-msg lint hook
17
16
  + integration tests, the literate quickstart, and the docs CI gate. It's
18
17
  what GitHub Actions runs on every push and PR.
19
18
 
20
- `git config core.hooksPath .githooks` activates the in-repo
21
- [`.githooks/commit-msg`](.githooks/commit-msg) hook which rejects commit
22
- messages containing `Co-Authored-By:` trailers, robot emojis, or other
23
- AI-tool attribution patterns. The same check runs in CI (`lint-commits`
24
- job) and as a pytest invariant (`tests/invariants/test_no_ai_signatures.py`)
25
- — defense in depth.
26
-
27
19
  ## Workflow
28
20
 
29
21
  1. Open an issue first for non-trivial changes — saves wasted work on both sides.
@@ -33,6 +25,9 @@ job) and as a pytest invariant (`tests/invariants/test_no_ai_signatures.py`)
33
25
  5. Run `./build.sh check` locally before pushing.
34
26
  6. Conventional Commits: `feat:` / `fix:` / `docs:` / `refactor:` / `test:` /
35
27
  `chore:` / `ci:` / `build:` / `perf:`. Subjects in English, imperative mood.
28
+ CI (`lint-commits` job) and `tests/invariants/test_no_ai_signatures.py`
29
+ reject auto-generated trailers and robot signatures — keep messages
30
+ human-authored.
36
31
 
37
32
  ## Architecture / docs
38
33
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cli-agent-runner
3
- Version: 0.1.32
3
+ Version: 0.1.33
4
4
  Summary: Restart-on-exit supervisor for autonomous CLI agents
5
5
  Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
6
6
  Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
@@ -281,15 +281,40 @@ def emit_transient_error_backoff_capped(
281
281
  agent: str,
282
282
  requested_sleep_s: int,
283
283
  applied_sleep_s: int,
284
+ original_reset_at_epoch: int | None = None,
285
+ applied_reset_at_epoch: int | None = None,
286
+ consecutive_count: int | None = None,
287
+ capped_by_absolute_max: bool | None = None,
284
288
  ) -> None:
285
- """Emit defensive event when computed back-off exceeded 8h cap."""
289
+ """Emit when supervisor adjusts the plugin-emitted transient back-off.
290
+
291
+ Fires in two cases:
292
+ 1. **Exp backoff applied** (0.1.33+): estimated-class transient errors
293
+ (`rate_limit_model` / `api_transient_5xx` / `api_timeout`) doubled
294
+ on consecutive failures. ``consecutive_count`` > 1, multiplier > 1×.
295
+ 2. **Defensive cap hit** (0.1.20+): malformed `reset_at_epoch` or the
296
+ 30-min absolute cap clipped the wait. ``capped_by_absolute_max`` True.
297
+
298
+ Fields ``original_reset_at_epoch`` / ``applied_reset_at_epoch`` /
299
+ ``consecutive_count`` / ``capped_by_absolute_max`` are 0.1.33+. Older
300
+ callers that pass only the first 4 kwargs continue to work; the new
301
+ fields are omitted from the payload when None.
302
+ """
286
303
  from agent_runner.events import TRANSIENT_ERROR_BACKOFF_CAPPED, emit
287
304
 
288
- emit(
289
- log_dir,
290
- TRANSIENT_ERROR_BACKOFF_CAPPED,
291
- classification=classification,
292
- agent=agent,
293
- requested_sleep_s=requested_sleep_s,
294
- applied_sleep_s=applied_sleep_s,
295
- )
305
+ kwargs: dict = {
306
+ "classification": classification,
307
+ "agent": agent,
308
+ "requested_sleep_s": requested_sleep_s,
309
+ "applied_sleep_s": applied_sleep_s,
310
+ }
311
+ if original_reset_at_epoch is not None:
312
+ kwargs["original_reset_at_epoch"] = original_reset_at_epoch
313
+ if applied_reset_at_epoch is not None:
314
+ kwargs["applied_reset_at_epoch"] = applied_reset_at_epoch
315
+ if consecutive_count is not None:
316
+ kwargs["consecutive_count"] = consecutive_count
317
+ if capped_by_absolute_max is not None:
318
+ kwargs["capped_by_absolute_max"] = capped_by_absolute_max
319
+
320
+ emit(log_dir, TRANSIENT_ERROR_BACKOFF_CAPPED, **kwargs)
@@ -0,0 +1,133 @@
1
+ """Throttle state helpers — read events.jsonl tail for transient error state.
2
+
3
+ Internal module. Callers: runner.py (serve loop back-off), api.py (peek).
4
+ Separated from runner.py to satisfy the ouroboros defense: runner.py writes
5
+ events.jsonl but must never read it back (§3 module boundary invariant).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import time
12
+ from collections import deque
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ from agent_runner.api_types import TransientErrorState
17
+
18
+
19
+ def _check_throttle_state(log_dir: Path) -> TransientErrorState | None:
20
+ """Scan the tail of the last (sorted) `events-*.jsonl` file for the latest unmatched transient error.
21
+
22
+ Reads `transient_error_detected` / `transient_error_recovered` event names.
23
+ Returns TransientErrorState if currently throttled (reset still in future,
24
+ no matching recovered after). Restart-safe.
25
+ """
26
+ candidates = sorted(log_dir.glob("events-*.jsonl"))
27
+ if not candidates:
28
+ return None
29
+ with candidates[-1].open() as f:
30
+ tail = deque(f, maxlen=100)
31
+ events: list[dict[str, Any]] = []
32
+ for line in tail:
33
+ line = line.strip()
34
+ if not line:
35
+ continue
36
+ try:
37
+ events.append(json.loads(line))
38
+ except json.JSONDecodeError:
39
+ continue
40
+
41
+ latest_detected: dict[str, Any] | None = None
42
+ for ev in reversed(events):
43
+ kind = ev.get("event")
44
+ if kind == "transient_error_recovered":
45
+ return None
46
+ if kind == "transient_error_detected":
47
+ latest_detected = ev
48
+ break
49
+
50
+ if latest_detected is None:
51
+ return None
52
+ reset_at = int(latest_detected.get("reset_at_epoch", 0))
53
+ if reset_at <= time.time():
54
+ return None # Reset already passed without recovery emit; treat as recovered
55
+
56
+ classification = str(latest_detected.get("classification", "rate_limit_account"))
57
+
58
+ return TransientErrorState(
59
+ reset_at_epoch=reset_at,
60
+ classification=classification,
61
+ agent=str(latest_detected.get("agent", "unknown")),
62
+ since_round=int(latest_detected.get("round_num", 0)),
63
+ )
64
+
65
+
66
+ # Module-level supervisor state — bucket → consecutive-failure count.
67
+ # Cleared by reset_counters() or by serve restart.
68
+ _consecutive_failures: dict[str, int] = {}
69
+
70
+
71
+ def compute_adjusted_reset_at(
72
+ *,
73
+ classification: str,
74
+ original_reset_at_epoch: int,
75
+ agent: str,
76
+ log_dir: Path,
77
+ ) -> tuple[int, int, bool]:
78
+ """Apply exp backoff for estimated-class transient errors.
79
+
80
+ Returns (applied_reset_at_epoch, consecutive_count, capped_by_absolute_max).
81
+
82
+ For server-authoritative classification (``rate_limit_account``): returns
83
+ the original reset epoch verbatim, never increments the counter, and
84
+ never emits an adjustment event. Anthropic's resetsAt is authoritative.
85
+
86
+ For estimated classifications (``rate_limit_model``, ``api_transient_5xx``,
87
+ ``api_timeout``): increments the counter for this bucket, computes
88
+ duration = base × 2^min(n, _EXP_CAP) (base = the bucket's _BACK_OFF_DEFAULTS entry, n = the bucket's count before this increment), caps at _ABSOLUTE_CAP_S, emits
89
+ ``transient_error_backoff_capped`` if multiplier > 1 or capped.
90
+ """
91
+ from agent_runner._emit import emit_transient_error_backoff_capped
92
+ from agent_runner.builtin_plugins._constants import (
93
+ _ABSOLUTE_CAP_S,
94
+ _BACK_OFF_DEFAULTS,
95
+ _EXP_CAP,
96
+ )
97
+
98
+ if classification == "rate_limit_account":
99
+ # Server-authoritative: respect resetsAt verbatim, no counter touch.
100
+ return (original_reset_at_epoch, 0, False)
101
+
102
+ # Estimated class: apply exp backoff.
103
+ base = _BACK_OFF_DEFAULTS[classification]
104
+ n = _consecutive_failures.get(classification, 0)
105
+ multiplier = 2 ** min(n, _EXP_CAP)
106
+ extended_duration = base * multiplier
107
+ capped_by_absolute_max = extended_duration > _ABSOLUTE_CAP_S
108
+ applied_duration = min(extended_duration, _ABSOLUTE_CAP_S)
109
+ applied_reset_at = int(time.time()) + applied_duration
110
+
111
+ new_count = n + 1
112
+ _consecutive_failures[classification] = new_count
113
+
114
+ # Emit observability event when supervisor adjusted the wait.
115
+ if multiplier > 1 or capped_by_absolute_max:
116
+ emit_transient_error_backoff_capped(
117
+ log_dir,
118
+ classification=classification,
119
+ agent=agent,
120
+ requested_sleep_s=int(base),
121
+ applied_sleep_s=applied_duration,
122
+ original_reset_at_epoch=original_reset_at_epoch,
123
+ applied_reset_at_epoch=applied_reset_at,
124
+ consecutive_count=new_count,
125
+ capped_by_absolute_max=capped_by_absolute_max,
126
+ )
127
+
128
+ return (applied_reset_at, new_count, capped_by_absolute_max)
129
+
130
+
131
+ def reset_counters() -> None:
132
+ """Clear all bucket counters. Called by serve loop when no active throttle."""
133
+ _consecutive_failures.clear()
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.32'
22
- __version_tuple__ = version_tuple = (0, 1, 32)
21
+ __version__ = version = '0.1.33'
22
+ __version_tuple__ = version_tuple = (0, 1, 33)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -21,9 +21,11 @@ _BACK_OFF_DEFAULTS: dict[str, int] = {
21
21
  }
22
22
 
23
23
  # 5xx codes treated as transient (retry-worthy server errors per RFC 9110):
24
- # 500=unexpected, 502=bad gateway, 503=unavailable, 504=gateway timeout.
24
+ # 500=unexpected, 502=bad gateway, 503=unavailable, 504=gateway timeout,
25
+ # 529=overloaded (Anthropic's non-RFC code emitted during sustained capacity
26
+ # issues; treated as transient per Anthropic SDK behavior).
25
27
  # Excluded: 501 (not implemented = permanent), 505 (HTTP version mismatch).
26
- _5XX_STATUSES: frozenset[int] = frozenset({500, 502, 503, 504})
28
+ _5XX_STATUSES: frozenset[int] = frozenset({500, 502, 503, 504, 529})
27
29
 
28
30
  _CLASSIFICATIONS: frozenset[str] = frozenset(
29
31
  {
@@ -38,3 +40,17 @@ _CLASSIFICATIONS: frozenset[str] = frozenset(
38
40
  rate_limit_account uses server-provided resetsAt (excluded from
39
41
  _BACK_OFF_DEFAULTS table); others use defaults from that table.
40
42
  """
43
+
44
+ _EXP_CAP: int = 5
45
+ """Maximum exponent for transient-error consecutive backoff: 2^5 = 32×.
46
+
47
+ Beyond this, the multiplier plateaus. Combined with _ABSOLUTE_CAP_S, this
48
+ prevents runaway wait times during sustained outages (max wait = 30min).
49
+ """
50
+
51
+ _ABSOLUTE_CAP_S: int = 1800
52
+ """Absolute upper bound on supervisor-applied transient back-off (30 min).
53
+
54
+ Applies after exp multiplier — even if base × 2^5 exceeds this, the wait
55
+ is clipped here. Defends against an indefinitely-stuck supervisor.
56
+ """
@@ -20,6 +20,7 @@ from pathlib import Path
20
20
 
21
21
  from agent_runner._substrate import compute_git_head, compute_paths_hash
22
22
  from agent_runner._throttle import _check_throttle_state
23
+ from agent_runner._throttle import reset_counters as _reset_counters
23
24
  from agent_runner.api import (
24
25
  check_self_terminated_sentinel,
25
26
  emit_fresh_eyes_round_triggered,
@@ -151,6 +152,10 @@ def cmd(args) -> int:
151
152
  elif action == "stop":
152
153
  emit_rate_limit_stop(log_dir)
153
154
  break
155
+ else:
156
+ # No active throttle this round — supervisor counters can reset.
157
+ # Next failure (if any) restarts the exp backoff curve from 1×.
158
+ _reset_counters()
154
159
  if stop_file is not None and stop_file.exists():
155
160
  try:
156
161
  content = stop_file.read_text(encoding="utf-8", errors="replace")[:200]
@@ -44,17 +44,32 @@ _BACK_OFF_JITTER_MAX_S = 30
44
44
 
45
45
 
46
46
  def _apply_back_off(log_dir: Path, throttle: TransientErrorState) -> None:
47
- """Sleep until throttle.reset_at_epoch + jitter; emit recovered (and capped if applicable).
47
+ """Sleep until adjusted reset_at + jitter; emit recovered (and capped if applicable).
48
48
 
49
- Capped at _BACK_OFF_CAP_S to defend against malformed reset epochs.
49
+ For estimated-class classifications (rate_limit_model / api_transient_5xx /
50
+ api_timeout), applies exp backoff on consecutive failures via
51
+ `_throttle.compute_adjusted_reset_at`. For server-authoritative
52
+ rate_limit_account, the original reset_at_epoch is used verbatim.
53
+
54
+ Defensive 8h cap retained as last-line defense against malformed reset
55
+ epochs (e.g. an external/manual event with a far-future reset_at).
50
56
  """
57
+ from agent_runner import _throttle
58
+
59
+ adjusted_reset_at, _consecutive_count, _capped = _throttle.compute_adjusted_reset_at(
60
+ classification=throttle.classification,
61
+ original_reset_at_epoch=throttle.reset_at_epoch,
62
+ agent=throttle.agent,
63
+ log_dir=log_dir,
64
+ )
65
+
51
66
  now = time.time()
52
67
  requested = (
53
- throttle.reset_at_epoch
54
- - now
55
- + random.uniform(_BACK_OFF_JITTER_MIN_S, _BACK_OFF_JITTER_MAX_S)
68
+ adjusted_reset_at - now + random.uniform(_BACK_OFF_JITTER_MIN_S, _BACK_OFF_JITTER_MAX_S)
56
69
  )
57
70
  if requested > _BACK_OFF_CAP_S:
71
+ # Defensive: malformed reset epoch (e.g. manual event with far-future ts).
72
+ # Exp backoff layer caps at 30min, so legitimate flow never hits this.
58
73
  api.emit_transient_error_backoff_capped(
59
74
  log_dir,
60
75
  classification=throttle.classification,
@@ -65,7 +65,7 @@ surfacing everywhere.
65
65
  | `event_kind_registry` | Prevent events.emit() typos / unregistered kinds slipping past CI | `tests/invariants/test_event_kind_registry.py` |
66
66
  <!-- /gen:defenses-table -->
67
67
 
68
- ## Monitor: 9 detectors
68
+ ## Monitor: 11 detectors
69
69
 
70
70
  Three categories by `auto_action`:
71
71
 
@@ -87,6 +87,8 @@ agent-runner peek
87
87
  agent-runner peek --json
88
88
  agent-runner peek --select system.disk_used_pct
89
89
  agent-runner peek --select defenses
90
+ agent-runner peek --select events.agent_usage_recorded --window 5 # 0.1.32+: native event-kind query
91
+ agent-runner peek --select events.transient_error_detected --window 20
90
92
  agent-runner peek --round 42 --log # drill into round 42, include log tail
91
93
  agent-runner peek --events 50 # last 50 events
92
94
  ```
@@ -216,10 +216,11 @@ the underlying problem is unbounded lineage on a shared resource.
216
216
  event family is now `transient_error_detected` with a `classification`
217
217
  field (`rate_limit_account`, `rate_limit_model`, `api_transient_5xx`,
218
218
  `api_timeout`). The same back-off mechanism covers all 4 classifications.
219
- The legacy `rate_limit_rejected` event is still dual-emitted for the
220
- `rate_limit_account` case only (no removal date set); new subscribers
221
- should consume `transient_error_detected` for full coverage. See
222
- `docs/migrations/0.1.27.md` for the consumer dispatch recipe.
219
+ The legacy `rate_limit_rejected` aliases were removed in 0.1.29 — subscribe
220
+ to `transient_error_detected` (filter by `classification == "rate_limit_account"`
221
+ if you only want 5h-quota events). See `docs/migrations/0.1.27.md` for the
222
+ consumer dispatch recipe and `docs/migrations/0.1.29.md` for alias-removal
223
+ migration recipes.
223
224
 
224
225
  ## Writing post_round_hook plugins
225
226
 
@@ -0,0 +1,88 @@
1
+ # 0.1.33 — Transient-error exp backoff + 529 classification
2
+
3
+ **Date**: 2026-05-19
4
+
5
+ ## What changed
6
+
7
+ Two improvements to how agent-runner handles transient errors from claude.ai:
8
+
9
+ 1. **Exp backoff for estimated-class transient errors**. When a round fires the same
10
+ estimated-class transient (`rate_limit_model` / `api_transient_5xx` / `api_timeout`)
11
+ after waiting our previous estimate, the supervisor doubles the next wait. Curve:
12
+ `2^N` capped at 32× and 30 minutes absolute. Counter resets when a round completes
13
+ without firing a new transient. Defaults upgraded transparently — no config knobs,
14
+ no consumer action.
15
+
16
+ 2. **529 status code now classified as `api_transient_5xx`**. Anthropic's "overloaded"
17
+ response (HTTP 529, non-RFC) is correctly handled as a transient — supervisor
18
+ throttles instead of immediately re-dispatching. Previously fell through to
19
+ "unknown error, no transient detection" and hammered the upstream.
20
+
21
+ Server-authoritative `rate_limit_account` (Anthropic's `resetsAt`) is unchanged —
22
+ the server's exact unblock time is respected verbatim, no exp backoff applied.
23
+
24
+ ## Backoff curve reference (estimated classes)
25
+
26
+ | Bucket | Base | 1× | 2× | 4× | 8× | 16× | 32× (cap) | Absolute cap |
27
+ |---|---|---|---|---|---|---|---|---|
28
+ | `rate_limit_model` | 60s | 60s | 120s | 240s | 480s | 960s | **1800s** | 30 min |
29
+ | `api_transient_5xx` | 60s | 60s | 120s | 240s | 480s | 960s | **1800s** | 30 min |
30
+ | `api_timeout` | 30s | 30s | 60s | 120s | 240s | 480s | 960s | 30 min |
31
+
32
+ Multiplier = `2^min(consecutive_count - 1, 5)`. From the 6th consecutive failure the
33
+ multiplier plateaus at 32×. Effective wait may also be clipped by the 30-minute
34
+ absolute cap (e.g. `rate_limit_model` after 6 consecutive failures: `60 × 32 = 1920s`
35
+ clipped to `1800s`).
36
+
37
+ ## Observing the backoff curve
38
+
39
+ The `transient_error_backoff_capped` event now fires whenever the supervisor adjusts
40
+ the plugin-emitted wait — including the exp-backoff case (was previously
41
+ only-on-defensive-8h-cap).
42
+
43
+ ```bash
44
+ agent-runner peek --select events.transient_error_backoff_capped --window 20
45
+ ```
46
+
47
+ The payload includes:
48
+
49
+ - `original_reset_at_epoch` — what the plugin emitted (base × 1×)
50
+ - `applied_reset_at_epoch` — what the supervisor will actually sleep to
51
+ - `consecutive_count` — how many times this bucket fired in a row
52
+ - `capped_by_absolute_max` — whether the 30-min ceiling kicked in
53
+
54
+ ## Server-authoritative class (unchanged)
55
+
56
+ `rate_limit_account` events with Anthropic's `resetsAt` epoch are still respected
57
+ exactly. The exp backoff machinery never increments the counter for this bucket
58
+ and never emits `transient_error_backoff_capped`. The reasoning: server knows
59
+ when the 5-hour quota resets; second-guessing it would be counter-productive.
60
+
61
+ ## Tuning
62
+
63
+ No config knobs added. The curve parameters (base, multiplier, exp cap, absolute
64
+ cap) are hardcoded to sensible defaults. If your scenario needs different values,
65
+ open an issue with the specific case — we'll evaluate against
66
+ `docs/thesis.md` ("Not a remediation framework — defaults are right").
67
+
68
+ ## 529 callout
69
+
70
+ Anthropic returns HTTP 529 ("overloaded") during sustained capacity issues. This
71
+ is not in the RFC 9110 5xx set but Anthropic's SDK treats it as transient. Adding
72
+ it to `_5XX_STATUSES` keeps our classification consistent with upstream behavior.
73
+
74
+ ## No consumer action required
75
+
76
+ All changes are default-on or additive:
77
+
78
+ - Existing TOML keeps working unchanged.
79
+ - Existing event subscribers see a more populated `transient_error_backoff_capped`
80
+ payload (additive fields) — old fields retained. The event also fires more often now (see "Observing the backoff curve").
81
+ - Plugins (`claude_error_detector`, `gemini_error_detector`) unchanged.
82
+ - CLI surface unchanged.
83
+
84
+ If your code reads `transient_error_backoff_capped` payload, the four new fields
85
+ (`original_reset_at_epoch`, `applied_reset_at_epoch`, `consecutive_count`,
86
+ `capped_by_absolute_max`) are absent for events emitted before 0.1.33 (or by the
87
+ defensive 8h-cap path which still uses only the old payload shape). Defensive
88
+ parsing recommended.
@@ -95,9 +95,18 @@ class HookContext:
95
95
  project: str
96
96
  round_num: int
97
97
  phase: str | None
98
- agent_name: str | None
98
+ agent_name: str | None # cosmetic name from [agent].name TOML
99
+ agent_binary: str | None # 0.1.30+: basename of agent.command[0]
100
+ # plus dry_run, anomaly_repetitive_*, agent_log_path — see source for full set
99
101
  ```
100
102
 
103
+ For capability detection (e.g. "is this round running claude?"), plugins
104
+ should check `ctx.agent_binary == "claude"`, NOT `ctx.agent_name`. The
105
+ former is the actual binary basename; the latter is user-cosmetic and
106
+ may be overridden in `[agent] name = "..."` (this was a real bug fixed
107
+ in 0.1.30 — strict `agent_name` check silently suppressed events when
108
+ operators set custom names).
109
+
101
110
  `PostRoundHook` additionally receives a `RoundResult` (`from agent_runner.api_types import RoundResult`).
102
111
  Its field set is stable across 0.1.x (additions only).
103
112
 
@@ -289,7 +298,7 @@ and applies the configured `transient_error_action` (default `back_off`;
289
298
  No configuration required to enable the detector; it activates for any
290
299
  project using claude as the agent CLI.
291
300
 
292
- Non-claude agents: the detector returns early when `ctx.agent_name != "claude"`.
301
+ Non-claude agents: the detector returns early when `ctx.agent_binary != "claude"`.
293
302
  Third-party plugin authors may use the same `register_post_round_hook` API
294
303
  to ship equivalent detectors for other agent CLIs — the bundled
295
304
  `gemini_error_detector` is a working reference.
@@ -297,7 +306,7 @@ to ship equivalent detectors for other agent CLIs — the bundled
297
306
  ## Custom monitor detectors (§3.3)
298
307
 
299
308
  0.1.5 adds a fourth extension point — plugin authors can ship custom monitor
300
- detectors that run alongside the 10 builtins on every monitor poll.
309
+ detectors that run alongside the 11 builtins on every monitor poll.
301
310
 
302
311
  ### Group + Protocol
303
312
 
@@ -51,12 +51,50 @@ then silence = hung agent. Not generic anomaly. Per-project variance in
51
51
  token usage and round duration is large enough that rolling-baseline alerting
52
52
  would produce constant false positives across diverse workloads.
53
53
 
54
+ The `anomaly_repetitive_active` detector (added 0.1.32) is the live example:
55
+ it fires when the claude plugin emits `anomaly_repetitive_tool` events
56
+ above a fixed threshold within a window — a specific signature, not N-σ.
57
+ `max_grace_after_result_s` (0.1.31) is another: kills the subprocess after
58
+ a fixed grace following the `result` event — specific signature, not "is
59
+ this subprocess behaving unusually".
60
+
54
61
  > **Example**: A 2026-05-18 proposal requested a "cost spike detector" that
55
62
  > fires when this round's cost is N× the rolling 7-day average. Rejected.
56
63
  > The rolling baseline itself requires aggregation we don't own, and the
57
64
  > threshold N is project-specific. A consumer can compute this from the flat
58
65
  > events file.
59
66
 
67
+ ### How we handle transient errors: server-authoritative vs estimated
68
+
69
+ `transient_error_detected` events carry a `reset_at_epoch` field telling
70
+ the supervisor when to retry. Two cases with different policies:
71
+
72
+ - **Server-authoritative**: Anthropic's `rate_limit_event.resetsAt` is an
73
+ exact unblock time. We respect it verbatim — no backoff multipliers, no
74
+ 30-minute cap applied (only the defensive 8h guard against malformed reset epochs remains). Server knows best.
75
+ - **Estimated**: For other classifications (`rate_limit_model`,
76
+ `api_transient_5xx`, `api_timeout`), the plugin emits a default guess
77
+ (`_BACK_OFF_DEFAULTS[bucket]`). Guesses can be wrong; if a round fires
78
+ the same bucket again after waiting our guess, we increase the wait
79
+ exponentially (`2^N`, capped at 32× and 30 minutes absolute).
80
+
81
+ This split keeps the policy simple: trust the server when it talks, and
82
+ back off our own estimates when they prove insufficient. It is **not**
83
+ N-σ novelty detection (which we reject — see the section above); it
84
+ codifies the specific scar of "fixed-per-bucket backoff insufficient
85
+ during sustained upstream outage."
86
+
87
+ Counter reset: any round that completes without firing a new
88
+ `transient_error_detected` event clears all bucket counters back to zero.
89
+
90
+ > **Example**: Gateway 2026-05-18 reported sustained 5xx + 529 from
91
+ > Anthropic where our previous fixed 60s wait was too short — the next
92
+ > round hit the same error, waited 60s again, and again. Rejected: adding
93
+ > a config knob (`[runtime] transient_backoff_strategy = "fixed" |
94
+ > "exp"`). Instead: upgraded the default policy to exp backoff
95
+ > transparently, since "the default was wrong" is the right framing — not
96
+ > "the operator should pick between two strategies."
97
+
60
98
  ### Not an analytics database
61
99
 
62
100
  No `--select`-able query language beyond simple peek selectors. No event
@@ -44,7 +44,7 @@ ALLOWED_SERVE_FROM = [
44
44
  "agent_runner.round_log",
45
45
  {"ROUND_CURRENT_LINK", "atomic_relink", "next_round_num", "prune_old_round_logs"},
46
46
  ),
47
- ("agent_runner._throttle", {"_check_throttle_state"}),
47
+ ("agent_runner._throttle", {"_check_throttle_state", "reset_counters"}),
48
48
  ("agent_runner.runner", {"_apply_back_off"}),
49
49
  ]
50
50
 
@@ -613,3 +613,19 @@ def test_given_non_claude_binary_when_after_round_then_no_event(tmp_path):
613
613
  with patch(f"{_MOD}.emit_agent_usage_recorded") as emit:
614
614
  ClaudeErrorDetector().after_round(ctx, result)
615
615
  emit.assert_not_called()
616
+
617
+
618
+ def test_given_claude_log_with_529_overloaded_when_classified_then_api_transient_5xx(tmp_path):
619
+ """Anthropic's "overloaded" status (529) should classify as api_transient_5xx,
620
+ not fall through as unknown error. Real scar — gateway hits this during
621
+ sustained Anthropic capacity issues.
622
+ """
623
+ from agent_runner.builtin_plugins.claude_rate_limit import _parse_claude_log
624
+
625
+ log = tmp_path / "round-1.log"
626
+ log.write_text(
627
+ '{"type":"result","is_error":true,"api_error_status":529,"result":"Overloaded"}\n',
628
+ encoding="utf-8",
629
+ )
630
+ parsed = _parse_claude_log(log)
631
+ assert parsed["transient_error"]["classification"] == "api_transient_5xx"