cli-agent-runner 0.1.26__tar.gz → 0.1.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CHANGELOG.md +30 -0
  2. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/PKG-INFO +1 -1
  3. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_emit.py +16 -0
  4. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_version.py +2 -2
  5. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/api_types.py +0 -4
  6. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/claude_rate_limit.py +24 -6
  7. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/gemini.py +23 -2
  8. cli_agent_runner-0.1.28/docs/migrations/0.1.27.md +169 -0
  9. cli_agent_runner-0.1.28/docs/migrations/0.1.28.md +176 -0
  10. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/_test_helpers.py +8 -2
  11. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_types.py +9 -0
  12. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_claude_error_detector.py +147 -0
  13. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_gemini_plugin.py +94 -0
  14. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.codecov.yml +0 -0
  15. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.githooks/commit-msg +0 -0
  16. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  17. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  18. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  19. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  20. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/workflows/ci.yml +0 -0
  21. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.github/workflows/release.yml +0 -0
  22. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.gitignore +0 -0
  23. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/.vulture-whitelist.py +0 -0
  24. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CODE_OF_CONDUCT.md +0 -0
  25. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/CONTRIBUTING.md +0 -0
  26. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/LICENSE +0 -0
  27. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/README.md +0 -0
  28. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/README.zh.md +0 -0
  29. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/SECURITY.md +0 -0
  30. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/__init__.py +0 -0
  31. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_docgen.py +0 -0
  32. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_registry.py +0 -0
  33. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_substrate.py +0 -0
  34. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/_throttle.py +0 -0
  35. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/agent_runtime.py +0 -0
  36. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/api.py +0 -0
  37. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/__init__.py +0 -0
  38. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/builtin_plugins/_constants.py +0 -0
  39. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/__init__.py +0 -0
  40. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/__main__.py +0 -0
  41. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/common.py +0 -0
  42. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/init_cmd.py +0 -0
  43. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/install_cmd.py +0 -0
  44. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/monitor_cmd.py +0 -0
  45. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/peek_cmd.py +0 -0
  46. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/round_cmd.py +0 -0
  47. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/serve_cmd.py +0 -0
  48. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/service_cmd.py +0 -0
  49. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/cli/upgrade_cmd.py +0 -0
  50. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/config.py +0 -0
  51. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/context_store.py +0 -0
  52. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/defenses.py +0 -0
  53. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/detector_helpers.py +0 -0
  54. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/events.py +0 -0
  55. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/hooks.py +0 -0
  56. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/http_progress.py +0 -0
  57. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/lifecycle.py +0 -0
  58. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/metrics.py +0 -0
  59. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/monitor.py +0 -0
  60. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/__init__.py +0 -0
  61. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/aider.toml +0 -0
  62. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/claude.toml +0 -0
  63. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/presets/gemini.toml +0 -0
  64. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/prompt_loader.py +0 -0
  65. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/round_log.py +0 -0
  66. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/round_view.py +0 -0
  67. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/runner.py +0 -0
  68. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/scaffold.py +0 -0
  69. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/service_unit.py +0 -0
  70. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/startup_check.py +0 -0
  71. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/agent_runner/vcs_state.py +0 -0
  72. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/build.sh +0 -0
  73. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/example-agent-runner.toml +0 -0
  74. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/launchd.plist.tmpl +0 -0
  75. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/run-loop.sh +0 -0
  76. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/deploy/systemd.service.tmpl +0 -0
  77. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/README.md +0 -0
  78. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/architecture.md +0 -0
  79. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/commands.md +0 -0
  80. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/configuration.md +0 -0
  81. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/events.md +0 -0
  82. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/long-running-agents.md +0 -0
  83. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/marketing/README.md +0 -0
  84. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/marketing/promo-cn.html +0 -0
  85. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.16.md +0 -0
  86. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.17.md +0 -0
  87. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.19.md +0 -0
  88. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.20.md +0 -0
  89. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.21.md +0 -0
  90. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.22.md +0 -0
  91. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.23.md +0 -0
  92. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.24.md +0 -0
  93. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.25.md +0 -0
  94. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/migrations/0.1.26.md +0 -0
  95. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/plugins.md +0 -0
  96. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/quickstart.md +0 -0
  97. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/recipes/aider.md +0 -0
  98. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/docs/runbook.md +0 -0
  99. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/pyproject.toml +0 -0
  100. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/__init__.py +0 -0
  101. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/conftest.py +0 -0
  102. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/contract/__init__.py +0 -0
  103. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/contract/test_public_api_surface.py +0 -0
  104. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/__init__.py +0 -0
  105. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/conftest.py +0 -0
  106. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_graceful_stop.py +0 -0
  107. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_install_systemd.py +0 -0
  108. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_monitor_remote.py +0 -0
  109. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
  110. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/__init__.py +0 -0
  111. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_bounded_run.py +0 -0
  112. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_context_enricher_namespacing.py +0 -0
  113. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_fresh_eyes_signal.py +0 -0
  114. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_install_dry_run.py +0 -0
  115. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_monitor_seeded.py +0 -0
  116. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_plugin_detector_loaded.py +0 -0
  117. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_plugin_owned_paths.py +0 -0
  118. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_plugin_real_flow.py +0 -0
  119. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
  120. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_scaffold_presets.py +0 -0
  121. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_serve_loop.py +0 -0
  122. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_substrate_fingerprint.py +0 -0
  123. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/integration/test_transient_error_backoff.py +0 -0
  124. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/__init__.py +0 -0
  125. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_architecture.py +0 -0
  126. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_atomic_write_enforced.py +0 -0
  127. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_catalogs.py +0 -0
  128. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_docs_generated.py +0 -0
  129. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_event_kind_registry.py +0 -0
  130. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_events_doc_contract.py +0 -0
  131. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_module_boundaries.py +0 -0
  132. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_module_sizes.py +0 -0
  133. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_no_ai_signatures.py +0 -0
  134. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
  135. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_peek_schema_version.py +0 -0
  136. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
  137. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_round_result_stable.py +0 -0
  138. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
  139. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/__init__.py +0 -0
  140. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/parser.py +0 -0
  141. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/test_parser.py +0 -0
  142. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/literate/test_quickstart.py +0 -0
  143. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/__init__.py +0 -0
  144. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_agent_runtime.py +0 -0
  145. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_assemble_prompt.py +0 -0
  146. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_events_stream.py +0 -0
  147. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_install.py +0 -0
  148. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_observation.py +0 -0
  149. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_read_round_num.py +0 -0
  150. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_resolve_phase.py +0 -0
  151. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_api_service.py +0 -0
  152. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_auto_stop_gating.py +0 -0
  153. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli.py +0 -0
  154. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_common.py +0 -0
  155. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_init_install.py +0 -0
  156. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_monitor_http.py +0 -0
  157. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_service_peek_monitor.py +0 -0
  158. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_cli_upgrade.py +0 -0
  159. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config.py +0 -0
  160. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_fresh_eyes.py +0 -0
  161. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_max_rounds.py +0 -0
  162. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_rate_limit_action.py +0 -0
  163. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_stop_file.py +0 -0
  164. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
  165. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_config_transient_error_action.py +0 -0
  166. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_context_store.py +0 -0
  167. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_defenses.py +0 -0
  168. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_detector_helpers.py +0 -0
  169. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_detector_protocol.py +0 -0
  170. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_docgen.py +0 -0
  171. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_events.py +0 -0
  172. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_fresh_eyes_trigger.py +0 -0
  173. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_hook_failure_isolation.py +0 -0
  174. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_hooks.py +0 -0
  175. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_http_progress.py +0 -0
  176. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_init_entry_points.py +0 -0
  177. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_lifecycle.py +0 -0
  178. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_metrics.py +0 -0
  179. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_assembly.py +0 -0
  180. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
  181. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_detectors.py +0 -0
  182. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_monitor_remote.py +0 -0
  183. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_peek_argparse.py +0 -0
  184. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_presets.py +0 -0
  185. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_prompt_loader.py +0 -0
  186. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_round_log_helpers.py +0 -0
  187. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_round_view.py +0 -0
  188. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_runner.py +0 -0
  189. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_runner_throttle.py +0 -0
  190. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_scaffold.py +0 -0
  191. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_cmd_bounded.py +0 -0
  192. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_round_log.py +0 -0
  193. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_sentinel.py +0 -0
  194. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_serve_startup_hooks.py +0 -0
  195. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_service_unit.py +0 -0
  196. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_startup_check.py +0 -0
  197. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_substrate.py +0 -0
  198. {cli_agent_runner-0.1.26 → cli_agent_runner-0.1.28}/tests/unit/test_vcs_state.py +0 -0
@@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.28] - 2026-05-17
11
+
12
+ ### Added
13
+ - `agent_usage_recorded` event: new fields `cache_creation_tokens` (claude only, 0 for gemini),
14
+ `tool_call_count`, `phase`, `success`. Enables full cost reconciliation and phase/status
15
+ segmentation by consumers.
16
+
17
+ ### Changed
18
+ - gemini `models_breakdown` per-model entries no longer include raw `input` / `cached` keys;
19
+ canonical `input_tokens` / `cached_tokens` only. Consumers reading raw keys must migrate.
20
+
21
+ ### Removed
22
+ - `agent_runner.api_types.ThrottleState` dead alias (0.1.23 back-compat; deprecation window
23
+ passed; switch to `TransientErrorState`).
24
+
25
+ See `docs/migrations/0.1.28.md`.
26
+
27
+ ## [0.1.27] - 2026-05-17
28
+
29
+ ### Fixed
30
+ - claude plugin: rate_limit_event with rateLimitType=null no longer misclassified as account
31
+ 5h quota; falls through to api_error_status-based bucket (e.g. infra 429 → rate_limit_model).
32
+ Affects supervisors consuming transient_error_detected.
33
+
34
+ ### Added
35
+ - docs/migrations/0.1.27.md: supervisor usage guide for transient_error_detected event
36
+ (4-bucket dispatch table + back-off recipe).
37
+
38
+ See `docs/migrations/0.1.27.md`.
39
+
10
40
  ## [0.1.26] - 2026-05-17
11
41
 
12
42
  - Fix claude `agent_usage_recorded` `model` field (was always
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cli-agent-runner
3
- Version: 0.1.26
3
+ Version: 0.1.28
4
4
  Summary: Restart-on-exit supervisor for autonomous CLI agents
5
5
  Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
6
6
  Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
@@ -216,6 +216,10 @@ def emit_agent_usage_recorded(
216
216
  cost_usd: float | None,
217
217
  duration_ms: int,
218
218
  models_breakdown: dict[str, dict[str, int]] | None = None,
219
+ cache_creation_tokens: int = 0,
220
+ tool_call_count: int = 0,
221
+ phase: str = "",
222
+ success: bool = True,
219
223
  ) -> None:
220
224
  """Emit per-round usage record from a CLI plugin.
221
225
 
@@ -229,6 +233,14 @@ def emit_agent_usage_recorded(
229
233
  (gemini has no cost field; claude exposes total_cost_usd).
230
234
  - ``models_breakdown``: only populated when a round used multiple models
231
235
  (gemini multi-model rounds). None for claude (always single-model).
236
+ - ``cache_creation_tokens``: claude only — ``usage.cache_creation_input_tokens``,
237
+ independent count from ``cached_tokens`` (cache_read). Billed at ~25% premium
238
+ over fresh input per Anthropic pricing. Gemini has no equivalent → 0.
239
+ - ``tool_call_count``: number of tool invocations the agent made in the round.
240
+ Claude: count of ``tool_use`` content blocks across all assistant events.
241
+ Gemini: ``stats.tool_calls``.
242
+ - ``phase``: phase label from HookContext (e.g. "planning"); empty string when None.
243
+ - ``success``: True when exit_code == 0 and not timed_out.
232
244
  """
233
245
  from agent_runner.events import AGENT_USAGE_RECORDED, emit
234
246
 
@@ -244,6 +256,10 @@ def emit_agent_usage_recorded(
244
256
  cost_usd=cost_usd,
245
257
  duration_ms=duration_ms,
246
258
  models_breakdown=models_breakdown,
259
+ cache_creation_tokens=cache_creation_tokens,
260
+ tool_call_count=tool_call_count,
261
+ phase=phase,
262
+ success=success,
247
263
  )
248
264
 
249
265
 
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.26'
22
- __version_tuple__ = version_tuple = (0, 1, 26)
21
+ __version__ = version = '0.1.28'
22
+ __version_tuple__ = version_tuple = (0, 1, 28)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -139,10 +139,6 @@ class TransientErrorState:
139
139
  since_round: int
140
140
 
141
141
 
142
- # 0.1.23 back-compat alias; drop in 0.1.24
143
- ThrottleState = TransientErrorState
144
-
145
-
146
142
  @dataclass(frozen=True)
147
143
  class RoundResult:
148
144
  """Result of one ``run_one_round`` call.
@@ -64,7 +64,13 @@ class ClaudeErrorDetector:
64
64
  )
65
65
 
66
66
  if parsed.get("usage"):
67
- emit_agent_usage_recorded(ctx.log_dir, round_num=ctx.round_num, **parsed["usage"])
67
+ emit_agent_usage_recorded(
68
+ ctx.log_dir,
69
+ round_num=ctx.round_num,
70
+ phase=ctx.phase or "",
71
+ success=(result.exit_code == 0 and not result.timed_out),
72
+ **parsed["usage"],
73
+ )
68
74
 
69
75
 
70
76
  def _parse_claude_log(log_path: Path) -> dict[str, Any]:
@@ -77,6 +83,7 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
77
83
  rate_limit_info: dict | None = None
78
84
  result_event: dict | None = None
79
85
  assistant_model: str | None = None
86
+ tool_call_count = 0
80
87
  for line in tail:
81
88
  line = line.strip()
82
89
  if not line:
@@ -97,6 +104,11 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
97
104
  model_val = msg.get("model") if isinstance(msg, dict) else None
98
105
  if model_val:
99
106
  assistant_model = str(model_val)
107
+ content = msg.get("content", []) if isinstance(msg, dict) else []
108
+ if isinstance(content, list):
109
+ tool_call_count += sum(
110
+ 1 for c in content if isinstance(c, dict) and c.get("type") == "tool_use"
111
+ )
100
112
 
101
113
  out: dict[str, Any] = {}
102
114
 
@@ -105,7 +117,9 @@ def _parse_claude_log(log_path: Path) -> dict[str, Any]:
105
117
  out["transient_error"] = error_payload
106
118
 
107
119
  if result_event is not None:
108
- usage_payload = _extract_usage(result_event, model=assistant_model)
120
+ usage_payload = _extract_usage(
121
+ result_event, model=assistant_model, tool_call_count=tool_call_count
122
+ )
109
123
  if usage_payload is not None:
110
124
  out["usage"] = usage_payload
111
125
 
@@ -118,13 +132,15 @@ def _classify_transient_error(
118
132
  """Refactored from prior _scan_log_for_transient_error 0.1.23 logic; same shape, same
119
133
  priority (rate_limit_event.rejected > 429 > 5xx > 408).
120
134
  """
121
- if rate_limit_info is not None:
135
+ if rate_limit_info is not None and rate_limit_info.get("rateLimitType") == "five_hour":
122
136
  return {
123
137
  "classification": "rate_limit_account",
124
138
  "agent": "claude",
125
139
  "reset_at_epoch": int(rate_limit_info.get("resetsAt", time.time() + 300)),
126
140
  "raw": str((result_event or {}).get("result", ""))[:_RAW_CAP],
127
141
  }
142
+ # rate_limit_event with null/other rateLimitType falls through to status-based
143
+ # classification below.
128
144
  if result_event is None or result_event.get("is_error") is not True:
129
145
  return None
130
146
  status = result_event.get("api_error_status")
@@ -138,7 +154,7 @@ def _classify_transient_error(
138
154
  return None
139
155
 
140
156
 
141
- def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
157
+ def _extract_usage(result_event: dict, *, model: str | None, tool_call_count: int) -> dict | None:
142
158
  """Extract usage payload from claude result event.
143
159
 
144
160
  Returns None if no usage field present.
@@ -149,8 +165,8 @@ def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
149
165
  (they're independent counts). Earlier 0.1.24 simplify pass incorrectly
150
166
  subtracted cached from input; 0.1.26 reverts to the correct direct read.
151
167
  - ``cached_tokens`` is cache reads only (``cache_read_input_tokens``).
152
- Cache-creation is omitted from the unified schema; can be added in 0.1.27+
153
- if aggregation needs distinguishing.
168
+ - ``cache_creation_tokens`` is ``cache_creation_input_tokens`` (write cost,
169
+ billed at ~25% premium over fresh input per Anthropic pricing).
154
170
  - ``models_breakdown`` always None for claude (single-model per round);
155
171
  only populated by gemini multi-model rounds.
156
172
  - ``model`` from caller — ``_parse_claude_log`` tracks the latest
@@ -166,9 +182,11 @@ def _extract_usage(result_event: dict, *, model: str | None) -> dict | None:
166
182
  "input_tokens": int(usage.get("input_tokens", 0)),
167
183
  "output_tokens": int(usage.get("output_tokens", 0)),
168
184
  "cached_tokens": int(usage.get("cache_read_input_tokens", 0)),
185
+ "cache_creation_tokens": int(usage.get("cache_creation_input_tokens", 0)),
169
186
  "cost_usd": result_event.get("total_cost_usd"),
170
187
  "duration_ms": int(result_event.get("duration_ms", 0)),
171
188
  "models_breakdown": None,
189
+ "tool_call_count": tool_call_count,
172
190
  }
173
191
 
174
192
 
@@ -42,7 +42,13 @@ class GeminiErrorDetector:
42
42
  te = parsed["transient_error"]
43
43
  emit_transient_error_detected(ctx.log_dir, round_num=ctx.round_num, **te)
44
44
  if parsed.get("usage"):
45
- emit_agent_usage_recorded(ctx.log_dir, round_num=ctx.round_num, **parsed["usage"])
45
+ emit_agent_usage_recorded(
46
+ ctx.log_dir,
47
+ round_num=ctx.round_num,
48
+ phase=ctx.phase or "",
49
+ success=(result.exit_code == 0 and not result.timed_out),
50
+ **parsed["usage"],
51
+ )
46
52
 
47
53
 
48
54
  def _parse_gemini_log(log_path: Path) -> dict[str, Any]:
@@ -104,15 +110,30 @@ def _extract_usage(stats: dict[str, Any]) -> dict[str, Any]:
104
110
  primary_model = (
105
111
  max(models, key=lambda m: models[m].get("total_tokens", 0)) if models else "unknown"
106
112
  )
113
+ breakdown = (
114
+ {
115
+ name: {
116
+ "total_tokens": int(m.get("total_tokens", 0)),
117
+ "input_tokens": int(m.get("input_tokens", m.get("input", 0))),
118
+ "output_tokens": int(m.get("output_tokens", 0)),
119
+ "cached_tokens": int(m.get("cached", 0)),
120
+ }
121
+ for name, m in models.items()
122
+ }
123
+ if len(models) > 1
124
+ else None
125
+ )
107
126
  return {
108
127
  "agent": "gemini",
109
128
  "model": primary_model,
110
129
  "input_tokens": int(stats.get("input", 0)),
111
130
  "output_tokens": int(stats.get("output_tokens", 0)),
112
131
  "cached_tokens": int(stats.get("cached", 0)),
132
+ "cache_creation_tokens": 0, # gemini has no cache-creation concept
113
133
  "cost_usd": None, # gemini doesn't expose USD
114
134
  "duration_ms": int(stats.get("duration_ms", 0)),
115
- "models_breakdown": models if len(models) > 1 else None,
135
+ "models_breakdown": breakdown,
136
+ "tool_call_count": int(stats.get("tool_calls", 0)),
116
137
  }
117
138
 
118
139
 
@@ -0,0 +1,169 @@
1
+ # 0.1.27 — Rate-limit classifier fix + supervisor usage guide
2
+
3
+ ## What changed
4
+
5
+ The claude built-in plugin (`agent_runner.builtin_plugins.claude_rate_limit`) previously
6
+ misclassified any `rate_limit_event` with `status="rejected"` as `rate_limit_account`
7
+ (account-level 5-hour quota exhaustion), regardless of `rateLimitType`. As of 0.1.27, the
8
+ `rate_limit_account` branch requires `rateLimitType == "five_hour"`; other `rate_limit_event`
9
+ values (e.g. `rateLimitType: null` for claude.ai infrastructure throttling) fall through to
10
+ status-code-based classification.
11
+
12
+ Concretely: a claude.ai 429 with `rateLimitType: null` is now correctly emitted as
13
+ `transient_error_detected` with `classification: "rate_limit_model"` and a 60-second default
14
+ `reset_at_epoch`, instead of `rate_limit_account` with a synthetic 5-minute fallback epoch.
15
+
16
+ No event schema changes. No new event kinds. No new public API.
17
+
18
+ ## Supervisor usage — consuming `transient_error_detected`
19
+
20
+ External supervisors should subscribe to the `transient_error_detected` event family (added in
21
+ 0.1.23). The event carries a `classification` discriminator with one of 4 values:
22
+
23
+ | classification | Trigger | reset_at_epoch semantics | Suggested supervisor action |
24
+ |--------------------|------------------------------------------------------|-------------------------------------|-------------------------------------------|
25
+ | rate_limit_account | rate_limit_event.rateLimitType == "five_hour" | Server-provided `resetsAt` (exact) | Sleep until reset_at_epoch (multi-hour) |
26
+ | rate_limit_model | api_error_status == 429 (and not five_hour) | now + 60s default (no server hint) | Sleep until reset_at_epoch or exp-backoff |
27
+ | api_transient_5xx | api_error_status in {500, 502, 503, 504} | now + 60s default | Sleep until reset_at_epoch or exp-backoff |
28
+ | api_timeout | api_error_status == 408 | now + 30s default | Sleep until reset_at_epoch or exp-backoff |
29
+
30
+ ### Event payload shape
31
+
32
+ Each line in `events-YYYY-MM.jsonl` looks like:
33
+
34
+ ```json
35
+ {
36
+ "ts": "2026-05-17T02:13:44.123Z",
37
+ "event": "transient_error_detected",
38
+ "classification": "rate_limit_model",
39
+ "agent": "claude",
40
+ "reset_at_epoch": 1747450424,
41
+ "round_num": 7,
42
+ "raw": "API Error: Server is temporarily limiting requests (not your usage limit) · Rate limited"
43
+ }
44
+ ```
45
+
46
+ Fields:
47
+
48
+ - `ts` — ISO 8601 UTC timestamp of event emission.
49
+ - `event` — always `"transient_error_detected"`.
50
+ - `classification` — one of the 4 buckets above.
51
+ - `agent` — `"claude"` (gemini uses the same schema via its own plugin).
52
+ - `reset_at_epoch` — Unix epoch seconds; supervisor sleeps until this time. For
53
+ `rate_limit_account` this is the server-provided exact unblock time; for all other buckets
54
+ it is `now + default_seconds` at the moment of emission.
55
+ - `round_num` — which agent round triggered the error.
56
+ - `raw` — first 200 chars of the result text (useful for logging/alerting).
57
+
58
+ ### Dispatch recipe (Python)
59
+
60
+ ```python
61
+ import time
62
+
63
+
64
+ def handle_transient_error(event: dict) -> None:
65
+ """React to a transient_error_detected event from agent-runner."""
66
+ bucket = event["classification"]
67
+ reset_at = event["reset_at_epoch"]
68
+ now = time.time()
69
+ wait_s = max(reset_at - now, 0)
70
+
71
+ if bucket == "rate_limit_account":
72
+ # Server-provided exact unblock time; respect it (multi-hour wait typical).
73
+ time.sleep(wait_s)
74
+ elif bucket == "rate_limit_model":
75
+ # Infra-level 429; 60s default. Apply your own exp-backoff curve if desired.
76
+ time.sleep(wait_s) # or: time.sleep(exp_backoff_with_cap(attempts, cap=300))
77
+ elif bucket == "api_transient_5xx":
78
+ # Transient server error; 60s default.
79
+ time.sleep(wait_s)
80
+ elif bucket == "api_timeout":
81
+ # Request timed out; 30s default.
82
+ time.sleep(wait_s)
83
+ else:
84
+ # Unknown future bucket — safe fallback.
85
+ time.sleep(max(wait_s, 30))
86
+ ```
87
+
88
+ ### Default back-off vs. your own curve
89
+
90
+ Agent-runner's defaults (`rate_limit_model` and `api_transient_5xx` → 60s, `api_timeout` → 30s)
91
+ are a conservative baseline — a flat one-shot sleep. Supervisors that track consecutive failures
92
+ may apply an exponential curve with a cap (e.g. 30s → 60s → 120s → 300s max) for
93
+ `rate_limit_model` and `api_transient_5xx`. For `rate_limit_account`, always respect
94
+ `reset_at_epoch` verbatim — the server provides the exact unblock time.
95
+
96
+ ## Migration from legacy `rate_limit_rejected` event
97
+
98
+ Consumers that still listen to `rate_limit_rejected` (added in 0.1.20) continue to receive it
99
+ for `rate_limit_account` events only. It is emitted as a back-compat dual-emit alongside
100
+ `transient_error_detected`. New consumers should subscribe to `transient_error_detected` for
101
+ full 4-bucket coverage; `rate_limit_rejected` only fires for the `five_hour` bucket and carries
102
+ no `classification` field.
103
+
104
+ ## Verification
105
+
106
+ Write the incident JSONL into a temporary file and call `_parse_claude_log` directly:
107
+
108
+ ```bash
109
+ mkdir -p /tmp/verify-0.1.27
110
+ cat > /tmp/verify-0.1.27/round-1.log <<'EOF'
111
+ {"type":"rate_limit_event","rate_limit_info":{"status":"rejected","rateLimitType":null}}
112
+ {"type":"assistant","message":{"model":"claude-opus-4-7","content":[{"type":"text","text":"API Error: rate limited"}]}}
113
+ {"type":"result","is_error":true,"api_error_status":429,"result":"API Error: rate limited","usage":{"input_tokens":100,"output_tokens":10,"cache_read_input_tokens":0},"duration_ms":1000,"total_cost_usd":0.01}
114
+ EOF
115
+
116
+ .venv/bin/python -c "
117
+ from pathlib import Path
118
+ from agent_runner.builtin_plugins.claude_rate_limit import _parse_claude_log
119
+ import json
120
+ print(json.dumps(_parse_claude_log(Path('/tmp/verify-0.1.27/round-1.log')), indent=2, default=str))
121
+ "
122
+ ```
123
+
124
+ Expected output:
125
+
126
+ ```json
127
+ {
128
+ "transient_error": {
129
+ "classification": "rate_limit_model",
130
+ "agent": "claude",
131
+ "reset_at_epoch": 1747450484,
132
+ "raw": "API Error: rate limited"
133
+ },
134
+ "usage": {
135
+ "agent": "claude",
136
+ "model": "claude-opus-4-7",
137
+ "input_tokens": 100,
138
+ "output_tokens": 10,
139
+ "cached_tokens": 0,
140
+ "cost_usd": 0.01,
141
+ "duration_ms": 1000
142
+ }
143
+ }
144
+ ```
145
+
146
+ `classification` must be `"rate_limit_model"` and `reset_at_epoch` must be approximately
147
+ `now + 60`. Clean up with `rm -rf /tmp/verify-0.1.27`.
148
+
149
+ ## Impact summary
150
+
151
+ Supervisors that dispatch semantically on `classification == "rate_limit_account"` (e.g. "this
152
+ is the 5-hour quota — wait until reset") will no longer trigger that path for infrastructure
153
+ 429s with `rateLimitType: null`. Instead, a `rate_limit_model` event fires with a 60-second
154
+ `reset_at_epoch`.
155
+
156
+ Supervisors that dispatch only on `reset_at_epoch` (ignoring classification) will see a shorter
157
+ wait (60s instead of ~300s) for infra 429s — a net improvement.
158
+
159
+ Supervisors subscribed to the legacy `rate_limit_rejected` event are unaffected: that event
160
+ only fires for genuine `rate_limit_account` (five_hour) events, which continue to work as
161
+ before.
162
+
163
+ ## What did NOT change
164
+
165
+ - `transient_error_detected` event schema — field names, field types unchanged.
166
+ - `_BACK_OFF_DEFAULTS` table — unchanged.
167
+ - Legacy `rate_limit_rejected` back-compat emission for `rate_limit_account` — unchanged.
168
+ - Other plugins (gemini) — no equivalent `rate_limit_event` semantics; not touched.
169
+ - Public API surface — no new functions, no new event kinds, no signature changes.
@@ -0,0 +1,176 @@
1
+ # Migration Guide — 0.1.28
2
+
3
+ ## What changed
4
+
5
+ Three areas changed in 0.1.28. First, `agent_usage_recorded` events now carry four additional
6
+ fields: `cache_creation_tokens`, `tool_call_count`, `phase`, and `success`. These are populated
7
+ by the built-in claude and gemini plugins; third-party plugins that call the same
8
+ `emit_agent_usage_recorded` helper get the new fields for free with safe defaults. Second, the
9
+ gemini plugin's `models_breakdown` per-model dict no longer passes through the raw `input` and
10
+ `cached` keys from the gemini JSONL; only canonical `input_tokens` and `cached_tokens` are
11
+ present. Consumers iterating `models_breakdown` entries need a one-line rename. Third, the
12
+ `ThrottleState` back-compat alias (introduced 0.1.23 when the class was renamed to
13
+ `TransientErrorState`) is removed; any import of `ThrottleState` now raises `ImportError`.
14
+
15
+ ---
16
+
17
+ ## New fields reference
18
+
19
+ | Field | Type | Semantics | claude value | gemini value |
20
+ |---|---|---|---|---|
21
+ | `cache_creation_tokens` | `int` | Tokens written into the prompt cache (billed at ~25 % premium over fresh input per Anthropic pricing). Independent count from `cached_tokens` (reads). | `usage.cache_creation_input_tokens` | `0` (no creation concept) |
22
+ | `tool_call_count` | `int` | Number of tool invocations the agent made in the round. | Count of `tool_use` content blocks across all assistant events in the round JSONL. | `stats.tool_calls` |
23
+ | `phase` | `str` | Phase label from `HookContext.phase`; empty string when the round has no phase. | `ctx.phase or ""` | `ctx.phase or ""` |
24
+ | `success` | `bool` | `True` when `exit_code == 0` and `timed_out` is `False`. | `result.exit_code == 0 and not result.timed_out` | same |
25
+
26
+ All four fields have safe defaults in `emit_agent_usage_recorded` (`0`, `0`, `""`, `True`),
27
+ so third-party plugins that call the function without the new kwargs continue to work without
28
+ modification.
29
+
30
+ ---
31
+
32
+ ## Updated event payload example
33
+
34
+ ### Claude variant
35
+
36
+ ```json
37
+ {
38
+ "kind": "agent_usage_recorded",
39
+ "agent": "claude",
40
+ "model": "claude-opus-4-7",
41
+ "round_num": 3,
42
+ "input_tokens": 4200,
43
+ "output_tokens": 312,
44
+ "cached_tokens": 18900,
45
+ "cache_creation_tokens": 12223,
46
+ "cost_usd": 0.0812,
47
+ "duration_ms": 14470,
48
+ "models_breakdown": null,
49
+ "tool_call_count": 2,
50
+ "phase": "dev",
51
+ "success": true
52
+ }
53
+ ```
54
+
55
+ ### Gemini variant
56
+
57
+ ```json
58
+ {
59
+ "kind": "agent_usage_recorded",
60
+ "agent": "gemini",
61
+ "model": "gemini-3-flash-preview",
62
+ "round_num": 1,
63
+ "input_tokens": 4614,
64
+ "output_tokens": 91,
65
+ "cached_tokens": 15119,
66
+ "cache_creation_tokens": 0,
67
+ "cost_usd": null,
68
+ "duration_ms": 5337,
69
+ "models_breakdown": null,
70
+ "tool_call_count": 1,
71
+ "phase": "",
72
+ "success": true
73
+ }
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Cost reconciliation recipe
79
+
80
+ ```python
81
+ def total_token_cost(event: dict) -> float | None:
82
+ """Return estimated USD cost for a round.
83
+
84
+ For claude: use the recorded cost_usd (includes cache read + write billing).
85
+ For gemini: cost_usd is null; billing requires provider pricing sheet.
86
+ """
87
+ if event.get("cost_usd") is not None:
88
+ return event["cost_usd"]
89
+ # gemini: no USD field; caller must apply provider pricing
90
+ # approximate: input_tokens * input_rate + output_tokens * output_rate + cached_tokens * cache_rate
91
+ return None
92
+
93
+
94
+ def audit_cache_efficiency(event: dict) -> dict:
95
+ """Breakdown of cache hit vs creation vs fresh input for a claude round."""
96
+ return {
97
+ "fresh_input": event["input_tokens"],
98
+ "cache_read": event["cached_tokens"],
99
+ "cache_write": event["cache_creation_tokens"],
100
+ "total_throughput": (
101
+ event["input_tokens"] + event["cached_tokens"] + event["cache_creation_tokens"]
102
+ ),
103
+ }
104
+ ```
105
+
106
+ ---
107
+
108
+ ## `models_breakdown` migration
109
+
110
+ Pre-0.1.28, gemini multi-model rounds passed raw gemini stat keys directly into each
111
+ `models_breakdown` entry. Those entries contained both `input` (raw) and `input_tokens`
112
+ (canonical) with the same value, but only `cached` (raw) — the canonical `cached_tokens` key was missing entirely.
113
+
114
+ 0.1.28 normalises to canonical keys only:
115
+
116
+ | Pre-0.1.28 key | Removed? | Canonical replacement |
117
+ |---|---|---|
118
+ | `input` | yes | `input_tokens` |
119
+ | `cached` | yes | `cached_tokens` |
120
+ | `input_tokens` | kept | — |
121
+ | `output_tokens` | kept | — |
122
+ | `total_tokens` | kept | — |
123
+
124
+ Migration for any consumer iterating breakdown entries:
125
+
126
+ ```python
127
+ # Before
128
+ entry["input"] # raw gemini field
129
+ entry["cached"] # raw gemini field
130
+
131
+ # After
132
+ entry["input_tokens"] # canonical
133
+ entry["cached_tokens"] # canonical
134
+ ```
135
+
136
+ ---
137
+
138
+ ## `ThrottleState` import migration
139
+
140
+ ```python
141
+ # Before (raises ImportError on 0.1.28+)
142
+ from agent_runner.api_types import ThrottleState
143
+
144
+ # After
145
+ from agent_runner.api_types import TransientErrorState
146
+ ```
147
+
148
+ `ThrottleState` was renamed to `TransientErrorState` in 0.1.23 when the `classification` field
149
+ was added. The alias was retained through 0.1.27. It is now removed.
150
+
151
+ ---
152
+
153
+ ## Verification recipe
154
+
155
+ Run one round and inspect the emitted event:
156
+
157
+ ```bash
158
+ cd /path/to/your/project
159
+ agent-runner serve --max-rounds 1
160
+
161
+ # Find the most recent (current month's) events file
162
+ EVENTS=$(ls logs/events-*.jsonl | tail -1)
163
+
164
+ # Print the usage event with pretty JSON
165
+ grep '"kind":"agent_usage_recorded"' "$EVENTS" | python3 -m json.tool
166
+
167
+ # Confirm all new fields are present
168
+ grep '"kind":"agent_usage_recorded"' "$EVENTS" | python3 -c "
169
+ import json, sys
170
+ evt = json.loads(sys.stdin.read())
171
+ for field in ['cache_creation_tokens', 'tool_call_count', 'phase', 'success']:
172
+ assert field in evt, f'missing field: {field}'
173
+ print(f'{field}: {evt[field]}')
174
+ print('All new fields present.')
175
+ "
176
+ ```
@@ -109,7 +109,13 @@ def read_events_for_current_month(log_dir: Path) -> list[dict]:
109
109
  return [json.loads(line) for line in events_path.read_text().splitlines() if line.strip()]
110
110
 
111
111
 
112
- def make_hook_context(tmp_path: Path, *, agent_name: str = "claude", round_num: int = 1):
112
+ def make_hook_context(
113
+ tmp_path: Path,
114
+ *,
115
+ agent_name: str = "claude",
116
+ round_num: int = 1,
117
+ phase: str | None = None,
118
+ ):
113
119
  """Build a minimal HookContext for plugin testing.
114
120
 
115
121
  agent_log_path is populated to match where runner.py writes the
@@ -126,7 +132,7 @@ def make_hook_context(tmp_path: Path, *, agent_name: str = "claude", round_num:
126
132
  log_dir=tmp_path,
127
133
  project="testproj",
128
134
  round_num=round_num,
129
- phase=None,
135
+ phase=phase,
130
136
  agent_name=agent_name,
131
137
  agent_log_path=agent_log_path,
132
138
  )
@@ -114,3 +114,12 @@ def test_given_state_default_when_constructed_then_recent_hook_failures_empty()
114
114
  service=ServiceStatus(mode=ServiceMode.NONE, active=False),
115
115
  )
116
116
  assert state.recent_hook_failures == []
117
+
118
+
119
+ def test_throttle_state_removed() -> None:
120
+ """ThrottleState alias was deprecated 0.1.23, removed 0.1.28.
121
+
122
+ Consumers should switch to TransientErrorState.
123
+ """
124
+ with pytest.raises(ImportError):
125
+ from agent_runner.api_types import ThrottleState # noqa: F401