cli-agent-runner 0.1.40__tar.gz → 0.1.42__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/CHANGELOG.md +21 -0
  2. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/PKG-INFO +5 -5
  3. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/README.md +4 -4
  4. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/README.zh.md +5 -5
  5. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_emit.py +23 -0
  6. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_version.py +2 -2
  7. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/api.py +56 -1
  8. cli_agent_runner-0.1.42/agent_runner/builtin_plugins/codewhale.py +133 -0
  9. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/init_cmd.py +13 -1
  10. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/serve_cmd.py +26 -5
  11. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/defenses.py +12 -2
  12. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/events.py +2 -0
  13. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/monitor.py +0 -25
  14. cli_agent_runner-0.1.42/agent_runner/presets/codewhale.toml +30 -0
  15. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/runner.py +5 -2
  16. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/scaffold.py +2 -2
  17. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/vcs_state.py +51 -3
  18. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/architecture.md +7 -5
  19. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/commands.md +2 -2
  20. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/configuration.md +7 -4
  21. cli_agent_runner-0.1.42/docs/migrations/0.1.42.md +58 -0
  22. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/quickstart.md +1 -1
  23. cli_agent_runner-0.1.42/docs/recipes/codewhale.md +98 -0
  24. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/thesis.md +38 -8
  25. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/pyproject.toml +1 -0
  26. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/_test_helpers.py +8 -3
  27. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_bounded_run.py +10 -2
  28. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_fresh_eyes_signal.py +2 -0
  29. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_scaffold_presets.py +2 -2
  30. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_substrate_fingerprint.py +5 -1
  31. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_architecture.py +4 -1
  32. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_doc_claims_match_ssot.py +11 -0
  33. cli_agent_runner-0.1.42/tests/unit/test_codewhale_plugin.py +155 -0
  34. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_defenses.py +2 -2
  35. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detectors.py +1 -18
  36. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_presets.py +30 -2
  37. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_runner.py +2 -2
  38. cli_agent_runner-0.1.42/tests/unit/test_serve_config_broken.py +33 -0
  39. cli_agent_runner-0.1.42/tests/unit/test_serve_crash_loop.py +128 -0
  40. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_vcs_state.py +69 -0
  41. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.codecov.yml +0 -0
  42. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  43. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  44. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  45. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  46. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/workflows/ci.yml +0 -0
  47. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.github/workflows/release.yml +0 -0
  48. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.gitignore +0 -0
  49. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/.vulture-whitelist.py +0 -0
  50. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/CODE_OF_CONDUCT.md +0 -0
  51. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/CONTRIBUTING.md +0 -0
  52. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/LICENSE +0 -0
  53. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/SECURITY.md +0 -0
  54. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/__init__.py +0 -0
  55. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_docgen.py +0 -0
  56. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_redact.py +0 -0
  57. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_registry.py +0 -0
  58. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_substrate.py +0 -0
  59. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/_throttle.py +0 -0
  60. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/agent_runtime.py +0 -0
  61. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/api_types.py +0 -0
  62. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/__init__.py +0 -0
  63. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/_constants.py +0 -0
  64. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/claude_rate_limit.py +0 -0
  65. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/builtin_plugins/gemini.py +0 -0
  66. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/__init__.py +0 -0
  67. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/__main__.py +0 -0
  68. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/common.py +0 -0
  69. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/events_cmd.py +0 -0
  70. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/install_cmd.py +0 -0
  71. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/monitor_cmd.py +0 -0
  72. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/peek_cmd.py +0 -0
  73. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/round_cmd.py +0 -0
  74. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/service_cmd.py +0 -0
  75. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/cli/upgrade_cmd.py +0 -0
  76. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/config.py +0 -0
  77. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/context_store.py +0 -0
  78. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/detector_helpers.py +0 -0
  79. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/hooks.py +0 -0
  80. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/http_progress.py +0 -0
  81. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/lifecycle.py +0 -0
  82. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/metrics.py +0 -0
  83. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/__init__.py +0 -0
  84. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/aider.toml +0 -0
  85. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/claude.toml +0 -0
  86. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/presets/gemini.toml +0 -0
  87. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/prompt_loader.py +0 -0
  88. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/round_log.py +0 -0
  89. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/round_view.py +0 -0
  90. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/service_unit.py +0 -0
  91. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/agent_runner/startup_check.py +0 -0
  92. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/build.sh +0 -0
  93. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/example-agent-runner.toml +0 -0
  94. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/launchd.plist.tmpl +0 -0
  95. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/run-loop.sh +0 -0
  96. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/deploy/systemd.service.tmpl +0 -0
  97. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/README.md +0 -0
  98. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/events.md +0 -0
  99. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/long-running-agents.md +0 -0
  100. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/marketing/README.md +0 -0
  101. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/marketing/promo-cn.html +0 -0
  102. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.16.md +0 -0
  103. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.17.md +0 -0
  104. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.19.md +0 -0
  105. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.20.md +0 -0
  106. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.21.md +0 -0
  107. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.22.md +0 -0
  108. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.23.md +0 -0
  109. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.24.md +0 -0
  110. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.25.md +0 -0
  111. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.26.md +0 -0
  112. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.27.md +0 -0
  113. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.28.md +0 -0
  114. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.29.md +0 -0
  115. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.30.md +0 -0
  116. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.31.md +0 -0
  117. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.32.md +0 -0
  118. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.33.md +0 -0
  119. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.34.md +0 -0
  120. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.35.md +0 -0
  121. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.36.md +0 -0
  122. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.37.md +0 -0
  123. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.38.md +0 -0
  124. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.39.md +0 -0
  125. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/migrations/0.1.40.md +0 -0
  126. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/plugins.md +0 -0
  127. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/recipes/aider.md +0 -0
  128. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/docs/runbook.md +0 -0
  129. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/__init__.py +0 -0
  130. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/conftest.py +0 -0
  131. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/contract/__init__.py +0 -0
  132. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/contract/test_public_api_surface.py +0 -0
  133. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/__init__.py +0 -0
  134. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/conftest.py +0 -0
  135. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_graceful_stop.py +0 -0
  136. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_install_systemd.py +0 -0
  137. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_monitor_remote.py +0 -0
  138. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
  139. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
  140. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
  141. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
  142. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/__init__.py +0 -0
  143. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_context_enricher_namespacing.py +0 -0
  144. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_grace_kill_emission.py +0 -0
  145. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_install_dry_run.py +0 -0
  146. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_monitor_seeded.py +0 -0
  147. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_plugin_detector_loaded.py +0 -0
  148. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_plugin_owned_paths.py +0 -0
  149. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_plugin_real_flow.py +0 -0
  150. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
  151. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_serve_loop.py +0 -0
  152. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/integration/test_transient_error_backoff.py +0 -0
  153. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/__init__.py +0 -0
  154. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_atomic_write_enforced.py +0 -0
  155. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_catalogs.py +0 -0
  156. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_classification_ssot.py +0 -0
  157. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_docs_generated.py +0 -0
  158. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_entry_points_resolve.py +0 -0
  159. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_event_kind_registry.py +0 -0
  160. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_event_kinds_ssot.py +0 -0
  161. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_events_doc_contract.py +0 -0
  162. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_layer_2_loop_size.py +0 -0
  163. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_module_boundaries.py +0 -0
  164. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_module_sizes.py +0 -0
  165. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_no_ai_signatures.py +0 -0
  166. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
  167. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_peek_schema_version.py +0 -0
  168. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
  169. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_round_result_stable.py +0 -0
  170. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
  171. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/invariants/test_upstream_schema_canary.py +0 -0
  172. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/__init__.py +0 -0
  173. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/parser.py +0 -0
  174. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/test_parser.py +0 -0
  175. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/literate/test_quickstart.py +0 -0
  176. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/__init__.py +0 -0
  177. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_agent_runtime.py +0 -0
  178. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_agent_runtime_grace.py +0 -0
  179. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_agent_runtime_progress.py +0 -0
  180. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_assemble_prompt.py +0 -0
  181. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_events_stream.py +0 -0
  182. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_install.py +0 -0
  183. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_observation.py +0 -0
  184. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_read_round_num.py +0 -0
  185. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_resolve_phase.py +0 -0
  186. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_service.py +0 -0
  187. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_api_types.py +0 -0
  188. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_auto_stop_gating.py +0 -0
  189. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_claude_error_detector.py +0 -0
  190. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli.py +0 -0
  191. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_common.py +0 -0
  192. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_init_install.py +0 -0
  193. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_monitor_http.py +0 -0
  194. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_service_peek_monitor.py +0 -0
  195. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_cli_upgrade.py +0 -0
  196. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config.py +0 -0
  197. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_fresh_eyes.py +0 -0
  198. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_max_rounds.py +0 -0
  199. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_stop_file.py +0 -0
  200. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
  201. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_config_transient_error_action.py +0 -0
  202. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_context_store.py +0 -0
  203. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_detector_helpers.py +0 -0
  204. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_detector_protocol.py +0 -0
  205. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_docgen.py +0 -0
  206. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_events.py +0 -0
  207. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_events_cmd.py +0 -0
  208. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_fresh_eyes_trigger.py +0 -0
  209. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_gemini_plugin.py +0 -0
  210. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_hook_failure_isolation.py +0 -0
  211. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_hooks.py +0 -0
  212. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_http_progress.py +0 -0
  213. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_init_entry_points.py +0 -0
  214. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_lifecycle.py +0 -0
  215. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_metrics.py +0 -0
  216. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_assembly.py +0 -0
  217. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
  218. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
  219. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_detect_supervisor_stale.py +0 -0
  220. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_monitor_remote.py +0 -0
  221. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_peek_argparse.py +0 -0
  222. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_peek_select.py +0 -0
  223. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_prompt_loader.py +0 -0
  224. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_redact.py +0 -0
  225. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_round_log_helpers.py +0 -0
  226. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_round_view.py +0 -0
  227. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_runner_throttle.py +0 -0
  228. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_scaffold.py +0 -0
  229. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_cmd_bounded.py +0 -0
  230. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_round_log.py +0 -0
  231. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_sentinel.py +0 -0
  232. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_serve_startup_hooks.py +0 -0
  233. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_service_unit.py +0 -0
  234. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_startup_check.py +0 -0
  235. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.42}/tests/unit/test_substrate.py +0 -0
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.42] - 2026-06-25
9
+
10
+ ### Added
11
+ - `crash_loop` defense — serve stops after 5 consecutive *unknown* short crashes (non-zero exit, <60s, no classified transient), escalating the restart delay and recording the failure reason. Ends the respawn-forever crash loop; recoverable-slow failures (rate-limit / quota / 5xx / timeout) still ride the transient-error backoff unchanged.
12
+ - `config_broken` defense — a permanent startup-battery failure now halts serve (distinct no-retry exit code `78`) instead of respawning a broken config every round.
13
+
14
+ ### Fixed
15
+ - `vcs.dirty_action` no longer sweeps the runner's own `log_dir` bookkeeping when `log_dir` is inside `work_dir`: `auto_commit` excludes it from the commit (no more phantom `git_head` advance on a zero-work round) and `stash` excludes it from `git stash push -u` (logs no longer vanish). `.evolving/` and agent work are unaffected.
16
+
17
+ ### Removed
18
+ - The inert `smoke_fail_rate` monitor alert (could never fire — superseded by the always-on `config_broken` stop). Monitor now ships 11 detectors.
19
+
20
+ ### Docs
21
+ - `thesis.md`: the stuck-loop defense is described honestly as a notify-level, opt-in-to-auto-stop monitor detector (`anomaly_repetitive_active`), not a default hard-stop; fixed the `stuck_loop_detected` naming drift.
22
+
23
+ ## [0.1.41] - 2026-06-07
24
+
25
+ ### Added
26
+ - New `codewhale` preset — supervise Hmbown/CodeWhale (DeepSeek terminal agent) via `codewhale exec --auto --output-format stream-json`. `agent-runner init --preset codewhale`.
27
+ - New built-in `codewhale_error_detector` plugin — emits `agent_usage_recorded` (model + token counts) from codewhale's stream-json output. Transient-error classification is best-effort (mappable buckets only); auth failures surface via the existing monitor `oauth_fail` detector.
28
+
8
29
  ## [0.1.40] - 2026-05-31
9
30
 
10
31
  ### Security
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cli-agent-runner
3
- Version: 0.1.40
3
+ Version: 0.1.42
4
4
  Summary: Restart-on-exit supervisor for autonomous CLI agents
5
5
  Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
6
6
  Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
@@ -49,7 +49,7 @@ full disks, runaway memory.
49
49
 
50
50
  ```
51
51
  ┌──────────────────────────────────────────┐
52
- │ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
52
+ │ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
53
53
  ├──────────────────────────────────────────┤
54
54
  │ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
55
55
  ├──────────────────────────────────────────┤
@@ -86,14 +86,14 @@ Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
86
86
  |---|---|
87
87
  | `init` / `install` / `uninstall` | `peek` — state snapshot |
88
88
  | `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
89
- | `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
89
+ | `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
90
90
  | `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
91
91
 
92
92
  Verb reference: [`docs/commands.md`](docs/commands.md).
93
93
 
94
94
  ## Defenses (built in)
95
95
 
96
- 11 named defenses, structured as data — see `agent-runner peek --select defenses`.
96
+ 12 named defenses, structured as data — see `agent-runner peek --select defenses`.
97
97
  Each carries the historical incident it codifies and the invariant test that
98
98
  guards it. Highlights:
99
99
 
@@ -106,7 +106,7 @@ guards it. Highlights:
106
106
 
107
107
  Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
108
108
 
109
- ## Monitor: 12 detectors
109
+ ## Monitor: 11 detectors
110
110
 
111
111
  Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
112
112
  `mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
@@ -12,7 +12,7 @@ full disks, runaway memory.
12
12
 
13
13
  ```
14
14
  ┌──────────────────────────────────────────┐
15
- │ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
15
+ │ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
16
16
  ├──────────────────────────────────────────┤
17
17
  │ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
18
18
  ├──────────────────────────────────────────┤
@@ -49,14 +49,14 @@ Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
49
49
  |---|---|
50
50
  | `init` / `install` / `uninstall` | `peek` — state snapshot |
51
51
  | `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
52
- | `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
52
+ | `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
53
53
  | `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
54
54
 
55
55
  Verb reference: [`docs/commands.md`](docs/commands.md).
56
56
 
57
57
  ## Defenses (built in)
58
58
 
59
- 11 named defenses, structured as data — see `agent-runner peek --select defenses`.
59
+ 12 named defenses, structured as data — see `agent-runner peek --select defenses`.
60
60
  Each carries the historical incident it codifies and the invariant test that
61
61
  guards it. Highlights:
62
62
 
@@ -69,7 +69,7 @@ guards it. Highlights:
69
69
 
70
70
  Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
71
71
 
72
- ## Monitor: 12 detectors
72
+ ## Monitor: 11 detectors
73
73
 
74
74
  Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
75
75
  `mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
@@ -6,7 +6,7 @@
6
6
 
7
7
  把任意 CLI agent(Claude Code、自研 agent、任何长跑命令)包装成可被
8
8
  systemd / launchd 拉起、能被远程观测的服务。**每轮跑完进程退出**,外层
9
- supervisor 重启 —— 这是核心模式。中间穿插 11 条防御,避开 production 上
9
+ supervisor 重启 —— 这是核心模式。中间穿插 12 条防御,避开 production 上
10
10
  最容易翻车的几条路:
11
11
 
12
12
  - 轮卡死、Tool 调用空转 → 硬墙 timeout
@@ -20,7 +20,7 @@ supervisor 重启 —— 这是核心模式。中间穿插 11 条防御,避开
20
20
 
21
21
  ```
22
22
  ┌──────────────────────────────────────────┐
23
- │ Layer 3:Witness(monitor) │ 12 个检测器 + 自动停服
23
+ │ Layer 3:Witness(monitor) │ 11 个检测器 + 自动停服
24
24
  ├──────────────────────────────────────────┤
25
25
  │ Layer 2:Loop(serve,~120 LOC 薄壳) │ 捕获信号,循环拉起 round
26
26
  ├──────────────────────────────────────────┤
@@ -63,7 +63,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
63
63
  |---|---|
64
64
  | `init` / `install` / `uninstall` | `peek` —— 项目状态快照 |
65
65
  | `start` / `stop` / `kill` / `cancel` | `watch` —— peek 在刷新循环里 |
66
- | `restart` / `status` | `monitor` —— 12 个检测器 + 告警 + 自动停服 |
66
+ | `restart` / `status` | `monitor` —— 11 个检测器 + 告警 + 自动停服 |
67
67
  | `round` / `serve` / `upgrade` | `events` —— 查询 / 流式订阅 events.jsonl |
68
68
 
69
69
  **停服三动词**有清晰的语义分层:
@@ -73,7 +73,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
73
73
 
74
74
  动词参考:[`docs/commands.md`](docs/commands.md)。
75
75
 
76
- ## 内置防御(11 条)
76
+ ## 内置防御(12 条)
77
77
 
78
78
  防御以数据形式定义在 `agent_runner/defenses.py`,可通过
79
79
  `agent-runner peek --select defenses` 直接拿到。每条防御自带:
@@ -95,7 +95,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
95
95
 
96
96
  完整列表 + 历史出处:[`docs/architecture.md`](docs/architecture.md)。
97
97
 
98
- ## Monitor:12 个检测器
98
+ ## Monitor:11 个检测器
99
99
 
100
100
  **只告警**(warning 级,服务继续跑):
101
101
  `timeout_rate` / `hung` / `orphan_chain` / `disk_warning` /
@@ -45,6 +45,29 @@ def emit_max_rounds_reached(log_dir: Path, *, rounds_completed: int, max_rounds:
45
45
  emit(log_dir, MAX_ROUNDS_REACHED, rounds_completed=rounds_completed, max_rounds=max_rounds)
46
46
 
47
47
 
48
+ def emit_config_broken(log_dir: Path, *, reason: str) -> None:
49
+ """Emit config_broken (serve stopped on a permanent startup-battery failure)."""
50
+ from agent_runner.events import CONFIG_BROKEN, emit
51
+
52
+ emit(log_dir, CONFIG_BROKEN, reason=reason)
53
+
54
+
55
+ def emit_crash_loop(log_dir: Path, *, consecutive: int, exit_code: int, log_path: Path) -> None:
56
+ """Emit crash_loop (serve stopped after consecutive unknown short crashes).
57
+
58
+ Captures the failure reason — a redacted tail of the round log — so a
59
+ recurring unknown crash can later be classified into a transient bucket.
60
+ """
61
+ from agent_runner._redact import redact_secrets
62
+ from agent_runner.events import CRASH_LOOP, emit
63
+
64
+ try:
65
+ reason = redact_secrets(log_path.read_text(errors="replace")[-2000:])
66
+ except OSError:
67
+ reason = ""
68
+ emit(log_dir, CRASH_LOOP, consecutive=consecutive, exit_code=exit_code, reason=reason)
69
+
70
+
48
71
  def emit_stop_file_detected(
49
72
  log_dir: Path, *, stop_file: Path, content: str, rounds_completed: int
50
73
  ) -> None:
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.40'
22
- __version_tuple__ = version_tuple = (0, 1, 40)
21
+ __version__ = version = '0.1.42'
22
+ __version_tuple__ = version_tuple = (0, 1, 42)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -18,7 +18,7 @@ import sysconfig
18
18
  import time
19
19
  from collections.abc import Iterator
20
20
  from pathlib import Path
21
- from typing import Any
21
+ from typing import Any, Literal
22
22
 
23
23
  from agent_runner import events, lifecycle
24
24
  from agent_runner.api_types import (
@@ -45,6 +45,59 @@ from agent_runner.service_unit import (
45
45
  serve_unit_filename,
46
46
  )
47
47
 
48
# Exit code for a permanent (no-retry) startup-battery failure. A broken config
# does not self-heal between rounds, so serve STOPS rather than respawning it
# forever. 78 = EX_CONFIG (sysexits) — avoids argparse's 2 and the generic 1.
# Lives here (not runner.py) so serve_cmd can import it from the sanctioned api
# facade without coupling to runner (runner imports api, not the reverse).
PERMANENT_CONFIG_EXIT = 78

# Crash-loop circuit breaker (b12). The serve loop escalates the restart delay
# on consecutive UNKNOWN short crashes (non-zero exit, short duration, no
# classified transient) and STOPS after CRASH_LOOP_THRESHOLD of them — the Run 6
# ~100-empty-rounds scar. Recoverable-slow failures (rate limit / 5h quota / 5xx
# / timeout) are already handled by the transient-error throttle and never reach
# this path. A clean (exit 0), long, or classified-transient round resets the run.
CRASH_LOOP_THRESHOLD = 5
CRASH_LOOP_SHORT_EXIT_S = 60  # mirrors monitor.SHORT_EXIT_THRESHOLD_S
CRASH_LOOP_MAX_DELAY_S = 1800  # cap the escalating restart delay (30 min)


def post_round_decision(
    *,
    returncode: int,
    duration_s: float,
    throttle_active: bool,
    consecutive: int,
    restart_delay_s: int,
) -> tuple[Literal["config_broken", "crash_loop", "continue"], int, int]:
    """Restart policy after one round — keeps the serve loop a thin dispatcher.

    Args:
        returncode: Exit code of the round subprocess.
        duration_s: Wall-clock duration of the round, in seconds.
        throttle_active: True when a classified transient error is throttling
            the loop; such a round is never counted as an unknown crash.
        consecutive: Unknown short crashes counted before this round.
        restart_delay_s: Configured base delay between rounds, in seconds.

    Returns:
        ``(action, delay_s, consecutive)`` where action is:

        - ``"config_broken"`` — permanent startup failure (b18): stop.
        - ``"crash_loop"`` — CRASH_LOOP_THRESHOLD consecutive unknown short
          crashes (b12): stop. An unknown short crash is a non-zero, fast exit
          with no classified transient (rate-limit/5xx/timeout are handled by
          the throttle).
        - ``"continue"`` — sleep ``delay_s`` then run the next round.

    A clean (exit 0), long, or transient round resets ``consecutive`` to 0; an
    unknown short crash escalates the delay (restart × 2ⁿ, capped at
    CRASH_LOOP_MAX_DELAY_S but never below ``restart_delay_s``) until the stop.
    """
    if returncode == PERMANENT_CONFIG_EXIT:
        return ("config_broken", 0, consecutive)

    unknown_short_crash = (
        returncode != 0 and duration_s < CRASH_LOOP_SHORT_EXIT_S and not throttle_active
    )
    if unknown_short_crash:
        consecutive += 1
        if consecutive >= CRASH_LOOP_THRESHOLD:
            return ("crash_loop", 0, consecutive)
        escalated = min(restart_delay_s * 2**consecutive, CRASH_LOOP_MAX_DELAY_S)
        # The cap bounds the escalation only; it must not make a crashing agent
        # restart SOONER than a clean round when the configured base delay is
        # already above the cap.
        return ("continue", max(escalated, restart_delay_s), consecutive)

    # Clean, long, or throttled round: reset the streak. Any failure still
    # doubles the base delay.
    delay = restart_delay_s if returncode == 0 else restart_delay_s * 2
    return ("continue", delay, 0)
99
+
100
+
48
101
  _PROJECT_NAME_RE = re.compile(r"^[A-Za-z0-9._-]+$")
49
102
 
50
103
  _LINGER_HINT = (
@@ -730,6 +783,8 @@ def check_self_terminated_sentinel(log_dir: Path) -> bool:
730
783
  from agent_runner._emit import ( # noqa: E402,F401 — intentional bottom re-export
731
784
  emit_agent_usage_recorded,
732
785
  emit_anomaly_repetitive_tool,
786
+ emit_config_broken,
787
+ emit_crash_loop,
733
788
  emit_fresh_eyes_round_triggered,
734
789
  emit_max_rounds_reached,
735
790
  emit_rate_limit_stop,
@@ -0,0 +1,133 @@
1
+ """Built-in post_round_hook for codewhale CLI: usage events + transient classifier.
2
+
3
+ Third built-in plugin (after claude, gemini). Parses codewhale's `exec
4
+ --output-format stream-json` NDJSON stdout tail; emits agent_usage_recorded
5
+ from the terminal metadata record. Transient-error classification is
6
+ best-effort and emits ONLY when an error maps to an existing bucket (like
7
+ gemini): codewhale's exec stdout surfaces a {"type":"error"} record, but the
8
+ only observed case so far is auth failure (oauth_fail territory, not a
9
+ transient bucket), so nothing maps yet -- usage-only today. 429/5xx mapping
10
+ is added when a real rate-limit sample is captured.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import time
17
+ from collections import deque
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ from agent_runner.api import (
22
+ emit_agent_usage_recorded,
23
+ emit_transient_error_detected,
24
+ )
25
+ from agent_runner.builtin_plugins._constants import (
26
+ _5XX_STATUSES,
27
+ _BACK_OFF_DEFAULTS,
28
+ _RAW_CAP,
29
+ _TAIL_LINES,
30
+ )
31
+ from agent_runner.hooks import HookContext, register_post_round_hook
32
+
33
+
34
class CodewhaleErrorDetector:
    """Post-round hook that turns a codewhale round log into events.

    After a codewhale round it parses the tail of the round log and emits
    ``transient_error_detected`` and/or ``agent_usage_recorded`` for whatever
    the parser found.
    """

    name = "codewhale_error_detector"

    def after_round(self, ctx: HookContext, result: Any) -> None:
        """Inspect the finished round and emit the events the log supports.

        Does nothing unless the round's agent binary is ``codewhale`` and the
        round log path is set and exists on disk.
        """
        if ctx.agent_binary != "codewhale":
            return
        round_log = ctx.agent_log_path
        if round_log is None:
            return
        if not round_log.exists():
            return

        findings = _parse_codewhale_log(round_log)

        # The transient error (if any) is reported before usage.
        transient = findings.get("transient_error")
        if transient:
            emit_transient_error_detected(ctx.log_dir, round_num=ctx.round_num, **transient)

        usage = findings.get("usage")
        if usage:
            # A round counts as successful only if it exited 0 without timing out.
            succeeded = result.exit_code == 0 and not result.timed_out
            emit_agent_usage_recorded(
                ctx.log_dir,
                round_num=ctx.round_num,
                phase=ctx.phase or "",
                success=succeeded,
                **usage,
            )
58
+
59
+
60
def _token_count(value: Any) -> int:
    """Coerce a token count from the metadata record to an int (0 if unusable)."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # JSON null, a non-numeric string, or Infinity/NaN: treat as "no count"
        # instead of crashing the best-effort post-round hook.
        return 0


def _parse_codewhale_log(log_path: Path) -> dict[str, Any]:
    """Scan last _TAIL_LINES of codewhale NDJSON; extract usage from the metadata
    record; classify any {"type":"error"} that maps to a transient bucket.

    Tolerates non-JSON lines (codewhale prefixes some stdout with terminal
    escapes) via per-line try/except, and tolerates malformed metadata (a
    non-object ``meta``, null or non-numeric token counts).

    Returns:
        A dict with up to two keys: ``"usage"`` (present when the last
        metadata record carried a non-empty ``meta`` object) and
        ``"transient_error"`` (present when the last error record maps to a
        transient bucket). Empty when neither was found.
    """
    with log_path.open("r", encoding="utf-8", errors="replace") as f:
        tail = deque(f, maxlen=_TAIL_LINES)

    metadata: dict[str, Any] | None = None
    error_event: dict[str, Any] | None = None
    for line in tail:
        line = line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            continue
        etype = event.get("type")
        if etype == "metadata":
            # Last metadata record wins; a missing or non-object payload
            # counts as "no usage" rather than raising on .get() below.
            meta = event.get("meta")
            metadata = meta if isinstance(meta, dict) else {}
        elif etype == "error":
            error_event = event

    out: dict[str, Any] = {}

    if metadata:
        out["usage"] = {
            "agent": "codewhale",
            # A null/empty model falls back to "unknown" rather than "None".
            "model": str(metadata.get("model") or "unknown"),
            "input_tokens": _token_count(metadata.get("input_tokens", 0)),
            "output_tokens": _token_count(metadata.get("output_tokens", 0)),
            "cached_tokens": 0,  # codewhale exec stdout exposes no cache counts
            "cost_usd": None,  # codewhale exec stdout exposes no USD
            "duration_ms": 0,  # not in exec metadata
        }

    if error_event is not None:
        classification = _classify_codewhale_error(error_event)
        if classification:
            duration = _BACK_OFF_DEFAULTS[classification]
            out["transient_error"] = {
                "classification": classification,
                "agent": "codewhale",
                "reset_at_epoch": int(time.time() + duration),
                "raw": str(error_event.get("error", "error"))[:_RAW_CAP],
            }
    return out
111
+
112
+
113
+ def _classify_codewhale_error(error_event: dict[str, Any]) -> str | None:
114
+ """Map a codewhale {"type":"error"} record to a transient bucket, or None.
115
+
116
+ None means 'not a transient error' (e.g. auth failure -> handled by the
117
+ monitor's oauth_fail log-scan, not the transient classifier). codewhale's
118
+ error record currently carries only a free-text 'error' string with no
119
+ status code; until a real rate-limit/5xx sample is captured we cannot map
120
+ to rate_limit_model / api_transient_5xx / api_timeout, so we return None.
121
+ A future revision keys on a numeric status field once observed.
122
+ """
123
+ code = error_event.get("code") or error_event.get("status_code")
124
+ if code == 429:
125
+ return "rate_limit_model"
126
+ if code in _5XX_STATUSES:
127
+ return "api_transient_5xx"
128
+ if code == 408:
129
+ return "api_timeout"
130
+ return None
131
+
132
+
133
+ register_post_round_hook(CodewhaleErrorDetector())
@@ -2,15 +2,27 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import importlib.resources
6
+
5
7
  from agent_runner import api
6
8
  from agent_runner.cli.common import emit, fail, work_dir_from_args
7
9
 
8
10
 
11
def _preset_names() -> list[str]:
    """Return the sorted names of the presets shipped in ``agent_runner/presets``.

    A preset is any ``*.toml`` resource in that package; its name is the file
    name without the extension. Because the list is read from the package
    contents rather than hardcoded, adding a preset only requires dropping in
    a new .toml file — the ``--preset`` choices follow automatically.
    """
    suffix = ".toml"
    names = []
    for entry in importlib.resources.files("agent_runner.presets").iterdir():
        filename = entry.name
        if filename.endswith(suffix):
            names.append(filename[: -len(suffix)])
    names.sort()
    return names
19
+
20
+
9
21
  def add_parser(sub, parent) -> None:
10
22
  p = sub.add_parser("init", parents=[parent], help="Scaffold agent-runner project files")
11
23
  p.add_argument(
12
24
  "--preset",
13
- choices=["claude", "aider", "gemini"],
25
+ choices=_preset_names(),
14
26
  default="claude",
15
27
  help="Which agent CLI preset to scaffold (default: claude)",
16
28
  )
@@ -23,12 +23,15 @@ from agent_runner._throttle import _check_throttle_state
23
23
  from agent_runner._throttle import reset_counters as _reset_counters
24
24
  from agent_runner.api import (
25
25
  check_self_terminated_sentinel,
26
+ emit_config_broken,
27
+ emit_crash_loop,
26
28
  emit_fresh_eyes_round_triggered,
27
29
  emit_max_rounds_reached,
28
30
  emit_rate_limit_stop,
29
31
  emit_round_substrate_after,
30
32
  emit_round_substrate_before,
31
33
  emit_stop_file_detected,
34
+ post_round_decision,
32
35
  )
33
36
  from agent_runner.cli.common import cfg_from_args
34
37
  from agent_runner.hooks import run_serve_startup_hooks
@@ -135,6 +138,7 @@ def cmd(args) -> int:
135
138
  stop_file = cfg.runtime.stop_file # cache: same pattern as effective_max_rounds
136
139
  work_dir = cfg.runtime.work_dir
137
140
  rounds_completed = 0
141
+ consecutive_crashes = 0 # b12: consecutive UNKNOWN short crashes (crash-loop breaker)
138
142
 
139
143
  try:
140
144
  pid_file.write(os.getpid())
@@ -197,6 +201,7 @@ def cmd(args) -> int:
197
201
  every_n=cfg.runtime.fresh_eyes_every_n,
198
202
  )
199
203
  round_log_path = log_dir / f"round-{round_num}.log"
204
+ round_started = time.monotonic()
200
205
  with round_log_path.open("w") as f:
201
206
  r = subprocess.run(
202
207
  [
@@ -211,6 +216,7 @@ def cmd(args) -> int:
211
216
  stdout=f,
212
217
  stderr=subprocess.STDOUT,
213
218
  )
219
+ round_duration_s = time.monotonic() - round_started
214
220
  atomic_relink(log_dir / ROUND_CURRENT_LINK, round_log_path)
215
221
  git_head_after = compute_git_head(work_dir)
216
222
  paths_hash_after = compute_paths_hash(work_dir, cfg.runtime.substrate_fingerprint_paths)
@@ -221,13 +227,28 @@ def cmd(args) -> int:
221
227
  paths_hash=paths_hash_after,
222
228
  )
223
229
  rounds_completed += 1
230
+ # Restart policy (config_broken / crash_loop / continue) lives in the
231
+ # tested api.post_round_decision helper so this loop stays thin.
232
+ action, delay, consecutive_crashes = post_round_decision(
233
+ returncode=r.returncode,
234
+ duration_s=round_duration_s,
235
+ throttle_active=_check_throttle_state(log_dir) is not None,
236
+ consecutive=consecutive_crashes,
237
+ restart_delay_s=cfg.runtime.restart_delay_s,
238
+ )
239
+ if action == "config_broken":
240
+ emit_config_broken(log_dir, reason="startup battery permanent failure")
241
+ break
242
+ if action == "crash_loop":
243
+ emit_crash_loop(
244
+ log_dir,
245
+ consecutive=consecutive_crashes,
246
+ exit_code=r.returncode,
247
+ log_path=round_log_path,
248
+ )
249
+ break
224
250
  if args.once or stop["requested"]:
225
251
  break
226
- delay = (
227
- cfg.runtime.restart_delay_s
228
- if r.returncode == 0
229
- else cfg.runtime.restart_delay_s * 2
230
- )
231
252
  time.sleep(delay)
232
253
  finally:
233
254
  pid_file.unlink()
@@ -83,8 +83,18 @@ def catalog(cfg: Config) -> list[Defense]:
83
83
  Defense(
84
84
  name="startup_smoke_check",
85
85
  value="6 checks (config / log_dir / agent_cli / git / prompt_file / prompt_smoke)",
86
- codifies="R721 + #446 — _common.md frontmatter caused 4h/123-round silent burn",
87
- guarded_by=None,
86
+ codifies=(
87
+ "R721 + #446 — _common.md frontmatter caused 4h/123-round silent burn; "
88
+ "now halts serve (config_broken) instead of respawning a broken config"
89
+ ),
90
+ guarded_by=Path("tests/unit/test_serve_config_broken.py"),
91
+ current_state="active",
92
+ ),
93
+ Defense(
94
+ name="crash_loop_breaker",
95
+ value="stop after 5 consecutive short crashes; exp-escalating delay",
96
+ codifies="Run 6 — crashing agent respawned ~100 empty rounds at a fixed 2x delay",
97
+ guarded_by=Path("tests/unit/test_serve_crash_loop.py"),
88
98
  current_state="active",
89
99
  ),
90
100
  Defense(
@@ -32,6 +32,8 @@ ANOMALY_REPETITIVE_TOOL = "anomaly_repetitive_tool"
32
32
  AGENT_NETWORK_BLIP = "agent_network_blip"
33
33
  AGENT_SPAWN = "agent_spawn"
34
34
  AGENT_USAGE_RECORDED = "agent_usage_recorded"
35
+ CONFIG_BROKEN = "config_broken"
36
+ CRASH_LOOP = "crash_loop"
35
37
  DIRTY_COMMIT_FAILED = "dirty_commit_failed"
36
38
  DIRTY_DETECTED = "dirty_detected"
37
39
  FRESH_EYES_ROUND_TRIGGERED = "fresh_eyes_round_triggered"
@@ -49,7 +49,6 @@ KNOWN_ALERT_KINDS: frozenset[str] = frozenset(
49
49
  "disk_warning",
50
50
  "disk_critical",
51
51
  "mem_pressure",
52
- "smoke_fail_rate",
53
52
  "oauth_fail",
54
53
  "network_fail",
55
54
  "rate_limit_active",
@@ -265,29 +264,6 @@ def detect_mem_pressure(metrics: list[dict[str, Any]], *, threshold_mb: int = 20
265
264
  )
266
265
 
267
266
 
268
- def detect_smoke_fail_rate(
269
- events: list[dict[str, Any]], *, window: int = 10, threshold: float = 0.1
270
- ) -> Alert | None:
271
- ends = [e for e in events if e.get("event") == "round_end"]
272
- if len(ends) < window:
273
- return None
274
- recent_round_nums = [e.get("round_num") for e in ends[-window:]]
275
- fails = sum(
276
- 1
277
- for e in events
278
- if e.get("event") == "smoke_check_failed" and e.get("round_num") in recent_round_nums
279
- )
280
- rate = fails / window
281
- if rate < threshold:
282
- return None
283
- return _alert(
284
- "smoke_fail_rate",
285
- "warning",
286
- f"{fails}/{window} recent rounds had smoke_check_failed",
287
- {"rate": rate, "threshold": threshold, "hint": "Inspect events.jsonl for failure reasons"},
288
- )
289
-
290
-
291
267
  def detect_oauth_fail(
292
268
  events: list[dict[str, Any]],
293
269
  log_tails: dict[int, str],
@@ -603,7 +579,6 @@ def run_all_detectors(
603
579
  ),
604
580
  detect_disk_critical(metrics, threshold_pct=disk_critical_pct),
605
581
  detect_mem_pressure(metrics, threshold_mb=mem_avail_min_mb),
606
- detect_smoke_fail_rate(events),
607
582
  detect_oauth_fail(events, log_tails, patterns=compiled_auth_pats, hint=auth_fail_hint),
608
583
  detect_network_fail(events, log_tails),
609
584
  detect_rate_limit_active(events, now=now.timestamp()),
@@ -0,0 +1,30 @@
1
+ # agent-runner.toml — generated by `agent-runner init --preset codewhale`.
2
+ #
3
+ # Prereqs:
4
+ # - codewhale installed (ships `codewhale` + `codewhale-tui`; both on PATH):
5
+ # npm i -g codewhale (or cargo/brew per CodeWhale docs)
6
+ # - DEEPSEEK_API_KEY set on the supervisor host (or a key saved via
7
+ # `codewhale auth set`; resolution order is config > keyring > env)
8
+ # - work_dir is a git repo
9
+
10
+ [agent]
11
+ command = ["codewhale", "exec", "--auto", "--output-format", "stream-json"]
12
+ prompt_arg_template = ["{prompt}"]
13
+ name = "codewhale"
14
+
15
+ [runtime]
16
+ work_dir = "."
17
+ log_dir = "~/.agent-runner/{project}/logs"
18
+ round_timeout_s = 1800
19
+ restart_delay_s = 3
20
+
21
+ [prompt]
22
+ file = "./prompts/main.md"
23
+ inject_context = true
24
+
25
+ [vcs]
26
+ dirty_action = "stash"
27
+ stash_idempotency_s = 5
28
+
29
+ [monitor]
30
+ auth_fail_hint = "Run `codewhale auth status` to inspect provider/credentials, or set DEEPSEEK_API_KEY on the supervisor host."
@@ -369,7 +369,7 @@ def run_one_round(cfg: Config, *, phase_override: str | None = None) -> RoundRes
369
369
  file=sys.stderr,
370
370
  )
371
371
  events.emit(log_dir, "smoke_check_failed", reason=f"{r.name}: {r.reason}")
372
- sys.exit(1)
372
+ sys.exit(api.PERMANENT_CONFIG_EXIT)
373
373
 
374
374
  # Concurrency lock (per-project)
375
375
  lock_path = log_dir / "agent-runner.lock"
@@ -521,6 +521,7 @@ def _run_one_round_inner(cfg: Config, *, phase_override: str | None = None) -> R
521
521
  round_num=round_num,
522
522
  phase=phase,
523
523
  idempotency_s=cfg.vcs.stash_idempotency_s,
524
+ log_dir=cfg.runtime.log_dir,
524
525
  )
525
526
  if ref is not None:
526
527
  context_store.write_orphan_state(
@@ -546,7 +547,9 @@ def _run_one_round_inner(cfg: Config, *, phase_override: str | None = None) -> R
546
547
  # Leave tree dirty for next round; dirty_detected already emitted
547
548
  pass
548
549
  elif action == "auto_commit":
549
- err = vcs_state.try_auto_commit(cfg.runtime.work_dir, round_num, phase)
550
+ err = vcs_state.try_auto_commit(
551
+ cfg.runtime.work_dir, round_num, phase, log_dir=cfg.runtime.log_dir
552
+ )
550
553
  if err is not None:
551
554
  events.emit(
552
555
  log_dir,
@@ -5,8 +5,8 @@ Writes three files into a git repo:
5
5
  prompts/main.md — neutral 8-line placeholder
6
6
  .gitignore — append "logs/" if missing
7
7
 
8
- Available presets ship as package data in `agent_runner/presets/*.toml`.
9
- Currently: `claude`, `aider`, `gemini`.
8
+ Available presets ship as package data in `agent_runner/presets/*.toml`;
9
+ `agent-runner init --preset <name>` discovers them from that directory.
10
10
 
11
11
  Optionally commits in one step (default true via the CLI).
12
12
  """