cli-agent-runner 0.1.40__tar.gz → 0.1.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/CHANGELOG.md +6 -0
  2. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/PKG-INFO +1 -1
  3. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_version.py +2 -2
  4. cli_agent_runner-0.1.41/agent_runner/builtin_plugins/codewhale.py +133 -0
  5. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/init_cmd.py +13 -1
  6. cli_agent_runner-0.1.41/agent_runner/presets/codewhale.toml +30 -0
  7. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/scaffold.py +2 -2
  8. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/commands.md +1 -1
  9. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/configuration.md +7 -4
  10. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/quickstart.md +1 -1
  11. cli_agent_runner-0.1.41/docs/recipes/codewhale.md +98 -0
  12. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/thesis.md +26 -1
  13. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/pyproject.toml +1 -0
  14. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_scaffold_presets.py +2 -2
  15. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_doc_claims_match_ssot.py +11 -0
  16. cli_agent_runner-0.1.41/tests/unit/test_codewhale_plugin.py +155 -0
  17. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_presets.py +30 -2
  18. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.codecov.yml +0 -0
  19. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  20. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  21. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  22. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  23. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.github/workflows/ci.yml +0 -0
  24. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.github/workflows/release.yml +0 -0
  25. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.gitignore +0 -0
  26. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/.vulture-whitelist.py +0 -0
  27. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/CODE_OF_CONDUCT.md +0 -0
  28. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/CONTRIBUTING.md +0 -0
  29. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/LICENSE +0 -0
  30. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/README.md +0 -0
  31. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/README.zh.md +0 -0
  32. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/SECURITY.md +0 -0
  33. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/__init__.py +0 -0
  34. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_docgen.py +0 -0
  35. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_emit.py +0 -0
  36. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_redact.py +0 -0
  37. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_registry.py +0 -0
  38. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_substrate.py +0 -0
  39. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/_throttle.py +0 -0
  40. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/agent_runtime.py +0 -0
  41. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/api.py +0 -0
  42. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/api_types.py +0 -0
  43. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/builtin_plugins/__init__.py +0 -0
  44. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/builtin_plugins/_constants.py +0 -0
  45. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/builtin_plugins/claude_rate_limit.py +0 -0
  46. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/builtin_plugins/gemini.py +0 -0
  47. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/__init__.py +0 -0
  48. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/__main__.py +0 -0
  49. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/common.py +0 -0
  50. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/events_cmd.py +0 -0
  51. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/install_cmd.py +0 -0
  52. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/monitor_cmd.py +0 -0
  53. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/peek_cmd.py +0 -0
  54. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/round_cmd.py +0 -0
  55. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/serve_cmd.py +0 -0
  56. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/service_cmd.py +0 -0
  57. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/cli/upgrade_cmd.py +0 -0
  58. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/config.py +0 -0
  59. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/context_store.py +0 -0
  60. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/defenses.py +0 -0
  61. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/detector_helpers.py +0 -0
  62. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/events.py +0 -0
  63. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/hooks.py +0 -0
  64. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/http_progress.py +0 -0
  65. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/lifecycle.py +0 -0
  66. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/metrics.py +0 -0
  67. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/monitor.py +0 -0
  68. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/presets/__init__.py +0 -0
  69. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/presets/aider.toml +0 -0
  70. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/presets/claude.toml +0 -0
  71. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/presets/gemini.toml +0 -0
  72. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/prompt_loader.py +0 -0
  73. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/round_log.py +0 -0
  74. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/round_view.py +0 -0
  75. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/runner.py +0 -0
  76. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/service_unit.py +0 -0
  77. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/startup_check.py +0 -0
  78. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/agent_runner/vcs_state.py +0 -0
  79. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/build.sh +0 -0
  80. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/deploy/example-agent-runner.toml +0 -0
  81. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/deploy/launchd.plist.tmpl +0 -0
  82. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/deploy/run-loop.sh +0 -0
  83. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/deploy/systemd.service.tmpl +0 -0
  84. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/README.md +0 -0
  85. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/architecture.md +0 -0
  86. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/events.md +0 -0
  87. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/long-running-agents.md +0 -0
  88. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/marketing/README.md +0 -0
  89. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/marketing/promo-cn.html +0 -0
  90. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.16.md +0 -0
  91. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.17.md +0 -0
  92. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.19.md +0 -0
  93. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.20.md +0 -0
  94. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.21.md +0 -0
  95. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.22.md +0 -0
  96. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.23.md +0 -0
  97. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.24.md +0 -0
  98. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.25.md +0 -0
  99. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.26.md +0 -0
  100. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.27.md +0 -0
  101. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.28.md +0 -0
  102. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.29.md +0 -0
  103. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.30.md +0 -0
  104. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.31.md +0 -0
  105. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.32.md +0 -0
  106. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.33.md +0 -0
  107. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.34.md +0 -0
  108. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.35.md +0 -0
  109. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.36.md +0 -0
  110. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.37.md +0 -0
  111. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.38.md +0 -0
  112. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.39.md +0 -0
  113. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/migrations/0.1.40.md +0 -0
  114. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/plugins.md +0 -0
  115. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/recipes/aider.md +0 -0
  116. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/docs/runbook.md +0 -0
  117. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/__init__.py +0 -0
  118. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/_test_helpers.py +0 -0
  119. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/conftest.py +0 -0
  120. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/contract/__init__.py +0 -0
  121. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/contract/test_public_api_surface.py +0 -0
  122. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/e2e/__init__.py +0 -0
  123. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/e2e/conftest.py +0 -0
  124. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/e2e/test_e2e_graceful_stop.py +0 -0
  125. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/e2e/test_e2e_install_systemd.py +0 -0
  126. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/e2e/test_e2e_monitor_remote.py +0 -0
  127. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
  128. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
  129. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
  130. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
  131. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/__init__.py +0 -0
  132. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_bounded_run.py +0 -0
  133. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_context_enricher_namespacing.py +0 -0
  134. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_fresh_eyes_signal.py +0 -0
  135. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_grace_kill_emission.py +0 -0
  136. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_install_dry_run.py +0 -0
  137. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_monitor_seeded.py +0 -0
  138. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_plugin_detector_loaded.py +0 -0
  139. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_plugin_owned_paths.py +0 -0
  140. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_plugin_real_flow.py +0 -0
  141. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
  142. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_serve_loop.py +0 -0
  143. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_substrate_fingerprint.py +0 -0
  144. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/integration/test_transient_error_backoff.py +0 -0
  145. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/__init__.py +0 -0
  146. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_architecture.py +0 -0
  147. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_atomic_write_enforced.py +0 -0
  148. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_catalogs.py +0 -0
  149. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_classification_ssot.py +0 -0
  150. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_docs_generated.py +0 -0
  151. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_entry_points_resolve.py +0 -0
  152. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_event_kind_registry.py +0 -0
  153. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_event_kinds_ssot.py +0 -0
  154. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_events_doc_contract.py +0 -0
  155. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_layer_2_loop_size.py +0 -0
  156. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_module_boundaries.py +0 -0
  157. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_module_sizes.py +0 -0
  158. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_no_ai_signatures.py +0 -0
  159. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
  160. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_peek_schema_version.py +0 -0
  161. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
  162. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_round_result_stable.py +0 -0
  163. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
  164. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/invariants/test_upstream_schema_canary.py +0 -0
  165. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/literate/__init__.py +0 -0
  166. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/literate/parser.py +0 -0
  167. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/literate/test_parser.py +0 -0
  168. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/literate/test_quickstart.py +0 -0
  169. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/__init__.py +0 -0
  170. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_agent_runtime.py +0 -0
  171. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_agent_runtime_grace.py +0 -0
  172. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_agent_runtime_progress.py +0 -0
  173. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_assemble_prompt.py +0 -0
  174. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_events_stream.py +0 -0
  175. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_install.py +0 -0
  176. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_observation.py +0 -0
  177. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_read_round_num.py +0 -0
  178. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_resolve_phase.py +0 -0
  179. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_service.py +0 -0
  180. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_api_types.py +0 -0
  181. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_auto_stop_gating.py +0 -0
  182. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_claude_error_detector.py +0 -0
  183. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_cli.py +0 -0
  184. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_cli_common.py +0 -0
  185. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_cli_init_install.py +0 -0
  186. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_cli_monitor_http.py +0 -0
  187. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_cli_service_peek_monitor.py +0 -0
  188. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_cli_upgrade.py +0 -0
  189. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_config.py +0 -0
  190. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_config_fresh_eyes.py +0 -0
  191. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_config_max_rounds.py +0 -0
  192. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_config_stop_file.py +0 -0
  193. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
  194. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_config_transient_error_action.py +0 -0
  195. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_context_store.py +0 -0
  196. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_defenses.py +0 -0
  197. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_detector_helpers.py +0 -0
  198. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_detector_protocol.py +0 -0
  199. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_docgen.py +0 -0
  200. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_events.py +0 -0
  201. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_events_cmd.py +0 -0
  202. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_fresh_eyes_trigger.py +0 -0
  203. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_gemini_plugin.py +0 -0
  204. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_hook_failure_isolation.py +0 -0
  205. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_hooks.py +0 -0
  206. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_http_progress.py +0 -0
  207. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_init_entry_points.py +0 -0
  208. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_lifecycle.py +0 -0
  209. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_metrics.py +0 -0
  210. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_monitor_assembly.py +0 -0
  211. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
  212. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
  213. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_monitor_detect_supervisor_stale.py +0 -0
  214. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_monitor_detectors.py +0 -0
  215. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_monitor_remote.py +0 -0
  216. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_peek_argparse.py +0 -0
  217. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_peek_select.py +0 -0
  218. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_prompt_loader.py +0 -0
  219. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_redact.py +0 -0
  220. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_round_log_helpers.py +0 -0
  221. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_round_view.py +0 -0
  222. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_runner.py +0 -0
  223. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_runner_throttle.py +0 -0
  224. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_scaffold.py +0 -0
  225. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_serve_cmd_bounded.py +0 -0
  226. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_serve_round_log.py +0 -0
  227. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_serve_sentinel.py +0 -0
  228. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_serve_startup_hooks.py +0 -0
  229. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_service_unit.py +0 -0
  230. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_startup_check.py +0 -0
  231. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_substrate.py +0 -0
  232. {cli_agent_runner-0.1.40 → cli_agent_runner-0.1.41}/tests/unit/test_vcs_state.py +0 -0
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.41] - 2026-06-07
9
+
10
+ ### Added
11
+ - New `codewhale` preset — supervise Hmbown/CodeWhale (DeepSeek terminal agent) via `codewhale exec --auto --output-format stream-json`. Scaffold it with `agent-runner init --preset codewhale`.
12
+ - New built-in `codewhale_error_detector` plugin — emits `agent_usage_recorded` (model + token counts) from codewhale's stream-json output. Transient-error classification is best-effort (mappable buckets only); auth failures surface via the existing monitor `oauth_fail` detector.
13
+
8
14
  ## [0.1.40] - 2026-05-31
9
15
 
10
16
  ### Security
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cli-agent-runner
3
- Version: 0.1.40
3
+ Version: 0.1.41
4
4
  Summary: Restart-on-exit supervisor for autonomous CLI agents
5
5
  Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
6
6
  Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.40'
22
- __version_tuple__ = version_tuple = (0, 1, 40)
21
+ __version__ = version = '0.1.41'
22
+ __version_tuple__ = version_tuple = (0, 1, 41)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -0,0 +1,133 @@
1
+ """Built-in post_round_hook for codewhale CLI: usage events + transient classifier.
2
+
3
+ Third built-in plugin (after claude, gemini). Parses codewhale's `exec
4
+ --output-format stream-json` NDJSON stdout tail; emits agent_usage_recorded
5
+ from the terminal metadata record. Transient-error classification is
6
+ best-effort and emits ONLY when an error maps to an existing bucket (like
7
+ gemini): codewhale's exec stdout surfaces a {"type":"error"} record, but the
8
+ only observed case so far is auth failure (oauth_fail territory, not a
9
+ transient bucket), so nothing maps yet -- usage-only today. 429/5xx mapping
10
+ is added when a real rate-limit sample is captured.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import time
17
+ from collections import deque
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ from agent_runner.api import (
22
+ emit_agent_usage_recorded,
23
+ emit_transient_error_detected,
24
+ )
25
+ from agent_runner.builtin_plugins._constants import (
26
+ _5XX_STATUSES,
27
+ _BACK_OFF_DEFAULTS,
28
+ _RAW_CAP,
29
+ _TAIL_LINES,
30
+ )
31
+ from agent_runner.hooks import HookContext, register_post_round_hook
32
+
33
+
34
class CodewhaleErrorDetector:
    """Post-round hook that turns a codewhale round log into runner events.

    After each round it parses the tail of the agent log and emits a
    transient-error event and/or a usage event, depending on what the
    parser found. Rounds driven by any other agent binary are ignored.
    """

    name = "codewhale_error_detector"

    def after_round(self, ctx: HookContext, result: Any) -> None:
        """Emit events for one finished round.

        Args:
            ctx: Hook context; ``agent_binary``, ``agent_log_path``,
                ``log_dir``, ``round_num`` and ``phase`` are read.
            result: Round result; ``exit_code`` and ``timed_out`` are read
                to derive the ``success`` flag of the usage event.
        """
        # Guard clauses: not our agent, or nothing on disk to parse.
        if ctx.agent_binary != "codewhale":
            return
        round_log = ctx.agent_log_path
        if round_log is None:
            return
        if not round_log.exists():
            return

        findings = _parse_codewhale_log(round_log)

        transient = findings.get("transient_error")
        if transient:
            emit_transient_error_detected(
                ctx.log_dir, round_num=ctx.round_num, **transient
            )

        usage = findings.get("usage")
        if usage:
            # A round counts as successful only on a clean exit without timeout.
            round_succeeded = result.exit_code == 0 and not result.timed_out
            emit_agent_usage_recorded(
                ctx.log_dir,
                round_num=ctx.round_num,
                phase=ctx.phase or "",
                success=round_succeeded,
                **usage,
            )
58
+
59
+
60
def _parse_codewhale_log(log_path: Path) -> dict[str, Any]:
    """Extract usage and, when classifiable, a transient error from a round log.

    Only the last ``_TAIL_LINES`` lines of the NDJSON log are scanned. The
    most recent ``{"type": "metadata"}`` record supplies usage; the most
    recent ``{"type": "error"}`` record is passed to
    ``_classify_codewhale_error``.

    Lines that are blank, not valid JSON, or not JSON objects are skipped
    (the original author notes codewhale prefixes some stdout with terminal
    escapes). Malformed metadata never raises: a non-dict ``meta`` is
    treated as absent, and token counts that are missing, null or
    non-numeric are reported as 0.

    Args:
        log_path: Path to the round's agent log.

    Returns:
        A dict with an optional ``"usage"`` key (keyword arguments for the
        usage event) and an optional ``"transient_error"`` key (keyword
        arguments for the transient-error event). Empty when nothing
        relevant was found.
    """
    with log_path.open("r", encoding="utf-8", errors="replace") as f:
        tail = deque(f, maxlen=_TAIL_LINES)

    metadata: dict[str, Any] = {}
    error_event: dict[str, Any] | None = None
    for raw_line in tail:
        text = raw_line.strip()
        if not text:
            continue
        try:
            event = json.loads(text)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            continue
        etype = event.get("type")
        if etype == "metadata":
            # Last record wins; a non-dict payload would break .get() below.
            meta = event.get("meta")
            metadata = meta if isinstance(meta, dict) else {}
        elif etype == "error":
            error_event = event

    out: dict[str, Any] = {}

    if metadata:
        model = metadata.get("model")
        out["usage"] = {
            "agent": "codewhale",
            # A JSON null model would otherwise be reported as "None".
            "model": "unknown" if model is None else str(model),
            "input_tokens": _coerce_token_count(metadata.get("input_tokens", 0)),
            "output_tokens": _coerce_token_count(metadata.get("output_tokens", 0)),
            "cached_tokens": 0,  # codewhale exec stdout exposes no cache counts
            "cost_usd": None,  # codewhale exec stdout exposes no USD
            "duration_ms": 0,  # not in exec metadata
        }

    if error_event is not None:
        classification = _classify_codewhale_error(error_event)
        if classification:
            duration = _BACK_OFF_DEFAULTS[classification]
            out["transient_error"] = {
                "classification": classification,
                "agent": "codewhale",
                "reset_at_epoch": int(time.time() + duration),
                "raw": str(error_event.get("error", "error"))[:_RAW_CAP],
            }
    return out


def _coerce_token_count(value: Any) -> int:
    """Return ``value`` as an int, or 0 when it cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # None, free text, NaN or Infinity: report no tokens rather than
        # lose the whole usage event.
        return 0
111
+
112
+
113
def _classify_codewhale_error(error_event: dict[str, Any]) -> str | None:
    """Map a codewhale {"type":"error"} record to a transient bucket, or None.

    The status is read from the record's ``code`` field, falling back to
    ``status_code`` when ``code`` is missing or falsy. Numeric strings such
    as ``"429"`` are accepted. The mapping is:

    * 429 -> ``"rate_limit_model"``
    * any member of ``_5XX_STATUSES`` -> ``"api_transient_5xx"``
    * 408 -> ``"api_timeout"``

    None means 'not a transient error' (e.g. auth failure -> handled by the
    monitor's oauth_fail log-scan, not the transient classifier). Per the
    original author's note, the error records observed so far carry only a
    free-text 'error' string with no status code, so in practice this
    returns None until codewhale emits a numeric status.
    """
    code = error_event.get("code") or error_event.get("status_code")
    if isinstance(code, str):
        try:
            code = int(code)
        except ValueError:
            # Symbolic or free-text codes carry no HTTP status to map.
            return None
    elif isinstance(code, bool) or not isinstance(code, (int, float)):
        # None, lists, dicts, booleans: nothing numeric to classify, and
        # unhashable values must not reach the membership test below.
        return None

    if code == 429:
        return "rate_limit_model"
    if code in _5XX_STATUSES:
        return "api_transient_5xx"
    if code == 408:
        return "api_timeout"
    return None
131
+
132
+
133
+ register_post_round_hook(CodewhaleErrorDetector())
@@ -2,15 +2,27 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import importlib.resources
6
+
5
7
  from agent_runner import api
6
8
  from agent_runner.cli.common import emit, fail, work_dir_from_args
7
9
 
8
10
 
11
def _preset_names() -> list[str]:
    """Return the sorted names of the presets shipped with the package.

    A preset is any ``*.toml`` entry in the ``agent_runner.presets`` package;
    its name is the file name without the ``.toml`` suffix. The list is
    derived from the packaged files rather than hardcoded, so dropping in a
    new .toml file is enough to extend the ``--preset`` choices.
    """
    suffix = ".toml"
    names = []
    for entry in importlib.resources.files("agent_runner.presets").iterdir():
        filename = entry.name
        if filename.endswith(suffix):
            names.append(filename[: -len(suffix)])
    names.sort()
    return names
19
+
20
+
9
21
  def add_parser(sub, parent) -> None:
10
22
  p = sub.add_parser("init", parents=[parent], help="Scaffold agent-runner project files")
11
23
  p.add_argument(
12
24
  "--preset",
13
- choices=["claude", "aider", "gemini"],
25
+ choices=_preset_names(),
14
26
  default="claude",
15
27
  help="Which agent CLI preset to scaffold (default: claude)",
16
28
  )
@@ -0,0 +1,30 @@
1
+ # agent-runner.toml — generated by `agent-runner init --preset codewhale`.
2
+ #
3
+ # Prereqs:
4
+ # - codewhale installed (ships `codewhale` + `codewhale-tui`; both on PATH):
5
+ # npm i -g codewhale (or cargo/brew per CodeWhale docs)
6
+ # - DEEPSEEK_API_KEY set on the supervisor host (or a key saved via
7
+ # `codewhale auth set`; resolution order is config > keyring > env)
8
+ # - work_dir is a git repo
9
+
10
+ [agent]
11
+ command = ["codewhale", "exec", "--auto", "--output-format", "stream-json"]
12
+ prompt_arg_template = ["{prompt}"]
13
+ name = "codewhale"
14
+
15
+ [runtime]
16
+ work_dir = "."
17
+ log_dir = "~/.agent-runner/{project}/logs"
18
+ round_timeout_s = 1800
19
+ restart_delay_s = 3
20
+
21
+ [prompt]
22
+ file = "./prompts/main.md"
23
+ inject_context = true
24
+
25
+ [vcs]
26
+ dirty_action = "stash"
27
+ stash_idempotency_s = 5
28
+
29
+ [monitor]
30
+ auth_fail_hint = "Run `codewhale auth status` to inspect provider/credentials, or set DEEPSEEK_API_KEY on the supervisor host."
@@ -5,8 +5,8 @@ Writes three files into a git repo:
5
5
  prompts/main.md — neutral 8-line placeholder
6
6
  .gitignore — append "logs/" if missing
7
7
 
8
- Available presets ship as package data in `agent_runner/presets/*.toml`.
9
- Currently: `claude`, `aider`, `gemini`.
8
+ Available presets ship as package data in `agent_runner/presets/*.toml`;
9
+ `agent-runner init --preset <name>` discovers them from that directory.
10
10
 
11
11
  Optionally commits in one step (default true via the CLI).
12
12
  """
@@ -36,7 +36,7 @@ appends `logs/` to `.gitignore`. By default also creates a git commit.
36
36
 
37
37
  Flags:
38
38
 
39
- - `--preset {claude,aider,gemini}` — agent CLI preset to scaffold (default: `claude`)
39
+ - `--preset {claude,aider,gemini,codewhale}` — agent CLI preset to scaffold (default: `claude`)
40
40
  - `--force` — overwrite an existing `agent-runner.toml`
41
41
  - `--no-commit` — skip the initial git commit
42
42
 
@@ -103,10 +103,11 @@ working tree:
103
103
  `[agent.env]` is a flat `dict[str, str]` of environment variables injected into
104
104
  the agent subprocess **per round**. This is preset-supplied per CLI: e.g. the
105
105
  claude preset sets `DISABLE_AUTOUPDATER=1` to prevent mid-loop self-updates;
106
- the aider preset omits `[agent.env]` entirely. Override these values in your
107
- project's `agent-runner.toml` only when you need to deviate from the preset
108
- default. The runtime merges `[agent.env]` on top of the supervisor's own env;
109
- unset (empty string) does not unset an inherited variable.
106
+ the aider and codewhale presets omit `[agent.env]` entirely (both resolve their
107
+ API keys from the ambient environment or their own keyrings). Override these
108
+ values in your project's `agent-runner.toml` only when you need to deviate from
109
+ the preset default. The runtime merges `[agent.env]` on top of the supervisor's
110
+ own env; an empty-string value does not unset an inherited variable.
110
111
 
111
112
  ## `[monitor].auth_fail_hint` (preset-supplied)
112
113
 
@@ -117,6 +118,8 @@ guidance without authoring it themselves:
117
118
  - `--preset claude` → recommend `claude /login` / refresh `ANTHROPIC_API_KEY`.
118
119
  - `--preset aider` → verify provider env var (`OPENAI_API_KEY` /
119
120
  `ANTHROPIC_API_KEY` / `DEEPSEEK_API_KEY` / etc.); run `aider --models`.
121
+ - `--preset codewhale` → run `codewhale auth status` to inspect provider
122
+ credentials, or set `DEEPSEEK_API_KEY` on the supervisor host.
120
123
 
121
124
  Override in your `agent-runner.toml` if you ship a custom CLI.
122
125
 
@@ -37,7 +37,7 @@ Edit `prompts/main.md` to describe what the agent should do per round.
37
37
  Edit `agent-runner.toml` if you need to change `round_timeout_s` or `[phases]`.
38
38
 
39
39
  The default preset (`--preset claude`) invokes `claude`. Other built-in
40
- presets: `--preset aider` and `--preset gemini`. To use any other CLI,
40
+ presets: `--preset aider`, `--preset gemini`, and `--preset codewhale`. To use any other CLI,
41
41
  edit `agent.command` to your CLI's invocation and `agent.prompt_arg_template`
42
42
  to its prompt-argument syntax — for example:
43
43
 
@@ -0,0 +1,98 @@
1
+ # Running agent-runner with CodeWhale
2
+
3
+ [CodeWhale](https://github.com/Hmbown/CodeWhale) is a DeepSeek-powered terminal
4
+ agent. It runs one-shot via `codewhale exec --auto`, fitting agent-runner's
5
+ per-round lifecycle naturally.
6
+
7
+ ## Prerequisites
8
+
9
+ - `codewhale` installed (ships both `codewhale` and `codewhale-tui`; both must
10
+ be on PATH):
11
+ ```bash
12
+ npm i -g codewhale
13
+ ```
14
+ (or via cargo/brew — see the CodeWhale docs for alternative install methods)
15
+ - DeepSeek API key available to codewhale via one of:
16
+ - `DEEPSEEK_API_KEY` environment variable on the supervisor host, **or**
17
+ - a key saved via `codewhale auth set` (resolution order: config > keyring > env)
18
+ - A git repo as `work_dir` (required for VCS state tracking).
19
+
20
+ ## Scaffold
21
+
22
+ ```bash
23
+ git init my-project && cd my-project
24
+ agent-runner init --preset codewhale
25
+ ```
26
+
27
+ This writes:
28
+ - `agent-runner.toml` — codewhale preset (command, flags, auth hint).
29
+ - `prompts/main.md` — neutral placeholder; replace with your task description.
30
+ - `.gitignore` — adds `logs/` if missing.
31
+
32
+ ## CodeWhale preset (excerpt of `agent_runner/presets/codewhale.toml`)
33
+
34
+ ```toml
35
+ [agent]
36
+ command = ["codewhale", "exec", "--auto", "--output-format", "stream-json"]
37
+ prompt_arg_template = ["{prompt}"]
38
+ name = "codewhale"
39
+ # [agent.env] omitted — DeepSeek key is ambient (env or codewhale keyring).
40
+
41
+ [runtime]
42
+ work_dir = "."
43
+ log_dir = "~/.agent-runner/{project}/logs"
44
+ round_timeout_s = 1800
45
+ restart_delay_s = 3
46
+
47
+ [prompt]
48
+ file = "./prompts/main.md"
49
+ inject_context = true
50
+
51
+ [vcs]
52
+ dirty_action = "stash"
53
+ stash_idempotency_s = 5
54
+
55
+ [monitor]
56
+ auth_fail_hint = "Run `codewhale auth status` to inspect provider/credentials, or set DEEPSEEK_API_KEY on the supervisor host."
57
+ ```
58
+
59
+ ### Why each flag
60
+
61
+ - `exec` — one-shot execution mode (non-interactive, no TUI).
62
+ - `--auto` — no interactive confirmation prompts; **mandatory** for unattended supervisor
63
+ mode.
64
+ - `--output-format stream-json` — emits NDJSON to stdout; required so the
65
+ `codewhale_error_detector` plugin can parse usage records. Without this flag
66
+ the plugin receives human-readable text and emits no `agent_usage_recorded`
67
+ events.
68
+
69
+ ### What's intentionally not configured
70
+
71
+ - **No `[agent.env]`** — the DeepSeek key is resolved by codewhale from the
72
+ ambient environment or its own keyring. Set `DEEPSEEK_API_KEY` on the
73
+ supervisor host rather than in the TOML.
74
+
75
+ ## What the detector emits
76
+
77
+ The built-in `codewhale_error_detector` plugin parses the round log tail after
78
+ each round completes:
79
+
80
+ - **`agent_usage_recorded`** — emitted from the `{"type":"metadata","meta":{...}}`
81
+ terminal record. Carries `model`, `input_tokens`, `output_tokens`. `cost_usd`
82
+ is always `None` (codewhale's stream-json output does not expose USD cost).
83
+ - **`transient_error_detected`** — emitted only when a `{"type":"error"}` record
84
+ maps to an existing classification bucket (`rate_limit_model`, `api_transient_5xx`,
85
+ `api_timeout`). The only observed error so far is auth failure, which is **not**
86
+ a transient bucket — it surfaces via the monitor's `oauth_fail` detector instead.
87
+
88
+ ## Troubleshooting
89
+
90
+ | Symptom | Probable cause |
91
+ |---|---|
92
+ | `codewhale: command not found` | codewhale not on PATH — `npm i -g codewhale` |
93
+ | Round exits early with a non-zero exit code | likely auth failure; check `peek` and `~/.agent-runner/<project>/logs/rounds/R*.log` for the error record |
94
+ | `oauth_fail` alert in `peek` | DeepSeek auth failure detected. Hint: "Run `codewhale auth status`…". Check key validity and re-export `DEEPSEEK_API_KEY`. |
95
+ | No `agent_usage_recorded` events | `--output-format stream-json` may be missing from command; verify the preset was applied correctly |
96
+ | `codewhale auth status` shows no key | Run `codewhale auth set` to save a key, or export `DEEPSEEK_API_KEY` before starting the supervisor |
97
+
98
+ See also: [`docs/quickstart.md`](../quickstart.md), [`docs/configuration.md`](../configuration.md).
@@ -88,7 +88,7 @@ during sustained upstream outage."
88
88
  Counter reset: any round that completes without firing a new
89
89
  `transient_error_detected` event clears all bucket counters back to zero.
90
90
 
91
- > **Example**: Gateway 2026-05-18 reported sustained 5xx + 529 from
91
+ > **Example**: A 2026-05-18 field report described sustained 5xx + 529 from
92
92
  > Anthropic where our previous fixed 60s wait was too short — the next
93
93
  > round hit the same error, waited 60s again, and again. Rejected: adding
94
94
  > a config knob (`[runtime] transient_backoff_strategy = "fixed" |
@@ -159,6 +159,31 @@ That's prompt-engineering project policy. It varies per use case and evolves
159
159
  faster than a library version cycle. We are a runtime harness, not a usage
160
160
  methodology.
161
161
 
162
+ ### Not a remote-execution portal (agent-local, shell-remote)
163
+
164
+ agent-runner assumes the agent and the supervisor run on the **same host**.
165
+ The unattended model requires it: to survive a disconnected laptop and run
166
+ 24×7, the agent must live on the supervised host, not stream commands to it
167
+ from elsewhere. We do not route an agent's tool calls to a remote shell (SSH,
168
+ container, k8s pod). `monitor --host` provides remote *observation*, not remote
169
+ *execution*.
170
+
171
+ Tools like [zmx](https://zmx.sh) cover the complementary case — an
172
+ interactive, attended agent that stays local while its shell runs remotely.
173
+ That's a different niche (a human watching, full local MCP/skills, ephemeral
174
+ sessions), and the two compose: a consumer can point `[agent].command` at an
175
+ agent that itself routes through such a portal. But a portal adapter in core
176
+ would be an anticipatory feature for a topology our model doesn't use.
177
+
178
+ > **Example**: The 2026-04 zmx "ai portal" release (agent-local, shell-remote
179
+ > via a session) prompted this entry. It validates our CLI-not-MCP stance
180
+ > (its own prior-art notes call MCP servers a configuration pain), but adding
181
+ > remote-execution routing to agent-runner is rejected until a consumer
182
+ > presents a concrete unattended use case that needs it. Note for combined
183
+ > deployments: command + output flowing through such a session is a secret
184
+ > surface outside agent-runner's control (cf. the 0.1.40 event-log
185
+ > containment) — the operator owns it.
186
+
162
187
  ---
163
188
 
164
189
  ## How to evaluate a feature request
@@ -47,6 +47,7 @@ agent-runner = "agent_runner.cli:main"
47
47
  [project.entry-points."agent_runner.post_round_hooks"]
48
48
  claude_error_detector = "agent_runner.builtin_plugins.claude_rate_limit:ClaudeErrorDetector"
49
49
  gemini_error_detector = "agent_runner.builtin_plugins.gemini:GeminiErrorDetector"
50
+ codewhale_error_detector = "agent_runner.builtin_plugins.codewhale:CodewhaleErrorDetector"
50
51
 
51
52
  [project.optional-dependencies]
52
53
  dev = [
@@ -1,4 +1,4 @@
1
- """End-to-end: `agent-runner init --preset {claude,aider,gemini}` produces valid scaffolds."""
1
+ """End-to-end: `agent-runner init --preset <name>` produces valid scaffolds (all presets)."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -9,7 +9,7 @@ from pathlib import Path
9
9
  import pytest
10
10
 
11
11
 
12
- @pytest.mark.parametrize("preset_name", ["claude", "aider", "gemini"])
12
+ @pytest.mark.parametrize("preset_name", ["claude", "aider", "gemini", "codewhale"])
13
13
  def test_given_preset_when_init_then_toml_is_loadable(tmp_git_repo: Path, preset_name: str) -> None:
14
14
  from agent_runner.api import init
15
15
  from agent_runner.config import load_config
@@ -92,6 +92,17 @@ def test_doc_value_sets_match_ssot() -> None:
92
92
  if cls_doc != cls_ssot:
93
93
  failures.append(f"classification doc {cls_doc} != SSOT {cls_ssot}")
94
94
 
95
+ # --preset choices: commands.md "--preset {a,b,c}" must equal the derived SSOT.
96
+ # init_cmd derives choices from presets/*.toml; the hand-written doc list must track it.
97
+ from agent_runner.cli.init_cmd import _preset_names
98
+
99
+ preset_ssot = set(_preset_names())
100
+ cmds_text = (REPO / "docs/commands.md").read_text(encoding="utf-8")
101
+ pm = re.search(r"--preset \{([^}]+)\}", cmds_text)
102
+ preset_doc = set(pm.group(1).split(",")) if pm else set()
103
+ if preset_doc != preset_ssot:
104
+ failures.append(f"--preset doc {preset_doc} != SSOT {preset_ssot}")
105
+
95
106
  assert not failures, "doc value-set drift:\n" + "\n".join(failures)
96
107
 
97
108
 
@@ -0,0 +1,155 @@
1
+ """Unit tests for CodewhaleErrorDetector (usage; classify-only-what-maps)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ from tests._test_helpers import make_hook_context, write_round_log
8
+
9
+ _MOD = "agent_runner.builtin_plugins.codewhale"
10
+
11
+
12
+ def test_given_success_round_when_after_round_then_usage_emitted_from_metadata(tmp_path):
13
+ from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
14
+
15
+ # Real captured codewhale exec stream-json terminal records.
16
+ write_round_log(
17
+ tmp_path,
18
+ 1,
19
+ [
20
+ {"type": "content", "content": "working..."},
21
+ {"type": "tool_result", "id": "c1", "output": "ok", "status": "success"},
22
+ {
23
+ "type": "metadata",
24
+ "meta": {
25
+ "model": "deepseek-v4-pro",
26
+ "input_tokens": 66014,
27
+ "output_tokens": 303,
28
+ "session_id": "f029d9a9",
29
+ "status": "completed",
30
+ },
31
+ },
32
+ {"type": "done"},
33
+ ],
34
+ )
35
+ result = MagicMock(exit_code=0, timed_out=False)
36
+ with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
37
+ with patch(f"{_MOD}.emit_transient_error_detected") as err_emit:
38
+ CodewhaleErrorDetector().after_round(
39
+ make_hook_context(tmp_path, agent_name="codewhale"), result=result
40
+ )
41
+ usage_emit.assert_called_once()
42
+ kw = usage_emit.call_args.kwargs
43
+ assert kw["agent"] == "codewhale"
44
+ assert kw["model"] == "deepseek-v4-pro"
45
+ assert kw["input_tokens"] == 66014
46
+ assert kw["output_tokens"] == 303
47
+ assert kw["cost_usd"] is None
48
+ assert kw["cached_tokens"] == 0
49
+ err_emit.assert_not_called()
50
+
51
+
52
+ def test_given_non_codewhale_binary_when_after_round_then_no_emit(tmp_path):
53
+ from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
54
+
55
+ write_round_log(
56
+ tmp_path,
57
+ 1,
58
+ [
59
+ {
60
+ "type": "metadata",
61
+ "meta": {
62
+ "model": "x",
63
+ "input_tokens": 1,
64
+ "output_tokens": 1,
65
+ "status": "completed",
66
+ },
67
+ }
68
+ ],
69
+ )
70
+ result = MagicMock(exit_code=0, timed_out=False)
71
+ with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
72
+ CodewhaleErrorDetector().after_round(
73
+ make_hook_context(tmp_path, agent_name="claude"), result=result
74
+ )
75
+ usage_emit.assert_not_called()
76
+
77
+
78
+ def test_given_auth_error_round_when_after_round_then_no_transient_error(tmp_path):
79
+ """Auth failure is NOT a transient bucket (it's oauth_fail territory) -> usage only."""
80
+ from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
81
+
82
+ write_round_log(
83
+ tmp_path,
84
+ 1,
85
+ [
86
+ {"type": "error", "error": "Authentication failed: invalid key"},
87
+ {
88
+ "type": "metadata",
89
+ "meta": {
90
+ "model": "deepseek-v4-pro",
91
+ "input_tokens": 0,
92
+ "output_tokens": 0,
93
+ "status": "failed",
94
+ },
95
+ },
96
+ {"type": "done"},
97
+ ],
98
+ )
99
+ result = MagicMock(exit_code=1, timed_out=False)
100
+ with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
101
+ with patch(f"{_MOD}.emit_transient_error_detected") as err_emit:
102
+ CodewhaleErrorDetector().after_round(
103
+ make_hook_context(tmp_path, agent_name="codewhale"), result=result
104
+ )
105
+ err_emit.assert_not_called() # auth error does not map to a transient bucket
106
+ usage_emit.assert_called_once() # usage still emitted (status:failed round)
107
+
108
+
109
+ def test_given_non_json_lines_when_after_round_then_tolerated(tmp_path):
110
+ """Real codewhale stdout has terminal-escape non-JSON lines; parser must skip them."""
111
+ from agent_runner.builtin_plugins.codewhale import CodewhaleErrorDetector
112
+
113
+ # Write raw lines manually (write_round_log only emits JSON dicts).
114
+ # Path must match make_hook_context default: tmp_path/rounds/R1-test.log
115
+ rounds_dir = tmp_path / "rounds"
116
+ rounds_dir.mkdir(parents=True, exist_ok=True)
117
+ meta_line = (
118
+ '{"type":"metadata","meta":{'
119
+ '"model":"deepseek-v4-pro","input_tokens":5,"output_tokens":2,'
120
+ '"status":"completed"}}'
121
+ )
122
+ (rounds_dir / "R1-test.log").write_text(
123
+ "\x1b]9;4;1\x07\x1b]0;\U0001f433 CodeWhale\x07"
124
+ '{"type":"content","content":"hi"}\n' + meta_line + "\n"
125
+ "not json at all\n"
126
+ '{"type":"done"}\n',
127
+ encoding="utf-8",
128
+ )
129
+ result = MagicMock(exit_code=0, timed_out=False)
130
+ with patch(f"{_MOD}.emit_agent_usage_recorded") as usage_emit:
131
+ CodewhaleErrorDetector().after_round(
132
+ make_hook_context(tmp_path, agent_name="codewhale"), result=result
133
+ )
134
+ usage_emit.assert_called_once()
135
+ assert usage_emit.call_args.kwargs["input_tokens"] == 5
136
+
137
+
138
+ def test_classify_codewhale_error_maps_only_known_buckets():
139
+ """Lock the dormant forward-path: numeric status codes map to existing
140
+ buckets; everything else (incl. the captured free-text auth error) → None.
141
+ Guards against silent regression when a real rate-limit sample is wired in.
142
+ """
143
+ from agent_runner.builtin_plugins._constants import _BACK_OFF_DEFAULTS
144
+ from agent_runner.builtin_plugins.codewhale import _classify_codewhale_error
145
+
146
+ assert _classify_codewhale_error({"code": 429}) == "rate_limit_model"
147
+ assert _classify_codewhale_error({"status_code": 503}) == "api_transient_5xx"
148
+ assert _classify_codewhale_error({"code": 408}) == "api_timeout"
149
+ # free-text auth error (the only real captured shape) does not map
150
+ assert _classify_codewhale_error({"error": "Authentication failed: ..."}) is None
151
+ assert _classify_codewhale_error({"code": 418}) is None
152
+ # every non-None result must be a real classification bucket (SSOT)
153
+ for ev in ({"code": 429}, {"status_code": 500}, {"code": 408}):
154
+ cls = _classify_codewhale_error(ev)
155
+ assert cls in _BACK_OFF_DEFAULTS