cli-agent-runner 0.1.34__tar.gz → 0.1.36__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/CHANGELOG.md +14 -0
  2. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/PKG-INFO +6 -6
  3. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/README.md +5 -5
  4. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/README.zh.md +8 -7
  5. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/__init__.py +0 -8
  6. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/_version.py +2 -2
  7. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/api.py +1 -0
  8. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/builtin_plugins/claude_rate_limit.py +3 -4
  9. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/config.py +14 -0
  10. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/monitor.py +43 -2
  11. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/architecture.md +4 -2
  12. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/commands.md +2 -2
  13. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/configuration.md +2 -0
  14. cli_agent_runner-0.1.36/docs/migrations/0.1.35.md +97 -0
  15. cli_agent_runner-0.1.36/docs/migrations/0.1.36.md +73 -0
  16. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/plugins.md +7 -4
  17. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/runbook.md +27 -3
  18. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/pyproject.toml +2 -3
  19. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_architecture.py +3 -2
  20. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_entry_points_resolve.py +11 -6
  21. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_observation.py +15 -0
  22. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_service.py +22 -0
  23. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_config.py +23 -0
  24. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_docgen.py +2 -2
  25. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_monitor_assembly.py +1 -0
  26. cli_agent_runner-0.1.36/tests/unit/test_monitor_detect_supervisor_stale.py +38 -0
  27. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_monitor_detectors.py +36 -1
  28. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.codecov.yml +0 -0
  29. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  30. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  31. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  32. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  33. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.github/workflows/ci.yml +0 -0
  34. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.github/workflows/release.yml +0 -0
  35. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.gitignore +0 -0
  36. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/.vulture-whitelist.py +0 -0
  37. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/CODE_OF_CONDUCT.md +0 -0
  38. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/CONTRIBUTING.md +0 -0
  39. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/LICENSE +0 -0
  40. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/SECURITY.md +0 -0
  41. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/_docgen.py +0 -0
  42. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/_emit.py +0 -0
  43. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/_registry.py +0 -0
  44. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/_substrate.py +0 -0
  45. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/_throttle.py +0 -0
  46. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/agent_runtime.py +0 -0
  47. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/api_types.py +0 -0
  48. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/builtin_plugins/__init__.py +0 -0
  49. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/builtin_plugins/_constants.py +0 -0
  50. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/builtin_plugins/gemini.py +0 -0
  51. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/__init__.py +0 -0
  52. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/__main__.py +0 -0
  53. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/common.py +0 -0
  54. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/events_cmd.py +0 -0
  55. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/init_cmd.py +0 -0
  56. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/install_cmd.py +0 -0
  57. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/monitor_cmd.py +0 -0
  58. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/peek_cmd.py +0 -0
  59. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/round_cmd.py +0 -0
  60. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/serve_cmd.py +0 -0
  61. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/service_cmd.py +0 -0
  62. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/cli/upgrade_cmd.py +0 -0
  63. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/context_store.py +0 -0
  64. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/defenses.py +0 -0
  65. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/detector_helpers.py +0 -0
  66. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/events.py +0 -0
  67. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/hooks.py +0 -0
  68. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/http_progress.py +0 -0
  69. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/lifecycle.py +0 -0
  70. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/metrics.py +0 -0
  71. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/presets/__init__.py +0 -0
  72. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/presets/aider.toml +0 -0
  73. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/presets/claude.toml +0 -0
  74. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/presets/gemini.toml +0 -0
  75. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/prompt_loader.py +0 -0
  76. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/round_log.py +0 -0
  77. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/round_view.py +0 -0
  78. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/runner.py +0 -0
  79. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/scaffold.py +0 -0
  80. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/service_unit.py +0 -0
  81. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/startup_check.py +0 -0
  82. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/agent_runner/vcs_state.py +0 -0
  83. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/build.sh +0 -0
  84. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/deploy/example-agent-runner.toml +0 -0
  85. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/deploy/launchd.plist.tmpl +0 -0
  86. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/deploy/run-loop.sh +0 -0
  87. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/deploy/systemd.service.tmpl +0 -0
  88. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/README.md +0 -0
  89. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/events.md +0 -0
  90. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/long-running-agents.md +0 -0
  91. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/marketing/README.md +0 -0
  92. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/marketing/promo-cn.html +0 -0
  93. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.16.md +0 -0
  94. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.17.md +0 -0
  95. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.19.md +0 -0
  96. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.20.md +0 -0
  97. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.21.md +0 -0
  98. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.22.md +0 -0
  99. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.23.md +0 -0
  100. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.24.md +0 -0
  101. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.25.md +0 -0
  102. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.26.md +0 -0
  103. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.27.md +0 -0
  104. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.28.md +0 -0
  105. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.29.md +0 -0
  106. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.30.md +0 -0
  107. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.31.md +0 -0
  108. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.32.md +0 -0
  109. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.33.md +0 -0
  110. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/migrations/0.1.34.md +0 -0
  111. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/quickstart.md +0 -0
  112. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/recipes/aider.md +0 -0
  113. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/docs/thesis.md +0 -0
  114. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/__init__.py +0 -0
  115. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/_test_helpers.py +0 -0
  116. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/conftest.py +0 -0
  117. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/contract/__init__.py +0 -0
  118. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/contract/test_public_api_surface.py +0 -0
  119. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/e2e/__init__.py +0 -0
  120. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/e2e/conftest.py +0 -0
  121. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/e2e/test_e2e_graceful_stop.py +0 -0
  122. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/e2e/test_e2e_install_systemd.py +0 -0
  123. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/e2e/test_e2e_monitor_remote.py +0 -0
  124. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
  125. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
  126. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
  127. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
  128. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/__init__.py +0 -0
  129. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_bounded_run.py +0 -0
  130. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_context_enricher_namespacing.py +0 -0
  131. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_fresh_eyes_signal.py +0 -0
  132. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_grace_kill_emission.py +0 -0
  133. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_install_dry_run.py +0 -0
  134. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_monitor_seeded.py +0 -0
  135. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_plugin_detector_loaded.py +0 -0
  136. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_plugin_owned_paths.py +0 -0
  137. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_plugin_real_flow.py +0 -0
  138. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
  139. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_scaffold_presets.py +0 -0
  140. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_serve_loop.py +0 -0
  141. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_substrate_fingerprint.py +0 -0
  142. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/integration/test_transient_error_backoff.py +0 -0
  143. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/__init__.py +0 -0
  144. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_atomic_write_enforced.py +0 -0
  145. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_catalogs.py +0 -0
  146. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_classification_ssot.py +0 -0
  147. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_docs_generated.py +0 -0
  148. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_event_kind_registry.py +0 -0
  149. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_event_kinds_ssot.py +0 -0
  150. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_events_doc_contract.py +0 -0
  151. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_layer_2_loop_size.py +0 -0
  152. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_module_boundaries.py +0 -0
  153. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_module_sizes.py +0 -0
  154. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_no_ai_signatures.py +0 -0
  155. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
  156. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_peek_schema_version.py +0 -0
  157. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
  158. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_round_result_stable.py +0 -0
  159. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
  160. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/invariants/test_upstream_schema_canary.py +0 -0
  161. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/literate/__init__.py +0 -0
  162. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/literate/parser.py +0 -0
  163. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/literate/test_parser.py +0 -0
  164. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/literate/test_quickstart.py +0 -0
  165. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/__init__.py +0 -0
  166. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_agent_runtime.py +0 -0
  167. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_agent_runtime_grace.py +0 -0
  168. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_agent_runtime_progress.py +0 -0
  169. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_assemble_prompt.py +0 -0
  170. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_events_stream.py +0 -0
  171. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_install.py +0 -0
  172. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_read_round_num.py +0 -0
  173. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_resolve_phase.py +0 -0
  174. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_api_types.py +0 -0
  175. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_auto_stop_gating.py +0 -0
  176. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_claude_error_detector.py +0 -0
  177. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_cli.py +0 -0
  178. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_cli_common.py +0 -0
  179. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_cli_init_install.py +0 -0
  180. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_cli_monitor_http.py +0 -0
  181. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_cli_service_peek_monitor.py +0 -0
  182. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_cli_upgrade.py +0 -0
  183. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_config_fresh_eyes.py +0 -0
  184. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_config_max_rounds.py +0 -0
  185. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_config_stop_file.py +0 -0
  186. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
  187. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_config_transient_error_action.py +0 -0
  188. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_context_store.py +0 -0
  189. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_defenses.py +0 -0
  190. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_detector_helpers.py +0 -0
  191. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_detector_protocol.py +0 -0
  192. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_events.py +0 -0
  193. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_events_cmd.py +0 -0
  194. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_fresh_eyes_trigger.py +0 -0
  195. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_gemini_plugin.py +0 -0
  196. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_hook_failure_isolation.py +0 -0
  197. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_hooks.py +0 -0
  198. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_http_progress.py +0 -0
  199. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_init_entry_points.py +0 -0
  200. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_lifecycle.py +0 -0
  201. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_metrics.py +0 -0
  202. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
  203. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
  204. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_monitor_remote.py +0 -0
  205. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_peek_argparse.py +0 -0
  206. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_peek_select.py +0 -0
  207. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_presets.py +0 -0
  208. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_prompt_loader.py +0 -0
  209. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_round_log_helpers.py +0 -0
  210. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_round_view.py +0 -0
  211. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_runner.py +0 -0
  212. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_runner_throttle.py +0 -0
  213. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_scaffold.py +0 -0
  214. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_serve_cmd_bounded.py +0 -0
  215. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_serve_round_log.py +0 -0
  216. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_serve_sentinel.py +0 -0
  217. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_serve_startup_hooks.py +0 -0
  218. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_service_unit.py +0 -0
  219. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_startup_check.py +0 -0
  220. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_substrate.py +0 -0
  221. {cli_agent_runner-0.1.34 → cli_agent_runner-0.1.36}/tests/unit/test_vcs_state.py +0 -0
@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.1.36] - 2026-05-21
11
+
12
+ ### Added
13
+ - New monitor detector `supervisor_stale` (notify) — alerts when the supervisor stops emitting events (stuck between rounds or dead), a blind spot the event stream and `detect_hung` cannot catch. Default ON; threshold derives from `round_timeout_s * 1.5`. Detector count 11 → 12.
14
+ - `[monitor] supervisor_stale_threshold_s` config — override the derived staleness threshold (positive = seconds; 0 = disable; unset = derived).
15
+
16
+ ### Changed
17
+ - `docs/runbook.md` documents the liveness-monitoring architecture: run `monitor --host` from a separate machine to detect supervisor silent-death AND host death (a same-host monitor dies with its host).
18
+
19
+ ## [0.1.35] - 2026-05-20
20
+
21
+ ### Removed
22
+ - `claude_rate_limit_detector` plugin alias (0.1.20-era back-compat layer after the 0.1.23 rename to `claude_error_detector`). Hard-cut at both entry-point and config-mapping layers. See `docs/migrations/0.1.35.md` for the 1-line TOML migration.
23
+
10
24
  ## [0.1.34] - 2026-05-20
11
25
 
12
26
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cli-agent-runner
3
- Version: 0.1.34
3
+ Version: 0.1.36
4
4
  Summary: Restart-on-exit supervisor for autonomous CLI agents
5
5
  Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
6
6
  Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
@@ -49,7 +49,7 @@ full disks, runaway memory.
49
49
 
50
50
  ```
51
51
  ┌──────────────────────────────────────────┐
52
- │ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
52
+ │ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
53
53
  ├──────────────────────────────────────────┤
54
54
  │ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
55
55
  ├──────────────────────────────────────────┤
@@ -80,13 +80,13 @@ agent-runner monitor # live anomaly detection
80
80
 
81
81
  Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
82
82
 
83
- ## 14 verbs
83
+ ## 16 verbs
84
84
 
85
85
  | Lifecycle | Observation |
86
86
  |---|---|
87
87
  | `init` / `install` / `uninstall` | `peek` — state snapshot |
88
88
  | `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
89
- | `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
89
+ | `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
90
90
  | `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
91
91
 
92
92
  Verb reference: [`docs/commands.md`](docs/commands.md).
@@ -106,11 +106,11 @@ guards it. Highlights:
106
106
 
107
107
  Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
108
108
 
109
- ## Monitor: 11 detectors
109
+ ## Monitor: 12 detectors
110
110
 
111
111
  Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
112
112
  `mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
113
- `anomaly_repetitive_active`.
113
+ `anomaly_repetitive_active`, `supervisor_stale`.
114
114
 
115
115
  **Auto-stop the service** (continuing is harmful):
116
116
  - `oauth_fail` — burning API quota on auth-rejected rounds
@@ -12,7 +12,7 @@ full disks, runaway memory.
12
12
 
13
13
  ```
14
14
  ┌──────────────────────────────────────────┐
15
- │ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
15
+ │ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
16
16
  ├──────────────────────────────────────────┤
17
17
  │ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
18
18
  ├──────────────────────────────────────────┤
@@ -43,13 +43,13 @@ agent-runner monitor # live anomaly detection
43
43
 
44
44
  Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
45
45
 
46
- ## 14 verbs
46
+ ## 16 verbs
47
47
 
48
48
  | Lifecycle | Observation |
49
49
  |---|---|
50
50
  | `init` / `install` / `uninstall` | `peek` — state snapshot |
51
51
  | `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
52
- | `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
52
+ | `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
53
53
  | `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
54
54
 
55
55
  Verb reference: [`docs/commands.md`](docs/commands.md).
@@ -69,11 +69,11 @@ guards it. Highlights:
69
69
 
70
70
  Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
71
71
 
72
- ## Monitor: 11 detectors
72
+ ## Monitor: 12 detectors
73
73
 
74
74
  Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
75
75
  `mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
76
- `anomaly_repetitive_active`.
76
+ `anomaly_repetitive_active`, `supervisor_stale`.
77
77
 
78
78
  **Auto-stop the service** (continuing is harmful):
79
79
  - `oauth_fail` — burning API quota on auth-rejected rounds
@@ -20,9 +20,9 @@ supervisor 重启 —— 这是核心模式。中间穿插 11 条防御,避开
20
20
 
21
21
  ```
22
22
  ┌──────────────────────────────────────────┐
23
- │ Layer 3:Witness(monitor) │ 9 个检测器 + 自动停服
23
+ │ Layer 3:Witness(monitor) │ 12 个检测器 + 自动停服
24
24
  ├──────────────────────────────────────────┤
25
- │ Layer 2:Loop(serve,~60 LOC 薄壳) │ 捕获信号,循环拉起 round
25
+ │ Layer 2:Loop(serve,~120 LOC 薄壳) │ 捕获信号,循环拉起 round
26
26
  ├──────────────────────────────────────────┤
27
27
  │ Layer 1:Round(round) │ 跑一次 agent,跑完即退
28
28
  └──────────────────────────────────────────┘
@@ -57,14 +57,14 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
57
57
 
58
58
  完整上手流程:[`docs/quickstart.md`](docs/quickstart.md)。
59
59
 
60
- ## 13 个动词
60
+ ## 16 个动词
61
61
 
62
62
  | 生命周期 | 观察 |
63
63
  |---|---|
64
64
  | `init` / `install` / `uninstall` | `peek` —— 项目状态快照 |
65
65
  | `start` / `stop` / `kill` / `cancel` | `watch` —— peek 在刷新循环里 |
66
- | `restart` / `status` | `monitor` —— 9 个检测器 + 告警 + 自动停服 |
67
- | `round` / `serve` | |
66
+ | `restart` / `status` | `monitor` —— 12 个检测器 + 告警 + 自动停服 |
67
+ | `round` / `serve` / `upgrade` | `events` —— 查询 / 流式订阅 events.jsonl |
68
68
 
69
69
  **停服三动词**有清晰的语义分层:
70
70
  - `stop` —— 优雅,等当前 round 跑完再退(最常用)
@@ -95,11 +95,12 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
95
95
 
96
96
  完整列表 + 历史出处:[`docs/architecture.md`](docs/architecture.md)。
97
97
 
98
- ## Monitor:9 个检测器
98
+ ## Monitor:12 个检测器
99
99
 
100
100
  **只告警**(warning 级,服务继续跑):
101
101
  `timeout_rate` / `hung` / `orphan_chain` / `disk_warning` /
102
- `mem_pressure` / `smoke_fail_rate` / `network_fail`
102
+ `mem_pressure` / `smoke_fail_rate` / `network_fail` / `rate_limit_active` /
103
+ `anomaly_repetitive_active` / `supervisor_stale`
103
104
 
104
105
  **自动停服**(critical 级,继续是 net negative):
105
106
 
@@ -20,11 +20,6 @@ _HOOK_GROUPS = (
20
20
  # Surfaced via peek --json `plugins.disabled` for operator visibility.
21
21
  _DISABLED_PLUGIN_NAMES: list[str] = []
22
22
 
23
- # Plugin name aliases for back-compat: old entry-point name -> canonical name.
24
- _PLUGIN_NAME_ALIASES: dict[str, str] = {
25
- "claude_rate_limit_detector": "claude_error_detector", # 0.1.20 -> 0.1.23 rename
26
- }
27
-
28
23
 
29
24
  def _load_plugins_from_group(group: str) -> None:
30
25
  """Discover and load entry_points in ``group``, isolating per-plugin failures.
@@ -98,9 +93,6 @@ def apply_plugin_disable(names: list[str]) -> None:
98
93
  if not names:
99
94
  return
100
95
 
101
- # Translate aliases so old config names keep working
102
- names = [_PLUGIN_NAME_ALIASES.get(n, n) for n in names]
103
-
104
96
  global _DISABLED_PLUGIN_NAMES
105
97
  _DISABLED_PLUGIN_NAMES = list(names)
106
98
 
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.34'
22
- __version_tuple__ = version_tuple = (0, 1, 34)
21
+ __version__ = version = '0.1.36'
22
+ __version_tuple__ = version_tuple = (0, 1, 36)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -452,6 +452,7 @@ def _poll_once(project: str | Path, *, host: str | None) -> list[monitor.Alert]:
452
452
  metrics=metrics,
453
453
  log_tails=log_tails,
454
454
  round_timeout_s=cfg.runtime.round_timeout_s,
455
+ supervisor_stale_threshold_s=cfg.monitor.supervisor_stale_threshold_s,
455
456
  auth_fail_patterns=cfg.monitor.auth_fail_patterns,
456
457
  auth_fail_hint=cfg.monitor.auth_fail_hint,
457
458
  phases_overrides=cfg.phases.overrides if cfg.phases.overrides else None,
@@ -7,10 +7,9 @@ with computed reset_at_epoch. Supervisor consumes the event.
7
7
  Also emits agent_usage_recorded per-round with token/cost data from the
8
8
  claude result event (0.1.24+).
9
9
 
10
- Naming history: was `claude_rate_limit_detector` in 0.1.20 (single-purpose
11
- rate-limit detector). Renamed + generalized to multi-classification in 0.1.23.
12
- Old plugin name `claude_rate_limit_detector` retained as entry-point alias
13
- via pyproject.toml.
10
+ Module name is historical: the original 0.1.20 single-purpose
11
+ rate-limit detector was generalized to multi-classification in 0.1.23
12
+ (class + entry-point renamed to `claude_error_detector`; module path kept).
14
13
  """
15
14
 
16
15
  from __future__ import annotations
@@ -141,6 +141,12 @@ class MonitorConfig:
141
141
  anomaly_repetitive_threshold: int = 0 # 0 = disabled
142
142
  host_health: MonitorHostHealthConfig = field(default_factory=MonitorHostHealthConfig)
143
143
  round_progress_interval_s: int = 0 # 0 = disabled; >0 = emit round_progress every N seconds
144
+ supervisor_stale_threshold_s: int | None = None
145
+ """Staleness deadline for the supervisor_stale detector (seconds).
146
+
147
+ None (unset) → derived default round_timeout_s * 1.5.
148
+ Positive int → explicit threshold. 0 → disable the detector.
149
+ """
144
150
 
145
151
 
146
152
  @dataclass(frozen=True)
@@ -467,6 +473,14 @@ def load_config(toml_path: Path) -> Config:
467
473
  monitor_d.get("round_progress_interval_s", 0),
468
474
  field="monitor.round_progress_interval_s",
469
475
  ),
476
+ supervisor_stale_threshold_s=(
477
+ None
478
+ if monitor_d.get("supervisor_stale_threshold_s") is None
479
+ else _require_non_negative_int(
480
+ monitor_d["supervisor_stale_threshold_s"],
481
+ field="monitor.supervisor_stale_threshold_s",
482
+ )
483
+ ),
470
484
  )
471
485
  plugins_raw = dict(raw.get("plugins") or {}) # copy so we can pop
472
486
  disable = list(plugins_raw.pop("disable", []))
@@ -1,6 +1,6 @@
1
1
  """Monitor — anomaly detectors over events + metrics + log tails.
2
2
 
3
- 11 built-in detectors. Two trigger ``auto_action="stop_service"``:
3
+ 12 built-in detectors. Two trigger ``auto_action="stop_service"``:
4
4
  * oauth_fail — auth pattern in short-exit logs (retrying burns API quota)
5
5
  * disk_critical — disk_used_pct > 95% (writing more risks corruption)
6
6
 
@@ -54,6 +54,7 @@ KNOWN_ALERT_KINDS: frozenset[str] = frozenset(
54
54
  "network_fail",
55
55
  "rate_limit_active",
56
56
  "anomaly_repetitive_active",
57
+ "supervisor_stale",
57
58
  }
58
59
  )
59
60
 
@@ -429,6 +430,39 @@ def detect_anomaly_repetitive_active(
429
430
  )
430
431
 
431
432
 
433
+ def detect_supervisor_stale(
434
+ events: list[dict[str, Any]],
435
+ *,
436
+ now: datetime,
437
+ stale_threshold_s: int,
438
+ ) -> Alert | None:
439
+ """Alert when the most recent event is older than ``stale_threshold_s``.
440
+
441
+ Catches supervisor "silent-death": stuck between rounds (after round_end,
442
+ before the next round_start) emitting no events. The event stream cannot
443
+ distinguish that from a normal idle gap — only a deadline check can.
444
+
445
+ ``stale_threshold_s <= 0`` disables the check (caller resolves the
446
+ sentinel). Empty event list → no alert: that is "never started", not
447
+ silent-death, and there is no baseline to measure staleness against.
448
+ """
449
+ if stale_threshold_s <= 0 or not events:
450
+ return None
451
+ last_ts_str = max((e["ts"] for e in events if "ts" in e), default=None)
452
+ if last_ts_str is None:
453
+ return None
454
+ age_s = (now - parse_iso_ms(last_ts_str)).total_seconds()
455
+ if age_s <= stale_threshold_s:
456
+ return None
457
+ return _alert(
458
+ "supervisor_stale",
459
+ "warning",
460
+ f"No events for {int(age_s)}s (threshold {stale_threshold_s}s) — "
461
+ f"supervisor may be stuck or dead. Last event: {last_ts_str}.",
462
+ {"age_s": int(age_s), "threshold_s": stale_threshold_s, "last_ts": last_ts_str},
463
+ )
464
+
465
+
432
466
  # ---------------------------------------------------------------------------
433
467
  # State-tree assembly (Task 3.2)
434
468
  # ---------------------------------------------------------------------------
@@ -535,6 +569,7 @@ def run_all_detectors(
535
569
  metrics: list[dict[str, Any]],
536
570
  log_tails: dict[int, str],
537
571
  round_timeout_s: int = 1800,
572
+ supervisor_stale_threshold_s: int | None = None,
538
573
  now: datetime | None = None,
539
574
  auth_fail_patterns: list[str] | None = None,
540
575
  auth_fail_hint: str | None = None,
@@ -543,12 +578,17 @@ def run_all_detectors(
543
578
  disk_warning_pct: float = 90.0,
544
579
  disk_critical_pct: float = 95.0,
545
580
  ) -> list[Alert]:
546
- """Run all 11 detectors; returns alerts (empty = healthy)."""
581
+ """Run all 12 detectors; returns alerts (empty = healthy)."""
547
582
  if now is None:
548
583
  now = datetime.now(UTC)
549
584
  compiled_auth_pats = (
550
585
  [re.compile(p, re.IGNORECASE) for p in auth_fail_patterns] if auth_fail_patterns else None
551
586
  )
587
+ effective_stale_s = (
588
+ int(round_timeout_s * 1.5)
589
+ if supervisor_stale_threshold_s is None
590
+ else supervisor_stale_threshold_s
591
+ )
552
592
  candidates = [
553
593
  detect_timeout_rate(events),
554
594
  detect_hung(
@@ -568,6 +608,7 @@ def run_all_detectors(
568
608
  detect_network_fail(events, log_tails),
569
609
  detect_rate_limit_active(events, now=now.timestamp()),
570
610
  detect_anomaly_repetitive_active(events),
611
+ detect_supervisor_stale(events, now=now, stale_threshold_s=effective_stale_s),
571
612
  ]
572
613
  return [a for a in candidates if a is not None]
573
614
 
@@ -65,13 +65,14 @@ surfacing everywhere.
65
65
  | `event_kind_registry` | Prevent events.emit() typos / unregistered kinds slipping past CI | `tests/invariants/test_event_kind_registry.py` |
66
66
  <!-- /gen:defenses-table -->
67
67
 
68
- ## Monitor: 11 detectors
68
+ ## Monitor: 12 detectors
69
69
 
70
70
  Three categories by `auto_action`:
71
71
 
72
72
  **Notify only** (severity `warning`):
73
73
  `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`, `mem_pressure`,
74
- `smoke_fail_rate`, `network_fail`.
74
+ `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
75
+ `anomaly_repetitive_active`, `supervisor_stale`.
75
76
 
76
77
  **Auto-stop service** (severity `critical`, `auto_action="stop_service"`):
77
78
  `oauth_fail`, `disk_critical`. Continuing in either state is harmful (burning
@@ -88,6 +89,7 @@ API quota / writing to a near-full disk).
88
89
  - `orphan_chain`
89
90
  - `rate_limit_active`
90
91
  - `smoke_fail_rate`
92
+ - `supervisor_stale`
91
93
  - `timeout_rate`
92
94
  <!-- /gen:detector-list -->
93
95
 
@@ -117,7 +117,7 @@ agent-runner events --kind transient_error_backoff_capped --tail
117
117
 
118
118
  ### `agent-runner monitor [--host SSH-ALIAS] [--interval N] [--json]`
119
119
 
120
- Anomaly-detection daemon. Runs the 11 detectors against the live state on every
120
+ Anomaly-detection daemon. Runs the 12 detectors against the live state on every
121
121
  poll. Without `--host`, watches local logs at default 30s interval. With
122
122
  `--host`, watches a remote agent-runner over plain ssh at default 60s interval.
123
123
 
@@ -133,7 +133,7 @@ agent-runner monitor --json | jq -c # pipe alerts to a downstream consume
133
133
 
134
134
  ## 中文摘要
135
135
 
136
- 13 个动词:`init / install / uninstall / start / stop / kill / cancel / restart / status / round / serve / peek / watch / monitor`。
136
+ 16 个动词:`init / install / uninstall / start / stop / kill / cancel / restart / status / round / serve / upgrade / peek / watch / events / monitor`。
137
137
 
138
138
  观察类(peek/watch/monitor)三视角对称,全部共用 `--round / --log / --events / --select / --json` 下钻参数。
139
139
 
@@ -80,6 +80,7 @@ running with newly-set `dirty_action = "auto_commit"` is undefined).
80
80
  | `anomaly_repetitive_threshold` | `int` | 0 |
81
81
  | `host_health` | `MonitorHostHealthConfig` | MonitorHostHealthConfig(mem_avail_min_mb=200, disk_warning_pct=90.0, disk_critical_pct=95.0) |
82
82
  | `round_progress_interval_s` | `int` | 0 |
83
+ | `supervisor_stale_threshold_s` | `int \| None` | None |
83
84
  <!-- /gen:config-schema -->
84
85
 
85
86
  ### `vcs.dirty_action`
@@ -203,6 +204,7 @@ Unconfigured phases (and configs without `[phases]`) keep using the global
203
204
  [monitor]
204
205
  auto_stop_on = ["oauth_fail", "disk_critical"]
205
206
  round_progress_interval_s = 0 # 0 = disabled; set >0 to emit round_progress heartbeat events
207
+ # supervisor_stale_threshold_s = 2700 # unset = round_timeout_s * 1.5; 0 = disable
206
208
 
207
209
  [monitor.host_health]
208
210
  mem_avail_min_mb = 200 # mem_pressure fires when mem_available_mb < this
@@ -0,0 +1,97 @@
1
+ # 0.1.35 — `claude_rate_limit_detector` alias removed
2
+
3
+ **Date**: 2026-05-20
4
+
5
+ ## What changed
6
+
7
+ The `claude_rate_limit_detector` alias (introduced 0.1.20, kept as
8
+ back-compat after the 0.1.23 rename to `claude_error_detector`) was
9
+ hard-removed in 0.1.35 at **both** layers:
10
+
11
+ 1. **Entry-point level** (`pyproject.toml`): the old key is no longer
12
+ declared, so `importlib.metadata.entry_points(group=...)` no longer
13
+ returns it.
14
+ 2. **Config-level alias mapping** (`agent_runner/__init__.py`): the
15
+ `_PLUGIN_NAME_ALIASES` dict that previously auto-translated the old
16
+ name in `[plugins] disable/enable` was deleted. Stale TOML entries
17
+ now trigger the existing typo-catcher UserWarning.
18
+
19
+ The underlying class (`ClaudeErrorDetector`, module
20
+ `agent_runner.builtin_plugins.claude_rate_limit`) is unchanged; only
21
+ the alias surface was removed.
22
+
23
+ ## Migration (one-line edit, only if you used the old name)
24
+
25
+ Search your `agent-runner.toml` for `claude_rate_limit_detector`:
26
+
27
+ ```bash
28
+ grep -nE 'claude_rate_limit_detector' agent-runner.toml \
29
+ && echo "switch to claude_error_detector" \
30
+ || echo "no migration needed"
31
+ ```
32
+
33
+ If found, switch:
34
+
35
+ ```diff
36
+ [plugins]
37
+ - disable = ["claude_rate_limit_detector"]
38
+ + disable = ["claude_error_detector"]
39
+ ```
40
+
41
+ Same for `enable = [...]` if you opted-in by name.
42
+
43
+ The behavior is identical — same class, same hooks, same events emitted.
44
+
45
+ ## Migration (consumers of `importlib.metadata.entry_points`)
46
+
47
+ If any of your code introspects the entry-point group:
48
+
49
+ ```python
50
+ from importlib.metadata import entry_points
51
+ hooks = entry_points(group="agent_runner.post_round_hooks")
52
+ ```
53
+
54
+ The old key `claude_rate_limit_detector` is no longer in the result.
55
+ Either filter against the canonical name (`claude_error_detector`) or
56
+ iterate by `.value` (module:class target) which has been unchanged
57
+ since 0.1.20.
58
+
59
+ ## Why hard-cut (no deprecation cycle)
60
+
61
+ The 0.1.23 rename is 12 releases / ~6 days old (as measured by the rapid
62
+ 0.1.25-0.1.34 ship cadence). The Argus Gateway team — our only known
63
+ production consumer — explicitly migrated away from the old name during
64
+ their 2026-05-19/20 monitoring overhaul. Carrying an unused back-compat
65
+ entry-point line indefinitely is debt; per project policy
66
+ (`docs/thesis.md` — zero tech debt per release), we hard-cut.
67
+
68
+ This matches the precedent set in 0.1.29 (legacy `rate_limit_*` event
69
+ aliases removed) and 0.1.34 (`peek --select events.<kind>` selector
70
+ removed): when consumers are migrated, the back-compat layer is debt
71
+ that compounds.
72
+
73
+ ## What did NOT change
74
+
75
+ - `claude_error_detector` (canonical entry-point key since 0.1.23) — unchanged
76
+ - `gemini_error_detector` — unchanged
77
+ - The plugin's class name (`ClaudeErrorDetector`) — unchanged
78
+ - All events emitted by the plugin (`transient_error_detected` /
79
+ `agent_usage_recorded` / `anomaly_repetitive_tool`) — unchanged
80
+ - Config schema — no new TOML keys
81
+ - Public Python API — unchanged
82
+
83
+ ## Verification
84
+
85
+ After upgrade:
86
+
87
+ ```bash
88
+ agent-runner peek --json | jq '.plugins.post_round_hooks'
89
+ ```
90
+
91
+ Expected output:
92
+
93
+ ```json
94
+ ["claude_error_detector", "gemini_error_detector"]
95
+ ```
96
+
97
+ (Order may vary; what matters is that `claude_rate_limit_detector` is absent.)
@@ -0,0 +1,73 @@
1
+ # Migrating to 0.1.36
2
+
3
+ ## TL;DR
4
+
5
+ ```bash
6
+ pip install --upgrade cli-agent-runner==0.1.36
7
+ ```
8
+
9
+ No action required. The new `supervisor_stale` detector is ON by default with
10
+ a derived threshold and is notify-only — it never stops your service.
11
+
12
+ ## What changed
13
+
14
+ 0.1.36 adds a 12th monitor detector, `supervisor_stale`, that closes a
15
+ liveness blind spot: a supervisor that hangs *between* rounds (after a round
16
+ ends, before the next one starts) emits no events. The event stream cannot
17
+ tell a permanent silence from a normal idle gap, and `detect_hung` only
18
+ covers a round that *started* and then hung mid-execution. `supervisor_stale`
19
+ watches the age of the most recent event and alerts when it exceeds a
20
+ staleness deadline.
21
+
22
+ ## Default behavior (no action needed)
23
+
24
+ - ON by default.
25
+ - Threshold derives from `round_timeout_s * 1.5` — comfortably above the
26
+ longest legitimate inter-event gap (a round running to full timeout, plus
27
+ restart delay), so it does not false-positive on healthy systems that use a single `round_timeout_s` (for phase overrides that raise it, see "Tuning" below).
28
+ - Notify-only: it emits an alert, never an auto-stop. A stuck or dead
29
+ supervisor cannot honor an auto-stop anyway; the alert is for a human or an
30
+ external watchdog.
31
+
32
+ ## Tuning (optional)
33
+
34
+ Set `[monitor] supervisor_stale_threshold_s` when the derived default does not
35
+ fit your project's cadence:
36
+
37
+ ```toml
38
+ [monitor]
39
+ supervisor_stale_threshold_s = 3600 # explicit seconds
40
+ # supervisor_stale_threshold_s = 0 # disable the detector
41
+ # (unset) # derive round_timeout_s * 1.5
42
+ ```
43
+
44
+ - **Very short rounds with occasional long legitimate gaps** (e.g. 2-minute
45
+ rounds plus a periodic maintenance pause): set a value higher than derived.
46
+ - **Phase overrides that raise `round_timeout_s`** for some phase: the derived
47
+ threshold uses the *base* `round_timeout_s`, so a round in a longer-timeout
48
+ phase can exceed `base * 1.5`. Set
49
+ `supervisor_stale_threshold_s >= max_phase_timeout * 1.5`.
50
+
51
+ ## The liveness architecture (important)
52
+
53
+ A monitor on the *same host* as the supervisor dies when that host dies — it
54
+ cannot report its own host's death. For true liveness coverage, run the
55
+ monitor from a **separate machine**:
56
+
57
+ ```bash
58
+ # On your laptop / a second host, not on the supervised host:
59
+ agent-runner monitor --host pi
60
+ ```
61
+
62
+ That catches both failure modes: a stuck supervisor on a live host
63
+ (`supervisor_stale`, events frozen) and a dead host or severed network (SSH
64
+ poll fails → `monitor_remote_giveup`).
65
+
66
+ ## What did NOT change
67
+
68
+ - The existing 11 detectors are unchanged.
69
+ - `detect_hung` still covers the in-round hang case.
70
+ - `round_timeout_s` is unchanged; the staleness threshold derives from it but
71
+ does not modify it.
72
+ - No new core event kind: `supervisor_stale` is a monitor *alert kind*, surfaced
73
+ through the existing monitor alert path.
@@ -266,13 +266,16 @@ agent-runner ships two built-in `post_round_hooks` plugins registered
266
266
  automatically via their own entry-points: `claude_error_detector` (below)
267
267
  and `gemini_error_detector` (0.1.24+, parallel for gemini CLI).
268
268
 
269
- ### `claude_error_detector` (0.1.23+, formerly `claude_rate_limit_detector`)
269
+ ### `claude_error_detector` (0.1.23+)
270
270
 
271
271
  **Entry-point group:** `agent_runner.post_round_hooks`
272
272
  **Module:** `agent_runner.builtin_plugins.claude_rate_limit`
273
- **Old name:** `claude_rate_limit_detector` retained as an alias in
274
- `pyproject.toml` so `[plugins] disable = ["claude_rate_limit_detector"]`
275
- still works for back-compat.
273
+
274
+ Renamed from `claude_rate_limit_detector` in 0.1.23 when the detector
275
+ was generalized from single-rate-limit to multi-classification. The
276
+ old-name alias was kept as a `pyproject.toml` entry-point through 0.1.34
277
+ and removed in 0.1.35. Operators still using `[plugins] disable =
278
+ ["claude_rate_limit_detector"]` must switch to `claude_error_detector`.
276
279
 
277
280
  After each round, scans the last 50 lines of the round's JSONL log for
278
281
  transient errors and usage data:
@@ -45,9 +45,10 @@ correctly (process still runs as your user, not root).
45
45
  ### Health check
46
46
 
47
47
  ```bash
48
- agent-runner status # service running?
49
- agent-runner peek # full state snapshot
50
- agent-runner peek --json | jq .defenses # what's defended
48
+ agent-runner status # service running?
49
+ agent-runner peek # full state snapshot
50
+ agent-runner peek --json | jq .defenses # what's defended
51
+ agent-runner peek --json | jq .system.agent_process_count # orphan agent count (0.1.34+)
51
52
  journalctl --user -u agent-runner@<project> --since "1 hour ago"
52
53
  ```
53
54
 
@@ -252,6 +253,29 @@ API. Power profile:
252
253
  real state change. Verify the detector logic and thresholds before enabling
253
254
  `auto_stop` on a production remote.
254
255
 
256
+ ### Liveness monitoring: run monitor from a separate machine
257
+
258
+ `agent-runner monitor` detects anomalies including `supervisor_stale` — the
259
+ supervisor stopped emitting events because it is stuck between rounds or dead.
260
+ But a monitor running on the *same host* as the supervisor dies when that host
261
+ dies, so it cannot report its own host's death.
262
+
263
+ For true liveness coverage, run the monitor from a **separate machine**:
264
+
265
+ # On your laptop / a second host, NOT on the supervised host:
266
+ agent-runner monitor --host pi
267
+
268
+ This catches both failure modes:
269
+
270
+ - Supervisor stuck on a live host → `supervisor_stale` alert (events frozen).
271
+ - Host itself dead / network gone → SSH poll fails → `monitor_remote_giveup`.
272
+
273
+ The `supervisor_stale` threshold defaults to `round_timeout_s * 1.5`. Override
274
+ with `[monitor] supervisor_stale_threshold_s = N` for projects whose legitimate
275
+ cadence — very short rounds with occasional long legitimate gaps, or phase
276
+ overrides that raise `round_timeout_s` — does not fit the derived default. Set
277
+ to `0` to disable the detector entirely.
278
+
255
279
  ## Live event stream (machine-readable)
256
280
 
257
281
  For machine consumption (parity comparisons, custom dashboards, automation
@@ -45,9 +45,8 @@ Changelog = "https://github.com/wan9yu/cli-agent-runner/blob/main/CHANGELOG.md"
45
45
  agent-runner = "agent_runner.cli:main"
46
46
 
47
47
  [project.entry-points."agent_runner.post_round_hooks"]
48
- claude_rate_limit_detector = "agent_runner.builtin_plugins.claude_rate_limit:ClaudeErrorDetector" # 0.1.20 alias
49
- claude_error_detector = "agent_runner.builtin_plugins.claude_rate_limit:ClaudeErrorDetector" # 0.1.23 canonical
50
- gemini_error_detector = "agent_runner.builtin_plugins.gemini:GeminiErrorDetector" # 0.1.24
48
+ claude_error_detector = "agent_runner.builtin_plugins.claude_rate_limit:ClaudeErrorDetector"
49
+ gemini_error_detector = "agent_runner.builtin_plugins.gemini:GeminiErrorDetector"
51
50
 
52
51
  [project.optional-dependencies]
53
52
  dev = [