cli-agent-runner 0.1.35__tar.gz → 0.1.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/CHANGELOG.md +22 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/PKG-INFO +5 -5
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/README.md +4 -4
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/README.zh.md +5 -4
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/_version.py +2 -2
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/api.py +1 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/upgrade_cmd.py +147 -20
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/config.py +14 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/events.py +1 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/monitor.py +43 -2
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/architecture.md +5 -2
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/commands.md +20 -2
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/configuration.md +2 -0
- cli_agent_runner-0.1.37/docs/migrations/0.1.36.md +73 -0
- cli_agent_runner-0.1.37/docs/migrations/0.1.37.md +65 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/runbook.md +54 -23
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_architecture.py +2 -1
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_catalogs.py +7 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_observation.py +15 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_service.py +22 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_cli_upgrade.py +235 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_config.py +23 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_docgen.py +2 -2
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_monitor_assembly.py +1 -0
- cli_agent_runner-0.1.37/tests/unit/test_monitor_detect_supervisor_stale.py +38 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_monitor_detectors.py +36 -1
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.codecov.yml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.github/workflows/ci.yml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.github/workflows/release.yml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.gitignore +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/.vulture-whitelist.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/CODE_OF_CONDUCT.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/CONTRIBUTING.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/LICENSE +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/SECURITY.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/_docgen.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/_emit.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/_registry.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/_substrate.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/_throttle.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/agent_runtime.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/api_types.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/builtin_plugins/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/builtin_plugins/_constants.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/builtin_plugins/claude_rate_limit.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/builtin_plugins/gemini.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/__main__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/common.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/events_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/init_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/install_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/monitor_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/peek_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/round_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/serve_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/cli/service_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/context_store.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/defenses.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/detector_helpers.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/hooks.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/http_progress.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/lifecycle.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/metrics.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/presets/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/presets/aider.toml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/presets/claude.toml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/presets/gemini.toml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/prompt_loader.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/round_log.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/round_view.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/runner.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/scaffold.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/service_unit.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/startup_check.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/agent_runner/vcs_state.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/build.sh +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/deploy/example-agent-runner.toml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/deploy/launchd.plist.tmpl +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/deploy/run-loop.sh +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/deploy/systemd.service.tmpl +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/README.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/events.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/long-running-agents.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/marketing/README.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/marketing/promo-cn.html +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.16.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.17.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.19.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.20.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.21.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.22.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.23.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.24.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.25.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.26.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.27.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.28.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.29.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.30.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.31.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.32.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.33.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.34.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/migrations/0.1.35.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/plugins.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/quickstart.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/recipes/aider.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/docs/thesis.md +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/pyproject.toml +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/_test_helpers.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/conftest.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/contract/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/contract/test_public_api_surface.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/e2e/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/e2e/conftest.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/e2e/test_e2e_graceful_stop.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/e2e/test_e2e_install_systemd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/e2e/test_e2e_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/e2e/test_e2e_round_lifecycle.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/fixtures/cli-real-output/claude-2.1.143-assistant-tool-use.jsonl +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/fixtures/cli-real-output/claude-2.1.143-result-event.jsonl +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/fixtures/cli-real-output/gemini-0.42.0-result-event.jsonl +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_bounded_run.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_context_enricher_namespacing.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_fresh_eyes_signal.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_grace_kill_emission.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_install_dry_run.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_monitor_seeded.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_plugin_detector_loaded.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_plugin_owned_paths.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_plugin_real_flow.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_run_one_round_with_fake_agent.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_scaffold_presets.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_serve_loop.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_substrate_fingerprint.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/integration/test_transient_error_backoff.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_atomic_write_enforced.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_classification_ssot.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_docs_generated.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_entry_points_resolve.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_event_kind_registry.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_event_kinds_ssot.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_events_doc_contract.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_layer_2_loop_size.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_module_boundaries.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_module_sizes.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_no_ai_signatures.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_no_pytest_skip_on_parse_fail.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_peek_schema_version.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_repo_constants_patched_in_tests.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_round_result_stable.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_stash_uses_sha_not_index.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/invariants/test_upstream_schema_canary.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/literate/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/literate/parser.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/literate/test_parser.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/literate/test_quickstart.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/__init__.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_agent_runtime.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_agent_runtime_grace.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_agent_runtime_progress.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_assemble_prompt.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_events_stream.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_install.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_read_round_num.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_resolve_phase.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_api_types.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_auto_stop_gating.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_claude_error_detector.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_cli.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_cli_common.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_cli_init_install.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_cli_monitor_http.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_cli_service_peek_monitor.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_config_fresh_eyes.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_config_max_rounds.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_config_stop_file.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_config_substrate_fingerprint_paths.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_config_transient_error_action.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_context_store.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_defenses.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_detector_helpers.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_detector_protocol.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_events.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_events_cmd.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_fresh_eyes_trigger.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_gemini_plugin.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_hook_failure_isolation.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_hooks.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_http_progress.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_init_entry_points.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_lifecycle.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_metrics.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_monitor_detect_anomaly_repetitive.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_monitor_detect_rate_limit.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_monitor_remote.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_peek_argparse.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_peek_select.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_presets.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_prompt_loader.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_round_log_helpers.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_round_view.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_runner.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_runner_throttle.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_scaffold.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_serve_cmd_bounded.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_serve_round_log.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_serve_sentinel.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_serve_startup_hooks.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_service_unit.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_startup_check.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_substrate.py +0 -0
- {cli_agent_runner-0.1.35 → cli_agent_runner-0.1.37}/tests/unit/test_vcs_state.py +0 -0
|
@@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.37] - 2026-05-22
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- `upgrade` no longer crashes when run from a directory without `agent-runner.toml` — it upgrades the package and falls back to package-only mode.
|
|
14
|
+
- `upgrade` handles PEP 668 externally-managed environments (Debian 12 etc.): retries pip with `--break-system-packages` (and `--user` for user-site installs) when not in a venv.
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
- `upgrade` only stop/start-orchestrates the `systemd --user` service it installed. For a self-managed service (e.g. a systemd system unit) it does package-only upgrade + smoke and prints the restart command to run yourself — no more silent no-op, and no more `agent-runner start` suggestion (which could spawn a conflicting second supervisor).
|
|
18
|
+
- New `--no-restart` flag forces package-only upgrade.
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
- New event `package_upgraded` (on-disk package changed; restart deferred to the operator), distinct from `service_upgraded` (the live service is now on the new version).
|
|
22
|
+
|
|
23
|
+
## [0.1.36] - 2026-05-21
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- New monitor detector `supervisor_stale` (notify) — alerts when the supervisor stops emitting events (stuck between rounds or dead), a blind spot the event stream and `detect_hung` cannot catch. Default ON; threshold derives from `round_timeout_s * 1.5`. Detector count 11 → 12.
|
|
27
|
+
- `[monitor] supervisor_stale_threshold_s` config — override the derived staleness threshold (positive = seconds; 0 = disable; unset = derived).
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
- `docs/runbook.md` documents the liveness-monitoring architecture: run `monitor --host` from a separate machine to detect supervisor silent-death AND host death (a same-host monitor dies with its host).
|
|
31
|
+
|
|
10
32
|
## [0.1.35] - 2026-05-20
|
|
11
33
|
|
|
12
34
|
### Removed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cli-agent-runner
|
|
3
|
-
Version: 0.1.35
|
|
3
|
+
Version: 0.1.37
|
|
4
4
|
Summary: Restart-on-exit supervisor for autonomous CLI agents
|
|
5
5
|
Project-URL: Homepage, https://github.com/wan9yu/cli-agent-runner
|
|
6
6
|
Project-URL: Documentation, https://github.com/wan9yu/cli-agent-runner#readme
|
|
@@ -49,7 +49,7 @@ full disks, runaway memory.
|
|
|
49
49
|
|
|
50
50
|
```
|
|
51
51
|
┌──────────────────────────────────────────┐
|
|
52
|
-
│ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
|
|
52
|
+
│ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
|
|
53
53
|
├──────────────────────────────────────────┤
|
|
54
54
|
│ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
|
|
55
55
|
├──────────────────────────────────────────┤
|
|
@@ -86,7 +86,7 @@ Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
|
|
|
86
86
|
|---|---|
|
|
87
87
|
| `init` / `install` / `uninstall` | `peek` — state snapshot |
|
|
88
88
|
| `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
|
|
89
|
-
| `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
|
|
89
|
+
| `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
|
|
90
90
|
| `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
|
|
91
91
|
|
|
92
92
|
Verb reference: [`docs/commands.md`](docs/commands.md).
|
|
@@ -106,11 +106,11 @@ guards it. Highlights:
|
|
|
106
106
|
|
|
107
107
|
Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
|
|
108
108
|
|
|
109
|
-
## Monitor: 11 detectors
|
|
109
|
+
## Monitor: 12 detectors
|
|
110
110
|
|
|
111
111
|
Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
|
|
112
112
|
`mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
|
|
113
|
-
`anomaly_repetitive_active`.
|
|
113
|
+
`anomaly_repetitive_active`, `supervisor_stale`.
|
|
114
114
|
|
|
115
115
|
**Auto-stop the service** (continuing is harmful):
|
|
116
116
|
- `oauth_fail` — burning API quota on auth-rejected rounds
|
|
@@ -12,7 +12,7 @@ full disks, runaway memory.
|
|
|
12
12
|
|
|
13
13
|
```
|
|
14
14
|
┌──────────────────────────────────────────┐
|
|
15
|
-
│ Layer 3: The Witness (monitor) │ 11 detectors + auto-stop
|
|
15
|
+
│ Layer 3: The Witness (monitor) │ 12 detectors + auto-stop
|
|
16
16
|
├──────────────────────────────────────────┤
|
|
17
17
|
│ Layer 2: The Loop (serve, ~120 LOC) │ signal-trapping restart loop
|
|
18
18
|
├──────────────────────────────────────────┤
|
|
@@ -49,7 +49,7 @@ Full walkthrough: [`docs/quickstart.md`](docs/quickstart.md).
|
|
|
49
49
|
|---|---|
|
|
50
50
|
| `init` / `install` / `uninstall` | `peek` — state snapshot |
|
|
51
51
|
| `start` / `stop` / `kill` / `cancel` | `watch` — peek in a refresh loop |
|
|
52
|
-
| `restart` / `status` | `monitor` — 11 detectors, alerts, auto-stop |
|
|
52
|
+
| `restart` / `status` | `monitor` — 12 detectors, alerts, auto-stop |
|
|
53
53
|
| `round` / `serve` / `upgrade` | `events` — query / stream events.jsonl |
|
|
54
54
|
|
|
55
55
|
Verb reference: [`docs/commands.md`](docs/commands.md).
|
|
@@ -69,11 +69,11 @@ guards it. Highlights:
|
|
|
69
69
|
|
|
70
70
|
Full list and rationale: [`docs/architecture.md`](docs/architecture.md).
|
|
71
71
|
|
|
72
|
-
## Monitor: 11 detectors
|
|
72
|
+
## Monitor: 12 detectors
|
|
73
73
|
|
|
74
74
|
Notify only: `timeout_rate`, `hung`, `orphan_chain`, `disk_warning`,
|
|
75
75
|
`mem_pressure`, `smoke_fail_rate`, `network_fail`, `rate_limit_active`,
|
|
76
|
-
`anomaly_repetitive_active`.
|
|
76
|
+
`anomaly_repetitive_active`, `supervisor_stale`.
|
|
77
77
|
|
|
78
78
|
**Auto-stop the service** (continuing is harmful):
|
|
79
79
|
- `oauth_fail` — burning API quota on auth-rejected rounds
|
|
@@ -20,7 +20,7 @@ supervisor 重启 —— 这是核心模式。中间穿插 11 条防御,避开
|
|
|
20
20
|
|
|
21
21
|
```
|
|
22
22
|
┌──────────────────────────────────────────┐
|
|
23
|
-
│ Layer 3:Witness(monitor) │
|
|
23
|
+
│ Layer 3:Witness(monitor) │ 12 个检测器 + 自动停服
|
|
24
24
|
├──────────────────────────────────────────┤
|
|
25
25
|
│ Layer 2:Loop(serve,~120 LOC 薄壳) │ 捕获信号,循环拉起 round
|
|
26
26
|
├──────────────────────────────────────────┤
|
|
@@ -63,7 +63,7 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
|
|
|
63
63
|
|---|---|
|
|
64
64
|
| `init` / `install` / `uninstall` | `peek` —— 项目状态快照 |
|
|
65
65
|
| `start` / `stop` / `kill` / `cancel` | `watch` —— peek 在刷新循环里 |
|
|
66
|
-
| `restart` / `status` | `monitor` ——
|
|
66
|
+
| `restart` / `status` | `monitor` —— 12 个检测器 + 告警 + 自动停服 |
|
|
67
67
|
| `round` / `serve` / `upgrade` | `events` —— 查询 / 流式订阅 events.jsonl |
|
|
68
68
|
|
|
69
69
|
**停服三动词**有清晰的语义分层:
|
|
@@ -95,11 +95,12 @@ agent-runner monitor # 实时异常检测,OAuth/磁盘 critical
|
|
|
95
95
|
|
|
96
96
|
完整列表 + 历史出处:[`docs/architecture.md`](docs/architecture.md)。
|
|
97
97
|
|
|
98
|
-
## Monitor:
|
|
98
|
+
## Monitor:12 个检测器
|
|
99
99
|
|
|
100
100
|
**只告警**(warning 级,服务继续跑):
|
|
101
101
|
`timeout_rate` / `hung` / `orphan_chain` / `disk_warning` /
|
|
102
|
-
`mem_pressure` / `smoke_fail_rate` / `network_fail`
|
|
102
|
+
`mem_pressure` / `smoke_fail_rate` / `network_fail` / `rate_limit_active` /
|
|
103
|
+
`anomaly_repetitive_active` / `supervisor_stale`
|
|
103
104
|
|
|
104
105
|
**自动停服**(critical 级,继续是 net negative):
|
|
105
106
|
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.35'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 35)
|
|
21
|
+
__version__ = version = '0.1.37'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 37)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -452,6 +452,7 @@ def _poll_once(project: str | Path, *, host: str | None) -> list[monitor.Alert]:
|
|
|
452
452
|
metrics=metrics,
|
|
453
453
|
log_tails=log_tails,
|
|
454
454
|
round_timeout_s=cfg.runtime.round_timeout_s,
|
|
455
|
+
supervisor_stale_threshold_s=cfg.monitor.supervisor_stale_threshold_s,
|
|
455
456
|
auth_fail_patterns=cfg.monitor.auth_fail_patterns,
|
|
456
457
|
auth_fail_hint=cfg.monitor.auth_fail_hint,
|
|
457
458
|
phases_overrides=cfg.phases.overrides if cfg.phases.overrides else None,
|
|
@@ -18,7 +18,9 @@ import sys
|
|
|
18
18
|
import time
|
|
19
19
|
from pathlib import Path
|
|
20
20
|
|
|
21
|
+
import agent_runner
|
|
21
22
|
from agent_runner import __version__, api, events
|
|
23
|
+
from agent_runner.api_types import ServiceMode
|
|
22
24
|
from agent_runner.cli.common import cfg_from_args, fail, info
|
|
23
25
|
from agent_runner.config import Config
|
|
24
26
|
|
|
@@ -28,8 +30,8 @@ def add_parser(sub, parent) -> None:
|
|
|
28
30
|
"upgrade",
|
|
29
31
|
parents=[parent],
|
|
30
32
|
help=(
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
+
"Package upgrade with service-mode gate: orchestrated stop/start"
|
|
34
|
+
" for systemd --user; package-only otherwise"
|
|
33
35
|
),
|
|
34
36
|
)
|
|
35
37
|
p.add_argument(
|
|
@@ -40,24 +42,69 @@ def add_parser(sub, parent) -> None:
|
|
|
40
42
|
help="Pin a specific version (e.g. 0.1.13). Default: latest from PyPI. "
|
|
41
43
|
"Use to roll back: `--target <previous-version>`.",
|
|
42
44
|
)
|
|
45
|
+
p.add_argument(
|
|
46
|
+
"--no-restart",
|
|
47
|
+
action="store_true",
|
|
48
|
+
help="Upgrade the package + smoke only; do not stop/start the service "
|
|
49
|
+
"(you restart it yourself).",
|
|
50
|
+
)
|
|
43
51
|
p.set_defaults(func=cmd)
|
|
44
52
|
|
|
45
53
|
|
|
46
54
|
def cmd(args) -> int:
|
|
47
|
-
cfg =
|
|
48
|
-
return _run_upgrade(
|
|
55
|
+
cfg = _try_load_cfg(args)
|
|
56
|
+
return _run_upgrade(
|
|
57
|
+
cfg,
|
|
58
|
+
target=args.target,
|
|
59
|
+
cfg_path=args.config,
|
|
60
|
+
no_restart=getattr(args, "no_restart", False),
|
|
61
|
+
)
|
|
49
62
|
|
|
50
63
|
|
|
51
|
-
def
|
|
52
|
-
"""
|
|
64
|
+
def _try_load_cfg(args) -> Config | None:
|
|
65
|
+
"""Load the project config if present; None when absent (package-only)."""
|
|
66
|
+
try:
|
|
67
|
+
return cfg_from_args(args)
|
|
68
|
+
except FileNotFoundError:
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _pip_env_flags() -> list[str]:
|
|
73
|
+
"""Extra pip flags for the current install under PEP 668.
|
|
53
74
|
|
|
54
|
-
|
|
55
|
-
|
|
75
|
+
Inside a venv: none (pip is unrestricted). Otherwise (system/user
|
|
76
|
+
interpreter on an externally-managed distro) the caller retries with these.
|
|
77
|
+
``--user`` is added only when agent_runner lives in user-site, matching
|
|
78
|
+
where the existing install actually is.
|
|
79
|
+
"""
|
|
80
|
+
import sys
|
|
81
|
+
|
|
82
|
+
if sys.prefix != sys.base_prefix: # inside a venv → no PEP 668
|
|
83
|
+
return []
|
|
84
|
+
import site
|
|
85
|
+
|
|
86
|
+
flags = ["--break-system-packages"]
|
|
87
|
+
user_site = site.getusersitepackages()
|
|
88
|
+
if str(Path(agent_runner.__file__)).startswith(str(Path(user_site))):
|
|
89
|
+
flags.insert(0, "--user")
|
|
90
|
+
return flags
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _pip_install(spec: str, *, force_reinstall: bool = False) -> subprocess.CompletedProcess:
    """Run ``pip install --upgrade <spec>`` with one PEP 668 retry.

    The first attempt uses the current interpreter's pip. If it fails and
    stderr mentions ``externally-managed-environment``, the command is run
    once more with the flags from ``_pip_env_flags`` appended (when there
    are any).

    Args:
        spec: Requirement specifier handed to pip.
        force_reinstall: When True, ``--force-reinstall`` is passed as well.

    Returns:
        The ``CompletedProcess`` of the last pip invocation; the caller is
        responsible for checking ``returncode``.
    """
    reinstall_flag = ["--force-reinstall"] if force_reinstall else []
    argv = [sys.executable, "-m", "pip", "install", *reinstall_flag, "--upgrade", spec]
    first = subprocess.run(argv, capture_output=True, text=True, check=False)

    blocked_by_pep668 = first.returncode != 0 and "externally-managed-environment" in (
        first.stderr or ""
    )
    if not blocked_by_pep668:
        return first

    retry_flags = _pip_env_flags()
    if retry_flags:
        info(f"externally-managed env detected; retrying pip with {' '.join(retry_flags)}")
        return subprocess.run(argv + retry_flags, capture_output=True, text=True, check=False)
    # Nothing to add (e.g. inside a venv): report the original failure.
    return first
|
|
61
108
|
|
|
62
109
|
|
|
63
110
|
def _smoke_version() -> tuple[int, str]:
|
|
@@ -93,18 +140,40 @@ def _smoke_peek(cfg_path: Path) -> tuple[int, str]:
|
|
|
93
140
|
return 0, ""
|
|
94
141
|
|
|
95
142
|
|
|
96
|
-
def _run_upgrade(
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
143
|
+
def _run_upgrade(
    cfg: Config | None,
    *,
    target: str | None,
    cfg_path: Path,
    no_restart: bool = False,
) -> int:
    """Validate ``target`` and pick the upgrade strategy.

    The orchestrated flow is used when ``_orchestrate_capable`` approves the
    config; every other case takes the package-only path.

    Args:
        cfg: Loaded project config, or None when no config file exists.
        target: Version to pin, or None for the latest release.
        cfg_path: Config path forwarded to the orchestrated flow.
        no_restart: When True, the package-only path is forced.

    Returns:
        The exit code of the chosen flow, or the result of ``fail`` when
        ``target`` is blank.
    """
    target_is_blank = target is not None and target.strip() == ""
    if target_is_blank:
        return fail("--target must be a non-empty version string (e.g. 0.1.13)")

    # Capture the running version before pip replaces the package on disk.
    running_version = __version__
    if not _orchestrate_capable(cfg, no_restart):
        return _package_only_upgrade(cfg, target=target, from_version=running_version)
    return _orchestrated_upgrade(
        cfg, target=target, cfg_path=cfg_path, from_version=running_version
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _orchestrate_capable(cfg: Config | None, no_restart: bool) -> bool:
    """Tell whether the full stop/start orchestration may be used.

    Returns True only when a config is loaded, ``no_restart`` is not set, and
    the detected service mode for the project is ``ServiceMode.SYSTEMD_USER``.
    """
    if no_restart or cfg is None:
        return False
    runtime = cfg.runtime
    project = api._resolve_project(runtime.work_dir)
    detected = api.detect_service_mode(project, log_dir=runtime.log_dir)
    return detected == ServiceMode.SYSTEMD_USER
|
|
103
167
|
|
|
168
|
+
|
|
169
|
+
def _orchestrated_upgrade(
|
|
170
|
+
cfg: Config, *, target: str | None, cfg_path: Path, from_version: str
|
|
171
|
+
) -> int:
|
|
172
|
+
"""Full stop → pip → smoke(--version + peek) → start → emit service_upgraded,
|
|
173
|
+
with auto-rollback on smoke failure. Only reached for the systemd --user
|
|
174
|
+
service agent-runner installed (api.start works there)."""
|
|
104
175
|
log_dir = cfg.runtime.log_dir
|
|
105
176
|
log_dir.mkdir(parents=True, exist_ok=True)
|
|
106
|
-
|
|
107
|
-
from_version = __version__
|
|
108
177
|
t0 = time.monotonic()
|
|
109
178
|
|
|
110
179
|
info("stopping service...")
|
|
@@ -155,14 +224,13 @@ def _run_upgrade(cfg: Config, *, target: str | None, cfg_path: Path) -> int:
|
|
|
155
224
|
started_at=t0,
|
|
156
225
|
cfg_path=cfg_path,
|
|
157
226
|
)
|
|
158
|
-
|
|
159
227
|
info(f"smoke OK (now at {to_version})")
|
|
160
228
|
|
|
161
229
|
info("starting service...")
|
|
162
230
|
t_start = time.monotonic()
|
|
163
231
|
try:
|
|
164
232
|
api.start(cfg.runtime.work_dir)
|
|
165
|
-
except Exception as e: # noqa: BLE001 — new version installed but service stopped
|
|
233
|
+
except Exception as e: # noqa: BLE001 — new version installed but service stopped
|
|
166
234
|
return _rollback_failed(
|
|
167
235
|
log_dir,
|
|
168
236
|
to_version,
|
|
@@ -183,6 +251,65 @@ def _run_upgrade(cfg: Config, *, target: str | None, cfg_path: Path) -> int:
|
|
|
183
251
|
return 0
|
|
184
252
|
|
|
185
253
|
|
|
254
|
+
def _package_only_upgrade(cfg: Config | None, *, target: str | None, from_version: str) -> int:
    """Upgrade the installed package and smoke-test it, leaving the service alone.

    Flow: pip-install the requested spec, run the ``--version`` smoke check,
    and on a smoke failure force-reinstall ``from_version``. On success a
    ``package_upgraded`` event is emitted (only when a config is available)
    and a restart hint is printed for the operator.

    Args:
        cfg: Loaded project config, or None when there is none.
        target: Version to pin, or None for the latest release.
        from_version: Version that was installed before the upgrade; used as
            the rollback target and in the emitted event.

    Returns:
        0 on success; otherwise the value returned by ``fail``.
    """
    requirement = f"cli-agent-runner=={target}" if target is not None else "cli-agent-runner"
    info(f"package-only upgrade (service not managed by agent-runner); installing {requirement}...")
    install = _pip_install(requirement)
    if install.returncode != 0:
        stderr_head = install.stderr.strip()[:200]
        return fail(
            f"pip install failed (rc={install.returncode}): "
            f"{stderr_head}; "
            "package unchanged, your service keeps running the current version"
        )

    smoke_rc, smoke_out = _smoke_version()
    if smoke_rc != 0:
        # On failure the second element carries the error text, not a version.
        attempted = target or "latest"
        info(f"smoke failed at {attempted} ({smoke_out}); reinstalling {from_version}...")
        rollback = _pip_install(f"cli-agent-runner=={from_version}", force_reinstall=True)
        if rollback.returncode == 0:
            return fail(
                f"package smoke failed at {attempted}; reinstalled {from_version}; service untouched"
            )
        rollback_err = rollback.stderr.strip()[:200]
        return fail(
            f"package smoke failed AND rollback reinstall failed (rc={rollback.returncode}): "
            f"{rollback_err}; run: "
            f"pip install --force-reinstall cli-agent-runner=={from_version}"
        )

    new_version = smoke_out
    if cfg is not None:
        event_dir = cfg.runtime.log_dir
        event_dir.mkdir(parents=True, exist_ok=True)
        payload = {
            "from_version": from_version,
            "to_version": new_version,
            "restart_deferred": True,
        }
        events.emit(event_dir, events.PACKAGE_UPGRADED, **payload)

    info(f"package upgraded {from_version} → {new_version}. Restart your supervisor to load it:")
    info(_restart_hint(cfg))
    return 0
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _restart_hint(cfg: Config | None) -> str:
    """Build the restart command shown to the operator after an upgrade.

    With a config whose detected service mode is ``ServiceMode.SYSTEMD_USER``
    the hint is a ``systemctl --user restart`` line for that unit. In every
    other case a generic system-unit hint is returned. ``agent-runner start``
    is never suggested.
    """
    generic_hint = (
        " sudo systemctl restart <your-unit> # if run by a systemd system unit\n"
        " (agent-runner can't know a service it didn't install; substitute your unit name)"
    )
    if cfg is None:
        return generic_hint

    project = api._resolve_project(cfg.runtime.work_dir)
    detected = api.detect_service_mode(project, log_dir=cfg.runtime.log_dir)
    if detected != ServiceMode.SYSTEMD_USER:
        return generic_hint
    return f" systemctl --user restart {api.serve_unit_filename(project)}"
|
|
311
|
+
|
|
312
|
+
|
|
186
313
|
def _rollback(
|
|
187
314
|
cfg: Config,
|
|
188
315
|
log_dir: Path,
|
|
@@ -141,6 +141,12 @@ class MonitorConfig:
|
|
|
141
141
|
anomaly_repetitive_threshold: int = 0 # 0 = disabled
|
|
142
142
|
host_health: MonitorHostHealthConfig = field(default_factory=MonitorHostHealthConfig)
|
|
143
143
|
round_progress_interval_s: int = 0 # 0 = disabled; >0 = emit round_progress every N seconds
|
|
144
|
+
supervisor_stale_threshold_s: int | None = None
|
|
145
|
+
"""Staleness deadline for the supervisor_stale detector (seconds).
|
|
146
|
+
|
|
147
|
+
None (unset) → derived default round_timeout_s * 1.5.
|
|
148
|
+
Positive int → explicit threshold. 0 → disable the detector.
|
|
149
|
+
"""
|
|
144
150
|
|
|
145
151
|
|
|
146
152
|
@dataclass(frozen=True)
|
|
@@ -467,6 +473,14 @@ def load_config(toml_path: Path) -> Config:
|
|
|
467
473
|
monitor_d.get("round_progress_interval_s", 0),
|
|
468
474
|
field="monitor.round_progress_interval_s",
|
|
469
475
|
),
|
|
476
|
+
supervisor_stale_threshold_s=(
|
|
477
|
+
None
|
|
478
|
+
if monitor_d.get("supervisor_stale_threshold_s") is None
|
|
479
|
+
else _require_non_negative_int(
|
|
480
|
+
monitor_d["supervisor_stale_threshold_s"],
|
|
481
|
+
field="monitor.supervisor_stale_threshold_s",
|
|
482
|
+
)
|
|
483
|
+
),
|
|
470
484
|
)
|
|
471
485
|
plugins_raw = dict(raw.get("plugins") or {}) # copy so we can pop
|
|
472
486
|
disable = list(plugins_raw.pop("disable", []))
|
|
@@ -46,6 +46,7 @@ MONITOR_STARTED = "monitor_started"
|
|
|
46
46
|
ORPHAN_IDEMPOTENT_SKIP = "orphan_idempotent_skip"
|
|
47
47
|
ORPHAN_STASH_FAILED = "orphan_stash_failed"
|
|
48
48
|
ORPHAN_STASHED = "orphan_stashed"
|
|
49
|
+
PACKAGE_UPGRADED = "package_upgraded"
|
|
49
50
|
PROMPT_OVERWRITTEN = "prompt_overwritten"
|
|
50
51
|
ROUND_END = "round_end"
|
|
51
52
|
ROUND_GRACE_KILL = "round_grace_kill"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Monitor — anomaly detectors over events + metrics + log tails.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
12 built-in detectors. Two trigger ``auto_action="stop_service"``:
|
|
4
4
|
* oauth_fail — auth pattern in short-exit logs (retrying burns API quota)
|
|
5
5
|
* disk_critical — disk_used_pct > 95% (writing more risks corruption)
|
|
6
6
|
|
|
@@ -54,6 +54,7 @@ KNOWN_ALERT_KINDS: frozenset[str] = frozenset(
|
|
|
54
54
|
"network_fail",
|
|
55
55
|
"rate_limit_active",
|
|
56
56
|
"anomaly_repetitive_active",
|
|
57
|
+
"supervisor_stale",
|
|
57
58
|
}
|
|
58
59
|
)
|
|
59
60
|
|
|
@@ -429,6 +430,39 @@ def detect_anomaly_repetitive_active(
|
|
|
429
430
|
)
|
|
430
431
|
|
|
431
432
|
|
|
433
|
+
def detect_supervisor_stale(
    events: list[dict[str, Any]],
    *,
    now: datetime,
    stale_threshold_s: int,
) -> Alert | None:
    """Alert when the most recent event is older than ``stale_threshold_s``.

    Catches supervisor "silent-death": stuck between rounds (after round_end,
    before the next round_start) emitting no events. The event stream cannot
    distinguish that from a normal idle gap — only a deadline check can.

    Args:
        events: Event records; only entries whose ``"ts"`` is a non-empty
            string are considered.
        now: Reference time the newest timestamp is compared against.
        stale_threshold_s: Maximum tolerated age in seconds; ``<= 0``
            disables the check (the caller resolves the sentinel).

    Returns:
        A ``supervisor_stale`` warning alert when the newest event is older
        than the threshold, otherwise None. An empty event list, or one with
        no usable timestamp, yields None: that is "never started", not
        silent-death, and there is no baseline to measure staleness against.
    """
    if stale_threshold_s <= 0 or not events:
        return None
    # Skip records whose "ts" is missing, None or not a string: max() over
    # mixed types raises TypeError, and one malformed record must not abort
    # the detector.
    stamps = [ts for e in events if isinstance(ts := e.get("ts"), str) and ts]
    if not stamps:
        return None
    last_ts_str = max(stamps)
    age_s = (now - parse_iso_ms(last_ts_str)).total_seconds()
    if age_s <= stale_threshold_s:
        return None
    return _alert(
        "supervisor_stale",
        "warning",
        f"No events for {int(age_s)}s (threshold {stale_threshold_s}s) — "
        f"supervisor may be stuck or dead. Last event: {last_ts_str}.",
        {"age_s": int(age_s), "threshold_s": stale_threshold_s, "last_ts": last_ts_str},
    )
|
|
464
|
+
|
|
465
|
+
|
|
432
466
|
# ---------------------------------------------------------------------------
|
|
433
467
|
# State-tree assembly (Task 3.2)
|
|
434
468
|
# ---------------------------------------------------------------------------
|
|
@@ -535,6 +569,7 @@ def run_all_detectors(
|
|
|
535
569
|
metrics: list[dict[str, Any]],
|
|
536
570
|
log_tails: dict[int, str],
|
|
537
571
|
round_timeout_s: int = 1800,
|
|
572
|
+
supervisor_stale_threshold_s: int | None = None,
|
|
538
573
|
now: datetime | None = None,
|
|
539
574
|
auth_fail_patterns: list[str] | None = None,
|
|
540
575
|
auth_fail_hint: str | None = None,
|
|
@@ -543,12 +578,17 @@ def run_all_detectors(
|
|
|
543
578
|
disk_warning_pct: float = 90.0,
|
|
544
579
|
disk_critical_pct: float = 95.0,
|
|
545
580
|
) -> list[Alert]:
|
|
546
|
-
"""Run all
|
|
581
|
+
"""Run all 12 detectors; returns alerts (empty = healthy)."""
|
|
547
582
|
if now is None:
|
|
548
583
|
now = datetime.now(UTC)
|
|
549
584
|
compiled_auth_pats = (
|
|
550
585
|
[re.compile(p, re.IGNORECASE) for p in auth_fail_patterns] if auth_fail_patterns else None
|
|
551
586
|
)
|
|
587
|
+
effective_stale_s = (
|
|
588
|
+
int(round_timeout_s * 1.5)
|
|
589
|
+
if supervisor_stale_threshold_s is None
|
|
590
|
+
else supervisor_stale_threshold_s
|
|
591
|
+
)
|
|
552
592
|
candidates = [
|
|
553
593
|
detect_timeout_rate(events),
|
|
554
594
|
detect_hung(
|
|
@@ -568,6 +608,7 @@ def run_all_detectors(
|
|
|
568
608
|
detect_network_fail(events, log_tails),
|
|
569
609
|
detect_rate_limit_active(events, now=now.timestamp()),
|
|
570
610
|
detect_anomaly_repetitive_active(events),
|
|
611
|
+
detect_supervisor_stale(events, now=now, stale_threshold_s=effective_stale_s),
|
|
571
612
|
]
|
|
572
613
|
return [a for a in candidates if a is not None]
|
|
573
614
|
|
|
@@ -65,13 +65,14 @@ surfacing everywhere.
|
|
|
65
65
|
| `event_kind_registry` | Prevent events.emit() typos / unregistered kinds slipping past CI | `tests/invariants/test_event_kind_registry.py` |
|
|
66
66
|
<!-- /gen:defenses-table -->
|
|
67
67
|
|
|
68
|
-
## Monitor:
|
|
68
|
+
## Monitor: 12 detectors
|
|
69
69
|
|
|
70
70
|
Three categories by `auto_action`:
|
|
71
71
|
|
|
72
72
|
**Notify only** (severity `warning`):
|
|
73
73
|
`timeout_rate`, `hung`, `orphan_chain`, `disk_warning`, `mem_pressure`,
|
|
74
|
-
`smoke_fail_rate`, `network_fail
|
|
74
|
+
`smoke_fail_rate`, `network_fail`, `rate_limit_active`,
|
|
75
|
+
`anomaly_repetitive_active`, `supervisor_stale`.
|
|
75
76
|
|
|
76
77
|
**Auto-stop service** (severity `critical`, `auto_action="stop_service"`):
|
|
77
78
|
`oauth_fail`, `disk_critical`. Continuing in either state is harmful (burning
|
|
@@ -88,6 +89,7 @@ API quota / writing to a near-full disk).
|
|
|
88
89
|
- `orphan_chain`
|
|
89
90
|
- `rate_limit_active`
|
|
90
91
|
- `smoke_fail_rate`
|
|
92
|
+
- `supervisor_stale`
|
|
91
93
|
- `timeout_rate`
|
|
92
94
|
<!-- /gen:detector-list -->
|
|
93
95
|
|
|
@@ -163,6 +165,7 @@ hook (vs ALL pre-round hooks), use `[plugins] disable = ["that_entry_point_name"
|
|
|
163
165
|
- `orphan_idempotent_skip`
|
|
164
166
|
- `orphan_stash_failed`
|
|
165
167
|
- `orphan_stashed`
|
|
168
|
+
- `package_upgraded`
|
|
166
169
|
- `prompt_overwritten`
|
|
167
170
|
- `round_end`
|
|
168
171
|
- `round_grace_kill`
|
|
@@ -24,7 +24,7 @@ are shared between `peek`, `watch`, and `monitor`.
|
|
|
24
24
|
| `monitor` | Anomaly detection, narrate/events stream, or HTTP progress page |
|
|
25
25
|
| `serve` | Long-running supervisor loop |
|
|
26
26
|
| `round` | Run one round and exit |
|
|
27
|
-
| `upgrade` |
|
|
27
|
+
| `upgrade` | Package upgrade with service-mode gate: orchestrated stop/start for systemd --user; package-only otherwise |
|
|
28
28
|
<!-- /gen:verb-table -->
|
|
29
29
|
|
|
30
30
|
## Lifecycle
|
|
@@ -76,6 +76,24 @@ Long-running supervisor loop. Traps SIGTERM (graceful stop), SIGINT (graceful),
|
|
|
76
76
|
SIGUSR1 (cancel — forwards SIGINT to current round). Writes `serve.pid` and
|
|
77
77
|
`round.pid`. `--once` runs a single round then exits (debug).
|
|
78
78
|
|
|
79
|
+
### `agent-runner upgrade [--target VERSION] [--no-restart] [--config PATH]`
|
|
80
|
+
|
|
81
|
+
Upgrade the agent-runner package. Behavior depends on the detected service mode:
|
|
82
|
+
|
|
83
|
+
- **systemd --user service** (installed via `agent-runner install`): full
|
|
84
|
+
orchestrated flow — stop → pip install → smoke (`--version` + `peek`) →
|
|
85
|
+
start → emit `service_upgraded`. Auto-rollback on smoke failure.
|
|
86
|
+
- **Anything else** (system unit, foreground, no config): package-only —
|
|
87
|
+
PEP 668-aware pip + `--version` smoke + pip-level rollback, emits
|
|
88
|
+
`package_upgraded`, prints the restart command. Never touches your running
|
|
89
|
+
service, never runs `sudo`.
|
|
90
|
+
|
|
91
|
+
`--config` is optional: when omitted (or the file is absent), `upgrade` falls
|
|
92
|
+
back to package-only mode automatically.
|
|
93
|
+
|
|
94
|
+
`--no-restart` forces package-only even on a systemd --user host (upgrade the
|
|
95
|
+
package now, restart your service yourself).
|
|
96
|
+
|
|
79
97
|
## Observation
|
|
80
98
|
|
|
81
99
|
### `agent-runner peek [flags]`
|
|
@@ -117,7 +135,7 @@ agent-runner events --kind transient_error_backoff_capped --tail
|
|
|
117
135
|
|
|
118
136
|
### `agent-runner monitor [--host SSH-ALIAS] [--interval N] [--json]`
|
|
119
137
|
|
|
120
|
-
Anomaly-detection daemon. Runs the
|
|
138
|
+
Anomaly-detection daemon. Runs the 12 detectors against the live state on every
|
|
121
139
|
poll. Without `--host`, watches local logs at default 30s interval. With
|
|
122
140
|
`--host`, watches a remote agent-runner over plain ssh at default 60s interval.
|
|
123
141
|
|
|
@@ -80,6 +80,7 @@ running with newly-set `dirty_action = "auto_commit"` is undefined).
|
|
|
80
80
|
| `anomaly_repetitive_threshold` | `int` | 0 |
|
|
81
81
|
| `host_health` | `MonitorHostHealthConfig` | MonitorHostHealthConfig(mem_avail_min_mb=200, disk_warning_pct=90.0, disk_critical_pct=95.0) |
|
|
82
82
|
| `round_progress_interval_s` | `int` | 0 |
|
|
83
|
+
| `supervisor_stale_threshold_s` | `int \| None` | None |
|
|
83
84
|
<!-- /gen:config-schema -->
|
|
84
85
|
|
|
85
86
|
### `vcs.dirty_action`
|
|
@@ -203,6 +204,7 @@ Unconfigured phases (and configs without `[phases]`) keep using the global
|
|
|
203
204
|
[monitor]
|
|
204
205
|
auto_stop_on = ["oauth_fail", "disk_critical"]
|
|
205
206
|
round_progress_interval_s = 0 # 0 = disabled; set >0 to emit round_progress heartbeat events
|
|
207
|
+
# supervisor_stale_threshold_s = 2700 # unset = round_timeout_s * 1.5; 0 = disable
|
|
206
208
|
|
|
207
209
|
[monitor.host_health]
|
|
208
210
|
mem_avail_min_mb = 200 # mem_pressure fires when mem_available_mb < this
|