coderouter-cli 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/CHANGELOG.md +77 -0
  2. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/PKG-INFO +7 -6
  3. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/README.md +6 -5
  4. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/config/schemas.py +103 -0
  5. coderouter_cli-2.1.0/coderouter/guards/continuous_probe.py +349 -0
  6. coderouter_cli-2.1.0/coderouter/guards/drift_actions.py +111 -0
  7. coderouter_cli-2.1.0/coderouter/guards/drift_detection.py +308 -0
  8. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/ingress/anthropic_routes.py +75 -11
  9. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/ingress/app.py +39 -0
  10. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/logging.py +262 -0
  11. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/metrics/collector.py +93 -0
  12. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/metrics/prometheus.py +141 -0
  13. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/routing/adaptive.py +23 -0
  14. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/routing/fallback.py +285 -4
  15. coderouter_cli-2.1.0/docs/continuous-probing.md +102 -0
  16. coderouter_cli-2.1.0/docs/drift-detection.md +146 -0
  17. coderouter_cli-2.1.0/docs/partial-stitch.md +71 -0
  18. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/pyproject.toml +1 -1
  19. coderouter_cli-2.1.0/tests/test_continuous_probe.py +511 -0
  20. coderouter_cli-2.1.0/tests/test_drift_actions.py +174 -0
  21. coderouter_cli-2.1.0/tests/test_drift_detection.py +365 -0
  22. coderouter_cli-2.1.0/tests/test_drift_detection_integration.py +528 -0
  23. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_ingress_anthropic.py +16 -0
  24. coderouter_cli-2.1.0/tests/test_partial_stitch.py +417 -0
  25. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/.gitignore +0 -0
  26. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/LICENSE +0 -0
  27. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/README.en.md +0 -0
  28. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/__init__.py +0 -0
  29. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/__main__.py +0 -0
  30. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/adapters/__init__.py +0 -0
  31. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/adapters/anthropic_native.py +0 -0
  32. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/adapters/base.py +0 -0
  33. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/adapters/openai_compat.py +0 -0
  34. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/adapters/registry.py +0 -0
  35. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/cli.py +0 -0
  36. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/cli_stats.py +0 -0
  37. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/config/__init__.py +0 -0
  38. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/config/capability_registry.py +0 -0
  39. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/config/env_file.py +0 -0
  40. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/config/loader.py +0 -0
  41. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/cost.py +0 -0
  42. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/data/__init__.py +0 -0
  43. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/data/model-capabilities.yaml +0 -0
  44. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/doctor.py +0 -0
  45. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/doctor_apply.py +0 -0
  46. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/env_security.py +0 -0
  47. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/errors.py +0 -0
  48. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/guards/__init__.py +0 -0
  49. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/guards/backend_health.py +0 -0
  50. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/guards/context_budget.py +0 -0
  51. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/guards/memory_pressure.py +0 -0
  52. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/guards/tool_loop.py +0 -0
  53. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/ingress/__init__.py +0 -0
  54. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/ingress/dashboard_routes.py +0 -0
  55. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/ingress/metrics_routes.py +0 -0
  56. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/ingress/openai_routes.py +0 -0
  57. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/metrics/__init__.py +0 -0
  58. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/output_filters.py +0 -0
  59. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/routing/__init__.py +0 -0
  60. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/routing/auto_router.py +0 -0
  61. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/routing/budget.py +0 -0
  62. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/routing/capability.py +0 -0
  63. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/token_estimation.py +0 -0
  64. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/translation/__init__.py +0 -0
  65. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/translation/anthropic.py +0 -0
  66. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/translation/convert.py +0 -0
  67. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/coderouter/translation/tool_repair.py +0 -0
  68. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/assets/dashboard-demo.png +0 -0
  69. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/context-budget.md +0 -0
  70. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/designs/v1.5-dashboard-mockup.html +0 -0
  71. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/designs/v1.6-auto-router-verification.md +0 -0
  72. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/designs/v1.6-auto-router.md +0 -0
  73. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/free-tier-guide.en.md +0 -0
  74. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/free-tier-guide.md +0 -0
  75. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/gguf_dl.md +0 -0
  76. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/hf-ollama-models.md +0 -0
  77. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/llamacpp-direct.en.md +0 -0
  78. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/llamacpp-direct.md +0 -0
  79. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/lmstudio-direct.en.md +0 -0
  80. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/lmstudio-direct.md +0 -0
  81. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/openrouter-roster/CHANGES.md +0 -0
  82. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/openrouter-roster/README.md +0 -0
  83. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/openrouter-roster/latest.json +0 -0
  84. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/quickstart.en.md +0 -0
  85. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/quickstart.md +0 -0
  86. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v0.4.md +0 -0
  87. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v0.5-verify.md +0 -0
  88. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v0.5.md +0 -0
  89. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v0.6.md +0 -0
  90. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v0.7.md +0 -0
  91. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v1.0-verify.md +0 -0
  92. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/retrospectives/v1.0.md +0 -0
  93. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/security.en.md +0 -0
  94. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/security.md +0 -0
  95. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/troubleshooting.en.md +0 -0
  96. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/troubleshooting.md +0 -0
  97. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/usage-guide.en.md +0 -0
  98. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/usage-guide.md +0 -0
  99. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/when-do-i-need-coderouter.en.md +0 -0
  100. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/docs/when-do-i-need-coderouter.md +0 -0
  101. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/.env.example +0 -0
  102. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.auto-custom.yaml +0 -0
  103. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.auto.yaml +0 -0
  104. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.note-2026.yaml +0 -0
  105. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.nvidia-nim.yaml +0 -0
  106. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.raspberrypi.yaml +0 -0
  107. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.v2-context-budget.yaml +0 -0
  108. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/examples/providers.yaml +0 -0
  109. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/scripts/demo_traffic.sh +0 -0
  110. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/scripts/openrouter_roster_diff.py +0 -0
  111. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/scripts/verify_v0_5.sh +0 -0
  112. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/scripts/verify_v1_0.sh +0 -0
  113. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/__init__.py +0 -0
  114. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/conftest.py +0 -0
  115. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_adapter_anthropic.py +0 -0
  116. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_auto_router.py +0 -0
  117. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_backend_health.py +0 -0
  118. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_budget.py +0 -0
  119. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_capability.py +0 -0
  120. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_capability_degraded_payload.py +0 -0
  121. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_capability_registry.py +0 -0
  122. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_capability_registry_cache_control.py +0 -0
  123. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_claude_code_suitability.py +0 -0
  124. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_cli.py +0 -0
  125. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_cli_stats.py +0 -0
  126. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_config.py +0 -0
  127. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_context_budget.py +0 -0
  128. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_dashboard_endpoint.py +0 -0
  129. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_doctor.py +0 -0
  130. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_doctor_apply.py +0 -0
  131. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_doctor_cache_probe.py +0 -0
  132. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_env_file.py +0 -0
  133. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_env_security.py +0 -0
  134. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_errors.py +0 -0
  135. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_examples_yaml.py +0 -0
  136. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback.py +0 -0
  137. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback_anthropic.py +0 -0
  138. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback_cache_control.py +0 -0
  139. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback_cache_observed.py +0 -0
  140. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback_misconfig_warn.py +0 -0
  141. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback_paid_gate.py +0 -0
  142. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_fallback_thinking.py +0 -0
  143. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_guards_tool_loop.py +0 -0
  144. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_ingress_profile.py +0 -0
  145. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_memory_pressure.py +0 -0
  146. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_cache.py +0 -0
  147. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_collector.py +0 -0
  148. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_cost.py +0 -0
  149. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_endpoint.py +0 -0
  150. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_jsonl.py +0 -0
  151. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_prometheus.py +0 -0
  152. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_metrics_prometheus_cache.py +0 -0
  153. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_openai_compat.py +0 -0
  154. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_openrouter_roster_diff.py +0 -0
  155. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_output_filters.py +0 -0
  156. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_output_filters_adapters.py +0 -0
  157. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_reasoning_strip.py +0 -0
  158. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_routing_adaptive.py +0 -0
  159. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_setup_sh.py +0 -0
  160. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_token_estimation.py +0 -0
  161. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_tool_repair.py +0 -0
  162. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_translation_anthropic.py +0 -0
  163. {coderouter_cli-2.0.0 → coderouter_cli-2.1.0}/tests/test_translation_reverse.py +0 -0
@@ -6,6 +6,83 @@ versioning follows [SemVer](https://semver.org/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [v2.1.0] — 2026-05-05 (Long-run Reliability 完成 — v2.0-G/H/I)
10
+
11
+ **Theme: L4 品質劣化 / L6 mid-stream 失敗 / L5 idle 時障害の 3 系統を同時解決し、Long-run Reliability pillar を完成させる。** v2.0-F (L1 context overflow) と合わせ、6 系統障害のうち 4 系統を CodeRouter が能動的にガードする状態に到達。
12
+
13
+ ### v2.0-G: Drift Detection (L4 品質劣化ガード)
14
+
15
+ **長時間 agent session でモデル応答品質が徐々に劣化する "drift" を自動検知し、corrective action を実行。** Ollama ローカルモデルが数時間稼働すると KV cache 汚染や VRAM 圧迫で応答が空になる / 短くなる / tool_use を返さなくなる現象 (L4) を 5 つのシグナルで検知。warn → promote (chain 降格) → reload (Ollama KV flush) の 3 段階アクションで品質を自動回復。
16
+
17
+ | 機能 | 説明 |
18
+ |---|---|
19
+ | 5 Signal Detector | empty_response_rate / length_collapse / tool_silence_rate / stop_anomaly_rate / error_rate を per-provider rolling window で監視 |
20
+ | `detect_drift()` | Pure function — severity none/mild/severe 判定 (severe×1 or mild×2 → severe) |
21
+ | `drift_detection_action: off/warn/promote/reload` | profile 単位で guard 有効化 (default: off) |
22
+ | `drift_detection_sensitivity: low/normal/high` | 閾値プリセット選択 |
23
+ | promote action | AdaptiveAdjuster の rank demotion で traffic を別 provider へ迂回 |
24
+ | reload action | Ollama `keep_alive=0` で KV cache flush → fresh context で再開 |
25
+ | Cooldown & Recovery | 設定秒数後に rank 復帰 + window クリア |
26
+ | `X-CodeRouter-Drift` header | response header で mild/severe ステータスを通知 (streaming 対応) |
27
+ | Prometheus metrics | `coderouter_drift_detected_total`, `coderouter_drift_promoted_total`, `coderouter_drift_reload_total` |
28
+
29
+ - Tests: ~930 → **~970** (+42, drift_detection 27 + drift_integration 10 + drift_actions 5)
30
+ - Runtime deps: 5 → 5 (**36 sub-release 連続据え置き**)
31
+ - Backward compat: 完全互換、`drift_detection_action` default は `"off"` — opt-in するまで既存挙動完全一致
32
+
33
+ ### 設定例
34
+
35
+ ```yaml
36
+ profiles:
37
+ - name: long-session
38
+ providers: [ollama-qwen3]
39
+ drift_detection_action: reload # off | warn | promote | reload
40
+ drift_detection_sensitivity: normal # low | normal | high
41
+ drift_detection_window_size: 20 # rolling window サイズ
42
+ drift_detection_cooldown_s: 300 # 復帰までの待機秒数
43
+ ```
44
+
45
+ ### 新規ファイル
46
+
47
+ - `coderouter/guards/drift_detection.py` — 検知ロジック (observation model + detector + window manager)
48
+ - `coderouter/guards/drift_actions.py` — reload action (Ollama KV flush)
49
+ - `tests/test_drift_detection.py` — pure function tests (27 本)
50
+ - `tests/test_drift_detection_integration.py` — engine integration tests (10 本)
51
+ - `tests/test_drift_actions.py` — reload action tests (5 本)
52
+ - `docs/drift-detection.md` — ユーザードキュメント
53
+
54
+ ### v2.0-H: Mid-stream Partial Stitching (L6 拡張)
55
+
56
+ **streaming 応答が途中で失敗した際、蓄積済みテキストを破棄せずクライアントに返却。**
57
+
58
+ | 機能 | 説明 |
59
+ |---|---|
60
+ | `_StreamUsageAccumulator` text 蓄積 | content_block_start/delta/stop を追跡し text block を in-memory 蓄積 |
61
+ | `MidStreamError.partial_content` | 例外に蓄積テキストを搬送 (tool_use 部分 JSON は除外) |
62
+ | `partial_stitch_action: off/surface` | profile 単位で有効化 (default: off) |
63
+ | `event: coderouter_partial` | 蓄積テキスト + provider + reason を SSE メタデータとして返却 |
64
+ | Prometheus metric | `coderouter_partial_stitch_surfaced_total` |
65
+
66
+ ### v2.0-I: Continuous Probing (L5 能動ヘルスチェック)
67
+
68
+ **idle 時間帯のプロバイダ障害を能動的に検知し backend health state machine を更新。**
69
+
70
+ | 機能 | 説明 |
71
+ |---|---|
72
+ | `probe_one()` | 1-token completion で全 model pipeline の正常性を確認 |
73
+ | `probe_loop()` | asyncio background task — sequential probe + graceful shutdown |
74
+ | `continuous_probe: off/active` | グローバル config で有効化 (default: off) |
75
+ | Model drift detection | probe response の model 名と config を照合 → 不一致で warn |
76
+ | Prometheus metrics | `probe_total`, `probe_outcomes_total`, `probe_rounds_total`, `probe_latency_ms`, `probe_drift_detected_total` |
77
+
78
+ ### 全体サマリ
79
+
80
+ - Tests: ~930 → **~1005** (+75)
81
+ - Runtime deps: 5 → 5 (**38 sub-release 連続据え置き**)
82
+ - Backward compat: 完全互換、全機能 default off — opt-in するまで既存挙動完全一致
83
+
84
+ ---
85
+
9
86
  ## [v2.0.0] — 2026-05-05 (Context Budget Management — L1 overflow 防止)
10
87
 
11
88
  **Theme: 長時間 agent session の context overflow を未然に防止する guard を実装。** Claude Code / Cline / OpenClaw 等の agentic session が 8 時間超え loop で動くと messages が context window に漸近し、backend が 400 / truncation を返して session 死亡する問題 (L1) を根本解決。warn (80%) → auto trim (90%) の 2 段階 guard で overflow をゼロに。
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
60
60
  <p align="center">
61
61
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
62
62
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
63
- <a href=""><img src="https://img.shields.io/badge/version-2.0.0-blue" alt="version"></a>
63
+ <a href=""><img src="https://img.shields.io/badge/version-2.1.0-blue" alt="version"></a>
64
64
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
65
65
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
66
66
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -91,7 +91,8 @@ Description-Content-Type: text/markdown
91
91
  - **v1.10.0 で Long-run reliability pillar が完成**: `cost.monthly_budget_usd` で provider 月次 USD 予算を強制、**L2 memory pressure detector**(Ollama / LM Studio が VRAM 切れで OOM になった時に自動クールダウン)、**L5 backend health 状態機械**(連続失敗で UNHEALTHY → chain 末尾に降格、1 回成功で即復帰)
92
92
  - **v1.10.0 で auto-router が 6 matcher に揃う**: `has_image` / `code_fence_ratio_min` / `content_contains` / `content_regex` / `model_pattern`(Opus/Sonnet/Haiku 分岐)/ `content_token_count_min`(長文 → 1M ctx Gemini Flash 等へ自動切替)
93
93
  - **v2.0.0 で Context Budget Management (L1 overflow 防止) を搭載**: 長時間 agent session で messages が context window に漸近 → backend 400 エラーで session 死亡する問題を根本解決。warn (80%) → auto trim (90%) の 2 段階 guard で **context overflow ゼロ**を実現。tool_use / tool_result ペアは atomic 保全、`X-CodeRouter-Context-Budget` ヘッダで状態通知、Prometheus メトリクス完備
94
- - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— Python、MIT、テスト 930 本緑
94
+ - **v2.1.0 で Long-run Reliability 3 機能を追加搭載**: **Drift Detection** (L4 品質劣化検知 — 5 シグナル rolling window + warn/promote/reload 3 段階アクション)、**Partial Stitching** (L6 mid-stream 失敗時の蓄積テキスト返却)、**Continuous Probing** (P3 idle 時 1-token 定期 probe + model drift 検知 + backend health 自動更新)
95
+ - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— 純 Python、MIT、テスト 950 本緑
95
96
 
96
97
  → **Claude Code / gemini-cli / codex + Ollama / llama.cpp / NVIDIA NIM で、破綻しない local-first agent が組める**
97
98
 
@@ -198,7 +199,7 @@ CodeRouter / Voice Bridge ともに独立した repo で進化していて、HTT
198
199
 
199
200
  ## クイックスタート(3 コマンド)
200
201
 
201
- **v2.0.0 で Context Budget Management (L1 overflow 防止) を搭載** 長時間 agent session context window を使い切って死ぬ問題を根本解決。`uvx` 一発で動きます (Python 3.12 以上必須):
202
+ **v2.1.0 で Long-run Reliability pillar 完成** — Context Budget (L1) + Drift Detection (L4) + Partial Stitching (L6) + Continuous Probing (P3)。`uvx` 一発で動きます (Python 3.12 以上必須):
202
203
 
203
204
  ```bash
204
205
  # 1. サンプル設定を置く
@@ -266,9 +267,9 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
266
267
 
267
268
  注意点や「ローカル GPU なし」向けレシピを含むフル版マトリクス: [利用ガイド §1](./docs/usage-guide.md#1-os-互換性)
268
269
 
269
- ## ステータス — v2.0.0 (2026-05)
270
+ ## ステータス — v2.1.0 (2026-05)
270
271
 
271
- **テスト 930 本通過。ランタイム依存 5 個 (36 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** v2.0.0 で **Context Budget Management (L1 overflow 防止)** を搭載 長時間 agent session context window を使い切って死ぬ問題を根本解決。v1.10.0 までに **Long-run Reliability** (L2/L3/L5)、**Cost pillar**、**Auto-router 6 matcher** が完成済み。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
272
+ **テスト 950 本通過。ランタイム依存 5 個 (39 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** v2.1.0 で **Long-run Reliability pillar が完成** — Context Budget (L1) / Drift Detection (L4) / Partial Stitching (L6) / Continuous Probing (P3) の 4 sub-release を統合出荷。v1.10.0 までに **Long-run Reliability** (L2/L3/L5)、**Cost pillar**、**Auto-router 6 matcher** が完成済み。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
272
273
 
273
274
  今日の CodeRouter が届ける価値:
274
275
 
@@ -19,7 +19,7 @@
19
19
  <p align="center">
20
20
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
21
21
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
22
- <a href=""><img src="https://img.shields.io/badge/version-2.0.0-blue" alt="version"></a>
22
+ <a href=""><img src="https://img.shields.io/badge/version-2.1.0-blue" alt="version"></a>
23
23
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
24
24
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
25
25
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -50,7 +50,8 @@
50
50
  - **v1.10.0 で Long-run reliability pillar が完成**: `cost.monthly_budget_usd` で provider 月次 USD 予算を強制、**L2 memory pressure detector**(Ollama / LM Studio が VRAM 切れで OOM になった時に自動クールダウン)、**L5 backend health 状態機械**(連続失敗で UNHEALTHY → chain 末尾に降格、1 回成功で即復帰)
51
51
  - **v1.10.0 で auto-router が 6 matcher に揃う**: `has_image` / `code_fence_ratio_min` / `content_contains` / `content_regex` / `model_pattern`(Opus/Sonnet/Haiku 分岐)/ `content_token_count_min`(長文 → 1M ctx Gemini Flash 等へ自動切替)
52
52
  - **v2.0.0 で Context Budget Management (L1 overflow 防止) を搭載**: 長時間 agent session で messages が context window に漸近 → backend 400 エラーで session 死亡する問題を根本解決。warn (80%) → auto trim (90%) の 2 段階 guard で **context overflow ゼロ**を実現。tool_use / tool_result ペアは atomic 保全、`X-CodeRouter-Context-Budget` ヘッダで状態通知、Prometheus メトリクス完備
53
- - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— Python、MIT、テスト 930 本緑
53
+ - **v2.1.0 で Long-run Reliability 3 機能を追加搭載**: **Drift Detection** (L4 品質劣化検知 — 5 シグナル rolling window + warn/promote/reload 3 段階アクション)、**Partial Stitching** (L6 mid-stream 失敗時の蓄積テキスト返却)、**Continuous Probing** (P3 idle 時 1-token 定期 probe + model drift 検知 + backend health 自動更新)
54
+ - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— 純 Python、MIT、テスト 950 本緑
54
55
 
55
56
  → **Claude Code / gemini-cli / codex + Ollama / llama.cpp / NVIDIA NIM で、破綻しない local-first agent が組める**
56
57
 
@@ -157,7 +158,7 @@ CodeRouter / Voice Bridge ともに独立した repo で進化していて、HTT
157
158
 
158
159
  ## クイックスタート(3 コマンド)
159
160
 
160
- **v2.0.0 で Context Budget Management (L1 overflow 防止) を搭載** 長時間 agent session context window を使い切って死ぬ問題を根本解決。`uvx` 一発で動きます (Python 3.12 以上必須):
161
+ **v2.1.0 で Long-run Reliability pillar 完成** — Context Budget (L1) + Drift Detection (L4) + Partial Stitching (L6) + Continuous Probing (P3)。`uvx` 一発で動きます (Python 3.12 以上必須):
161
162
 
162
163
  ```bash
163
164
  # 1. サンプル設定を置く
@@ -225,9 +226,9 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
225
226
 
226
227
  注意点や「ローカル GPU なし」向けレシピを含むフル版マトリクス: [利用ガイド §1](./docs/usage-guide.md#1-os-互換性)
227
228
 
228
- ## ステータス — v2.0.0 (2026-05)
229
+ ## ステータス — v2.1.0 (2026-05)
229
230
 
230
- **テスト 930 本通過。ランタイム依存 5 個 (36 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** v2.0.0 で **Context Budget Management (L1 overflow 防止)** を搭載 長時間 agent session context window を使い切って死ぬ問題を根本解決。v1.10.0 までに **Long-run Reliability** (L2/L3/L5)、**Cost pillar**、**Auto-router 6 matcher** が完成済み。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
231
+ **テスト 950 本通過。ランタイム依存 5 個 (39 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** v2.1.0 で **Long-run Reliability pillar が完成** — Context Budget (L1) / Drift Detection (L4) / Partial Stitching (L6) / Continuous Probing (P3) の 4 sub-release を統合出荷。v1.10.0 までに **Long-run Reliability** (L2/L3/L5)、**Cost pillar**、**Auto-router 6 matcher** が完成済み。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
231
232
 
232
233
  今日の CodeRouter が届ける価値:
233
234
 
@@ -531,6 +531,73 @@ class FallbackChain(BaseModel):
531
531
  ),
532
532
  )
533
533
 
534
+ # ------------------------------------------------------------------
535
+ # v2.0-G (L4): Drift detection — response quality degradation guard
536
+ # ------------------------------------------------------------------
537
+ #
538
+ # Long-running sessions on local LLMs can suffer gradual quality
539
+ # decay (KV cache pressure, thermal throttling, VRAM fragmentation)
540
+ # where the model "succeeds" but produces empty/short/toolless
541
+ # responses. This guard observes response quality signals in a
542
+ # rolling window and detects statistical drift.
543
+ #
544
+ # Four actions:
545
+ # * ``off`` — no detection (default).
546
+ # * ``warn`` — emit structured log + response header.
547
+ # * ``promote`` — ``warn`` + demote drifted provider in chain.
548
+ # * ``reload`` — ``promote`` + attempt KV cache flush (Ollama).
549
+ drift_detection_action: Literal["off", "warn", "promote", "reload"] = Field(
550
+ default="off",
551
+ description=(
552
+ "v2.0-G (L4): action on response quality drift detection. "
553
+ "``off`` (default) disables drift detection. ``warn`` emits "
554
+ "a log and response header. ``promote`` additionally demotes "
555
+ "the drifted provider in the chain. ``reload`` attempts to "
556
+ "flush the provider's KV cache (Ollama only) before promoting."
557
+ ),
558
+ )
559
+ drift_detection_window_size: int = Field(
560
+ default=20,
561
+ ge=4,
562
+ le=200,
563
+ description=(
564
+ "v2.0-G (L4): number of recent responses to keep in the "
565
+ "rolling observation window per provider. Larger windows "
566
+ "are more robust to noise but slower to detect drift."
567
+ ),
568
+ )
569
+ drift_detection_cooldown_s: int = Field(
570
+ default=300,
571
+ ge=10,
572
+ le=3600,
573
+ description=(
574
+ "v2.0-G (L4): seconds after a promote/reload action before "
575
+ "the drifted provider's rank is reset for recovery check. "
576
+ "Default 300s (5 min) gives the model time to stabilize."
577
+ ),
578
+ )
579
+ drift_detection_sensitivity: Literal["low", "normal", "high"] = Field(
580
+ default="normal",
581
+ description=(
582
+ "v2.0-G (L4): threshold preset for drift signals. "
583
+ "``low`` tolerates more degradation before triggering, "
584
+ "``high`` is stricter (fewer bad responses needed)."
585
+ ),
586
+ )
587
+
588
+ # --- v2.0-H (L6): Mid-stream partial stitching --------------------------
589
+ # * ``off`` — discard partial content on mid-stream failure (legacy).
590
+ # * ``surface`` — return partial content as a truncated-but-valid response.
591
+ partial_stitch_action: Literal["off", "surface"] = Field(
592
+ default="off",
593
+ description=(
594
+ "v2.0-H (L6): action when a streaming response fails mid-stream. "
595
+ "``off`` discards partial content (legacy error event). "
596
+ "``surface`` returns accumulated text as a graceful stream "
597
+ "termination with a ``coderouter_partial`` metadata event."
598
+ ),
599
+ )
600
+
534
601
 
535
602
  # ---------------------------------------------------------------------------
536
603
  # v1.6-A: auto_router — declarative request-body classifier
@@ -768,6 +835,42 @@ class CodeRouterConfig(BaseModel):
768
835
  ),
769
836
  )
770
837
 
838
+ # v2.0-I: Continuous probing — background health checks for idle periods.
839
+ continuous_probe: Literal["off", "active"] = Field(
840
+ default="off",
841
+ description=(
842
+ "v2.0-I: enable background health probes. 'active' starts a "
843
+ "background task that periodically sends 1-token requests to "
844
+ "each provider, feeding results into the L5 backend health "
845
+ "state machine. 'off' = no probing (backward-compatible default)."
846
+ ),
847
+ )
848
+ probe_interval_s: float = Field(
849
+ default=60.0,
850
+ ge=5.0,
851
+ le=3600.0,
852
+ description=(
853
+ "v2.0-I: seconds between probe rounds. Lower = faster detection "
854
+ "but more probe traffic. 60s is a good balance for local models."
855
+ ),
856
+ )
857
+ probe_paid: bool = Field(
858
+ default=False,
859
+ description=(
860
+ "v2.0-I: whether to probe providers marked ``paid: true``. "
861
+ "Default false protects operators from accidental API charges."
862
+ ),
863
+ )
864
+ probe_timeout_s: float = Field(
865
+ default=10.0,
866
+ ge=1.0,
867
+ le=60.0,
868
+ description=(
869
+ "v2.0-I: per-provider timeout for probe requests. A provider "
870
+ "that doesn't respond within this window is recorded as failed."
871
+ ),
872
+ )
873
+
771
874
  @model_validator(mode="after")
772
875
  def _check_default_profile_exists(self) -> CodeRouterConfig:
773
876
  """v0.6-A: surface a typo'd ``default_profile`` at load time.
@@ -0,0 +1,349 @@
1
+ """Continuous health probing (v2.0-I).
2
+
3
+ Background task that periodically sends minimal 1-token requests to each
4
+ configured provider, feeding the results into the L5 backend health
5
+ state machine. Detects provider crashes during idle periods (no user
6
+ traffic) so the chain resolver knows to skip/demote a dead backend
7
+ before the next real request hits it.
8
+
9
+ Architecture
10
+ ============
11
+
12
+ ::
13
+
14
+ lifespan startup
15
+ └─ asyncio.create_task(probe_loop(...))
16
+
17
+ probe_loop:
18
+ while not shutdown:
19
+ sleep(interval_s)
20
+ for provider in providers:
21
+ result = await probe_one(provider)
22
+ backend_health.record_attempt(...)
23
+ emit log + metrics
24
+
25
+ Design choices
26
+ ==============
27
+
28
+ - **1-token completion** rather than ``/api/version`` or ``/api/tags``
29
+ because version endpoints are Ollama-only; a 1-token generate confirms
30
+ the entire model-serving pipeline is operational (model loaded, KV
31
+ allocated, inference works).
32
+ - **Sequential** probing (not parallel) to avoid hammering backends and
33
+ to keep the implementation trivially correct without gather/semaphore.
34
+ - **No new dependency** — uses httpx (already a runtime dep) + asyncio
35
+ (stdlib).
36
+ - **Graceful shutdown** via an ``asyncio.Event`` set by the lifespan
37
+ exit path. The loop checks the event each iteration and breaks cleanly.
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import asyncio
43
+ import contextlib
44
+ import time
45
+ from dataclasses import dataclass, field
46
+ from typing import Any
47
+
48
+ import httpx
49
+
50
+ from coderouter.config.schemas import ProviderConfig
51
+ from coderouter.logging import (
52
+ get_logger,
53
+ log_probe_capabilities_drift,
54
+ log_probe_completed,
55
+ log_probe_round_completed,
56
+ )
57
+
58
+ logger = get_logger(__name__)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # ProbeResult
63
+ # ---------------------------------------------------------------------------
64
+
65
+
66
+ @dataclass(slots=True)
67
+ class ProbeResult:
68
+ """Outcome of a single provider probe."""
69
+
70
+ provider: str
71
+ success: bool
72
+ latency_ms: float
73
+ error: str | None = None
74
+ model_name: str | None = None
75
+ timestamp: float = field(default_factory=time.time)
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # probe_one: single-provider 1-token probe
80
+ # ---------------------------------------------------------------------------
81
+
82
+
83
async def probe_one(
    provider: ProviderConfig,
    *,
    timeout_s: float = 10.0,
) -> ProbeResult:
    """Send a minimal 1-token completion request and measure the response.

    For ``kind: anthropic``: POST ``{base_url}/v1/messages``.
    For every other kind (handled as openai_compat):
    POST ``{base_url}/chat/completions``.

    The request asks for ``max_tokens: 1`` so the probe is as cheap as
    possible (a single output token is generated, exercising the full
    model pipeline without producing meaningful output).

    Args:
        provider: provider configuration; ``name``, ``base_url``, ``kind``,
            ``model`` and ``api_key_env`` are read.
        timeout_s: timeout handed to the HTTP client for this probe.

    Returns:
        A :class:`ProbeResult`. Only a 2xx response counts as success;
        redirects, client/server errors, timeouts and transport errors
        are all reported with ``success=False`` and a short ``error``.
        ``model_name`` is populated only when the response body is a JSON
        object whose ``"model"`` value is a string.

    Never raises for ordinary failures — they are captured in
    ``ProbeResult(success=False)``. Task cancellation still propagates.
    """
    import os

    start = time.monotonic()
    provider_name = provider.name
    base_url = str(provider.base_url).rstrip("/")
    is_anthropic = provider.kind == "anthropic"

    def _failure(error: str, latency_ms: float | None = None) -> ProbeResult:
        # Build a failed result; latency defaults to "elapsed until now".
        if latency_ms is None:
            latency_ms = (time.monotonic() - start) * 1000
        return ProbeResult(
            provider=provider_name,
            success=False,
            latency_ms=latency_ms,
            error=error,
        )

    # Resolve API key from env (same logic as the adapters)
    headers: dict[str, str] = {}
    if provider.api_key_env:
        api_key = os.environ.get(provider.api_key_env, "")
        if api_key:
            if is_anthropic:
                headers["x-api-key"] = api_key
                headers["anthropic-version"] = "2023-06-01"
            else:
                headers["Authorization"] = f"Bearer {api_key}"

    # Both wire formats accept the same minimal payload; only the path differs.
    # openai_compat covers Ollama, LM Studio, OpenRouter, etc.
    url = f"{base_url}/v1/messages" if is_anthropic else f"{base_url}/chat/completions"
    body: dict[str, Any] = {
        "model": provider.model,
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "hi"}],
    }

    try:
        async with httpx.AsyncClient(timeout=timeout_s) as client:
            resp = await client.post(url, json=body, headers=headers)
            latency_ms = (time.monotonic() - start) * 1000

        # Anything outside 2xx (including an unfollowed redirect) means the
        # model pipeline was not actually exercised.
        if not 200 <= resp.status_code < 300:
            return _failure(
                f"HTTP {resp.status_code}: {resp.text[:200]}",
                latency_ms,
            )

        # Extract model name from response (for capabilities drift check).
        # The body is optional metadata: a parse problem must not turn a
        # healthy probe into a failure, hence the deliberately broad catch.
        model_name: str | None = None
        try:
            data = resp.json()
        except Exception:
            data = None
        if isinstance(data, dict):
            candidate = data.get("model")
            # Only accept strings so downstream string handling is safe.
            if isinstance(candidate, str):
                model_name = candidate

        return ProbeResult(
            provider=provider_name,
            success=True,
            latency_ms=latency_ms,
            model_name=model_name,
        )

    except httpx.TimeoutException:
        return _failure(f"timeout after {timeout_s}s")
    except Exception as exc:
        # Some exceptions stringify to ""; fall back to the class name so
        # the failure is never reported with an empty description.
        return _failure(str(exc)[:200] or type(exc).__name__)
177
+
178
+
179
+ # ---------------------------------------------------------------------------
180
+ # capabilities drift detection (Phase 3)
181
+ # ---------------------------------------------------------------------------
182
+
183
+
184
+ @dataclass(slots=True)
185
+ class DriftReport:
186
+ """Report of a model-name mismatch between config and probe response."""
187
+
188
+ provider: str
189
+ configured_model: str
190
+ observed_model: str
191
+ in_registry: bool
192
+
193
+
194
def check_probe_drift(
    provider: ProviderConfig,
    observed_model: str | None,
    *,
    registry: Any = None,
) -> DriftReport | None:
    """Compare the probe response model name against the configured model.

    Args:
        provider: provider configuration; ``name``, ``model`` and ``kind``
            are read.
        observed_model: model name reported by the probe response. ``None``,
            non-string values and empty/whitespace-only strings are treated
            as "no model name returned".
        registry: optional object exposing ``lookup(kind=..., model=...)``,
            used to check whether the observed model has a known entry.

    Returns:
        ``None`` when there is nothing to compare or when the names match
        after stripping surrounding whitespace (comparison is
        case-sensitive). Otherwise a :class:`DriftReport` carrying the
        stripped observed name; ``in_registry`` is ``False`` only when the
        registry lookup succeeds and every inspected capability field is
        ``None``.

    Never raises — a missing registry or lookup error just defaults to
    ``in_registry=True`` (conservative, avoids false positives).
    """
    # Probe responses are untrusted JSON: the "model" value may be missing
    # or not a string at all. Treat anything unusable as "no model name".
    if not isinstance(observed_model, str):
        return None
    observed = observed_model.strip()
    if not observed:
        return None

    configured = (provider.model or "").strip()

    # Normalize: some backends return the model with formatting variation.
    # We compare case-sensitively but ignore surrounding whitespace.
    if observed == configured:
        return None

    # Check registry for the observed model
    in_registry = True
    if registry is not None:
        try:
            resolved = registry.lookup(kind=provider.kind, model=observed)
            # If every resolved field is None, the model is unknown
            if (
                resolved.thinking is None
                and resolved.tools is None
                and resolved.max_context_tokens is None
                and resolved.claude_code_suitability is None
                and resolved.cache_control is None
            ):
                in_registry = False
        except Exception:
            pass  # defensive — never crash the probe loop

    return DriftReport(
        provider=provider.name,
        configured_model=configured,
        observed_model=observed,
        in_registry=in_registry,
    )
246
+
247
+
248
+ # ---------------------------------------------------------------------------
249
+ # probe_loop: background task
250
+ # ---------------------------------------------------------------------------
251
+
252
+
253
async def _wait_or_shutdown(shutdown: asyncio.Event, timeout_s: float) -> bool:
    """Wait up to ``timeout_s`` seconds; return True if shutdown was signalled."""
    try:
        await asyncio.wait_for(shutdown.wait(), timeout=timeout_s)
    except asyncio.TimeoutError:
        # ``asyncio.TimeoutError`` is the builtin ``TimeoutError`` from
        # Python 3.11 on, but a distinct class on 3.10 — catching this name
        # works on both.
        return False
    return True


async def probe_loop(
    providers: list[ProviderConfig],
    *,
    record_fn: Any = None,
    interval_s: float = 60.0,
    timeout_s: float = 10.0,
    probe_paid: bool = False,
    shutdown_event: asyncio.Event | None = None,
    health_threshold: int = 3,
    registry: Any = None,
) -> None:
    """Run continuous health probes in an infinite loop until shutdown.

    The loop first waits one full interval, then repeats: probe every
    eligible provider sequentially, log the results, wait one interval.

    Args:
        providers: list of provider configs to probe.
        record_fn: callable(provider_name, *, success, threshold) that
            feeds the backend health state machine. When None, results
            are only logged (useful for testing). Exceptions raised by
            the callable are suppressed.
        interval_s: seconds to sleep between probe rounds (also used as
            the initial delay before the first round).
        timeout_s: per-provider probe timeout.
        probe_paid: if False, providers with ``paid=True`` are skipped.
        shutdown_event: set this event to stop the loop gracefully. When
            None, a private event is created and the loop only ends via
            task cancellation.
        health_threshold: consecutive-failure threshold passed to record_fn.
        registry: optional CapabilityRegistry for model drift detection.
    """
    _shutdown = shutdown_event if shutdown_event is not None else asyncio.Event()

    # Initial delay: let the server finish startup before first probe round.
    if await _wait_or_shutdown(_shutdown, interval_s):
        return  # shutdown during initial delay

    while not _shutdown.is_set():
        probed = 0
        failures = 0

        for provider in providers:
            if _shutdown.is_set():
                break
            if provider.paid and not probe_paid:
                continue

            result = await probe_one(provider, timeout_s=timeout_s)
            probed += 1

            if not result.success:
                failures += 1

            # Feed into backend health state machine (best-effort: a broken
            # recorder must not stop probing).
            if record_fn is not None:
                with contextlib.suppress(Exception):
                    record_fn(
                        result.provider,
                        success=result.success,
                        threshold=health_threshold,
                    )

            # Log individual result
            log_probe_completed(
                logger,
                provider=result.provider,
                success=result.success,
                latency_ms=result.latency_ms,
                error=result.error,
                model_name=result.model_name,
            )

            # Check for model-capabilities drift on success. The isinstance
            # guard keeps a malformed (non-string) model value from the
            # response body from crashing the background task.
            if (
                result.success
                and isinstance(result.model_name, str)
                and result.model_name
            ):
                drift = check_probe_drift(
                    provider, result.model_name, registry=registry
                )
                if drift is not None:
                    log_probe_capabilities_drift(
                        logger,
                        provider=drift.provider,
                        configured_model=drift.configured_model,
                        observed_model=drift.observed_model,
                        in_registry=drift.in_registry,
                    )

        # Log round summary (skipped when every provider was filtered out)
        if probed > 0:
            log_probe_round_completed(
                logger,
                providers_probed=probed,
                failures=failures,
            )

        # Wait for next interval or shutdown
        if await _wait_or_shutdown(_shutdown, interval_s):
            break  # shutdown signaled