coderouter-cli 1.10.1__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/CHANGELOG.md +138 -0
  2. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/PKG-INFO +11 -8
  3. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/README.en.md +8 -7
  4. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/README.md +10 -7
  5. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/cli_stats.py +48 -1
  6. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/config/schemas.py +189 -0
  7. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/data/model-capabilities.yaml +79 -0
  8. coderouter_cli-2.1.0/coderouter/guards/context_budget.py +376 -0
  9. coderouter_cli-2.1.0/coderouter/guards/continuous_probe.py +349 -0
  10. coderouter_cli-2.1.0/coderouter/guards/drift_actions.py +111 -0
  11. coderouter_cli-2.1.0/coderouter/guards/drift_detection.py +308 -0
  12. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/ingress/anthropic_routes.py +93 -12
  13. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/ingress/app.py +39 -0
  14. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/logging.py +351 -0
  15. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/metrics/collector.py +142 -2
  16. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/metrics/prometheus.py +212 -0
  17. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/routing/adaptive.py +23 -0
  18. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/routing/auto_router.py +2 -42
  19. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/routing/fallback.py +481 -4
  20. coderouter_cli-2.1.0/coderouter/token_estimation.py +161 -0
  21. coderouter_cli-2.1.0/docs/context-budget.md +175 -0
  22. coderouter_cli-2.1.0/docs/continuous-probing.md +102 -0
  23. coderouter_cli-2.1.0/docs/drift-detection.md +146 -0
  24. coderouter_cli-2.1.0/docs/gguf_dl.md +190 -0
  25. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/llamacpp-direct.md +8 -4
  26. coderouter_cli-2.1.0/docs/openrouter-roster/CHANGES.md +24 -0
  27. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/openrouter-roster/latest.json +37 -7
  28. coderouter_cli-2.1.0/docs/partial-stitch.md +71 -0
  29. coderouter_cli-2.1.0/examples/providers.v2-context-budget.yaml +111 -0
  30. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/providers.yaml +9 -0
  31. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/pyproject.toml +1 -1
  32. coderouter_cli-2.1.0/tests/test_context_budget.py +404 -0
  33. coderouter_cli-2.1.0/tests/test_continuous_probe.py +511 -0
  34. coderouter_cli-2.1.0/tests/test_drift_actions.py +174 -0
  35. coderouter_cli-2.1.0/tests/test_drift_detection.py +365 -0
  36. coderouter_cli-2.1.0/tests/test_drift_detection_integration.py +528 -0
  37. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_ingress_anthropic.py +149 -0
  38. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_collector.py +66 -0
  39. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_prometheus.py +30 -0
  40. coderouter_cli-2.1.0/tests/test_partial_stitch.py +417 -0
  41. coderouter_cli-2.1.0/tests/test_token_estimation.py +135 -0
  42. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/.gitignore +0 -0
  43. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/LICENSE +0 -0
  44. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/__init__.py +0 -0
  45. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/__main__.py +0 -0
  46. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/adapters/__init__.py +0 -0
  47. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/adapters/anthropic_native.py +0 -0
  48. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/adapters/base.py +0 -0
  49. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/adapters/openai_compat.py +0 -0
  50. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/adapters/registry.py +0 -0
  51. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/cli.py +0 -0
  52. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/config/__init__.py +0 -0
  53. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/config/capability_registry.py +0 -0
  54. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/config/env_file.py +0 -0
  55. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/config/loader.py +0 -0
  56. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/cost.py +0 -0
  57. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/data/__init__.py +0 -0
  58. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/doctor.py +0 -0
  59. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/doctor_apply.py +0 -0
  60. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/env_security.py +0 -0
  61. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/errors.py +0 -0
  62. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/guards/__init__.py +0 -0
  63. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/guards/backend_health.py +0 -0
  64. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/guards/memory_pressure.py +0 -0
  65. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/guards/tool_loop.py +0 -0
  66. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/ingress/__init__.py +0 -0
  67. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/ingress/dashboard_routes.py +0 -0
  68. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/ingress/metrics_routes.py +0 -0
  69. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/ingress/openai_routes.py +0 -0
  70. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/metrics/__init__.py +0 -0
  71. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/output_filters.py +0 -0
  72. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/routing/__init__.py +0 -0
  73. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/routing/budget.py +0 -0
  74. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/routing/capability.py +0 -0
  75. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/translation/__init__.py +0 -0
  76. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/translation/anthropic.py +0 -0
  77. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/translation/convert.py +0 -0
  78. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/coderouter/translation/tool_repair.py +0 -0
  79. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/assets/dashboard-demo.png +0 -0
  80. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/designs/v1.5-dashboard-mockup.html +0 -0
  81. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/designs/v1.6-auto-router-verification.md +0 -0
  82. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/designs/v1.6-auto-router.md +0 -0
  83. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/free-tier-guide.en.md +0 -0
  84. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/free-tier-guide.md +0 -0
  85. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/hf-ollama-models.md +0 -0
  86. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/llamacpp-direct.en.md +0 -0
  87. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/lmstudio-direct.en.md +0 -0
  88. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/lmstudio-direct.md +0 -0
  89. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/openrouter-roster/README.md +0 -0
  90. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/quickstart.en.md +0 -0
  91. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/quickstart.md +0 -0
  92. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v0.4.md +0 -0
  93. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v0.5-verify.md +0 -0
  94. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v0.5.md +0 -0
  95. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v0.6.md +0 -0
  96. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v0.7.md +0 -0
  97. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v1.0-verify.md +0 -0
  98. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/retrospectives/v1.0.md +0 -0
  99. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/security.en.md +0 -0
  100. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/security.md +0 -0
  101. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/troubleshooting.en.md +0 -0
  102. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/troubleshooting.md +0 -0
  103. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/usage-guide.en.md +0 -0
  104. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/usage-guide.md +0 -0
  105. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/when-do-i-need-coderouter.en.md +0 -0
  106. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/docs/when-do-i-need-coderouter.md +0 -0
  107. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/.env.example +0 -0
  108. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/providers.auto-custom.yaml +0 -0
  109. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/providers.auto.yaml +0 -0
  110. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/providers.note-2026.yaml +0 -0
  111. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/providers.nvidia-nim.yaml +0 -0
  112. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/examples/providers.raspberrypi.yaml +0 -0
  113. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/scripts/demo_traffic.sh +0 -0
  114. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/scripts/openrouter_roster_diff.py +0 -0
  115. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/scripts/verify_v0_5.sh +0 -0
  116. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/scripts/verify_v1_0.sh +0 -0
  117. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/__init__.py +0 -0
  118. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/conftest.py +0 -0
  119. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_adapter_anthropic.py +0 -0
  120. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_auto_router.py +0 -0
  121. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_backend_health.py +0 -0
  122. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_budget.py +0 -0
  123. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_capability.py +0 -0
  124. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_capability_degraded_payload.py +0 -0
  125. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_capability_registry.py +0 -0
  126. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_capability_registry_cache_control.py +0 -0
  127. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_claude_code_suitability.py +0 -0
  128. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_cli.py +0 -0
  129. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_cli_stats.py +0 -0
  130. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_config.py +0 -0
  131. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_dashboard_endpoint.py +0 -0
  132. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_doctor.py +0 -0
  133. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_doctor_apply.py +0 -0
  134. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_doctor_cache_probe.py +0 -0
  135. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_env_file.py +0 -0
  136. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_env_security.py +0 -0
  137. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_errors.py +0 -0
  138. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_examples_yaml.py +0 -0
  139. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback.py +0 -0
  140. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback_anthropic.py +0 -0
  141. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback_cache_control.py +0 -0
  142. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback_cache_observed.py +0 -0
  143. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback_misconfig_warn.py +0 -0
  144. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback_paid_gate.py +0 -0
  145. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_fallback_thinking.py +0 -0
  146. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_guards_tool_loop.py +0 -0
  147. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_ingress_profile.py +0 -0
  148. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_memory_pressure.py +0 -0
  149. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_cache.py +0 -0
  150. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_cost.py +0 -0
  151. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_endpoint.py +0 -0
  152. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_jsonl.py +0 -0
  153. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_metrics_prometheus_cache.py +0 -0
  154. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_openai_compat.py +0 -0
  155. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_openrouter_roster_diff.py +0 -0
  156. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_output_filters.py +0 -0
  157. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_output_filters_adapters.py +0 -0
  158. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_reasoning_strip.py +0 -0
  159. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_routing_adaptive.py +0 -0
  160. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_setup_sh.py +0 -0
  161. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_tool_repair.py +0 -0
  162. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_translation_anthropic.py +0 -0
  163. {coderouter_cli-1.10.1 → coderouter_cli-2.1.0}/tests/test_translation_reverse.py +0 -0
@@ -6,6 +6,144 @@ versioning follows [SemVer](https://semver.org/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [v2.1.0] — 2026-05-05 (Long-run Reliability 完成 — v2.0-G/H/I)
10
+
11
+ **Theme: L4 品質劣化 / L6 mid-stream 失敗 / L5 idle 時障害の 3 系統を同時解決し、Long-run Reliability pillar を完成させる。** v2.0-F (L1 context overflow) と合わせ、6 系統障害のうち 4 系統を CodeRouter が能動的にガードする状態に到達。
12
+
13
+ ### v2.0-G: Drift Detection (L4 品質劣化ガード)
14
+
15
+ **長時間 agent session でモデル応答品質が徐々に劣化する "drift" を自動検知し、corrective action を実行。** Ollama ローカルモデルが数時間稼働すると KV cache 汚染や VRAM 圧迫で応答が空になる / 短くなる / tool_use を返さなくなる現象 (L4) を 5 つのシグナルで検知。warn → promote (chain 降格) → reload (Ollama KV flush) の 3 段階アクションで品質を自動回復。
16
+
17
+ | 機能 | 説明 |
18
+ |---|---|
19
+ | 5 Signal Detector | empty_response_rate / length_collapse / tool_silence_rate / stop_anomaly_rate / error_rate を per-provider rolling window で監視 |
20
+ | `detect_drift()` | Pure function — severity none/mild/severe 判定 (severe×1 or mild×2 → severe) |
21
+ | `drift_detection_action: off/warn/promote/reload` | profile 単位で guard 有効化 (default: off) |
22
+ | `drift_detection_sensitivity: low/normal/high` | 閾値プリセット選択 |
23
+ | promote action | AdaptiveAdjuster の rank demotion で traffic を別 provider へ迂回 |
24
+ | reload action | Ollama `keep_alive=0` で KV cache flush → fresh context で再開 |
25
+ | Cooldown & Recovery | 設定秒数後に rank 復帰 + window クリア |
26
+ | `X-CodeRouter-Drift` header | response header で mild/severe ステータスを通知 (streaming 対応) |
27
+ | Prometheus metrics | `coderouter_drift_detected_total`, `coderouter_drift_promoted_total`, `coderouter_drift_reload_total` |
28
+
29
+ - Tests: ~930 → **~970** (+40, drift_detection 27 + drift_integration 10 + drift_actions 5)
30
+ - Runtime deps: 5 → 5 (**36 sub-release 連続据え置き**)
31
+ - Backward compat: 完全互換、`drift_detection_action` default は `"off"` — opt-in するまで既存挙動完全一致
32
+
33
+ ### 設定例
34
+
35
+ ```yaml
36
+ profiles:
37
+ - name: long-session
38
+ providers: [ollama-qwen3]
39
+ drift_detection_action: reload # off | warn | promote | reload
40
+ drift_detection_sensitivity: normal # low | normal | high
41
+ drift_detection_window_size: 20 # rolling window サイズ
42
+ drift_detection_cooldown_s: 300 # 復帰までの待機秒数
43
+ ```
44
+
45
+ ### 新規ファイル
46
+
47
+ - `coderouter/guards/drift_detection.py` — 検知ロジック (observation model + detector + window manager)
48
+ - `coderouter/guards/drift_actions.py` — reload action (Ollama KV flush)
49
+ - `tests/test_drift_detection.py` — pure function tests (27 本)
50
+ - `tests/test_drift_detection_integration.py` — engine integration tests (10 本)
51
+ - `tests/test_drift_actions.py` — reload action tests (5 本)
52
+ - `docs/drift-detection.md` — ユーザードキュメント
53
+
54
+ ### v2.0-H: Mid-stream Partial Stitching (L6 拡張)
55
+
56
+ **streaming 応答が途中で失敗した際、蓄積済みテキストを破棄せずクライアントに返却。**
57
+
58
+ | 機能 | 説明 |
59
+ |---|---|
60
+ | `_StreamUsageAccumulator` text 蓄積 | content_block_start/delta/stop を追跡し text block を in-memory 蓄積 |
61
+ | `MidStreamError.partial_content` | 例外に蓄積テキストを搬送 (tool_use 部分 JSON は除外) |
62
+ | `partial_stitch_action: off/surface` | profile 単位で有効化 (default: off) |
63
+ | `event: coderouter_partial` | 蓄積テキスト + provider + reason を SSE メタデータとして返却 |
64
+ | Prometheus metric | `coderouter_partial_stitch_surfaced_total` |
65
+
66
+ ### v2.0-I: Continuous Probing (L5 能動ヘルスチェック)
67
+
68
+ **idle 時間帯のプロバイダ障害を能動的に検知し backend health state machine を更新。**
69
+
70
+ | 機能 | 説明 |
71
+ |---|---|
72
+ | `probe_one()` | 1-token completion で全 model pipeline の正常性を確認 |
73
+ | `probe_loop()` | asyncio background task — sequential probe + graceful shutdown |
74
+ | `continuous_probe: off/active` | グローバル config で有効化 (default: off) |
75
+ | Model drift detection | probe response の model 名と config を照合 → 不一致で warn |
76
+ | Prometheus metrics | `probe_total`, `probe_outcomes_total`, `probe_rounds_total`, `probe_latency_ms`, `probe_drift_detected_total` |
77
+
78
+ ### 全体サマリ
79
+
80
+ - Tests: ~930 → **~1005** (+75)
81
+ - Runtime deps: 5 → 5 (**38 sub-release 連続据え置き**)
82
+ - Backward compat: 完全互換、全機能 default off — opt-in するまで既存挙動完全一致
83
+
84
+ ---
85
+
86
+ ## [v2.0.0] — 2026-05-05 (Context Budget Management — L1 overflow 防止)
87
+
88
+ **Theme: 長時間 agent session の context overflow を未然に防止する guard を実装。** Claude Code / Cline / OpenClaw 等の agentic session が 8 時間超え loop で動くと messages が context window に漸近し、backend が 400 / truncation を返して session 死亡する問題 (L1) を根本解決。warn (80%) → auto trim (90%) の 2 段階 guard で overflow をゼロに。
89
+
90
+ | 機能 | 説明 |
91
+ |---|---|
92
+ | `estimate_context_usage()` | char/4 heuristic で request の context 充填率を推定 (5-deps 不変) |
93
+ | `trim_to_budget()` | 古い messages を先頭から削除、tool_use/tool_result ペアを tool_use_id ベースで atomic 保全 |
94
+ | `context_budget_action: off/warn/trim` | profile 単位で guard 有効化 (default: off) |
95
+ | `X-CodeRouter-Context-Budget` header | response header で warn/trimmed ステータスを通知 (streaming 対応) |
96
+ | Prometheus metrics | `coderouter_context_budget_warnings_total`, `coderouter_context_budget_trims_total`, `coderouter_context_budget_usage_ratio` |
97
+ | `coderouter stats` TUI | Fallback & Gates パネルに context budget warn/trim count + latest ratio 表示 |
98
+ | model-capabilities.yaml | 主要モデル (Claude 200K, Qwen3/3.5/3.6 32-131K, Gemma4 131K, DeepSeek 131K 等) の max_context_tokens bundled |
99
+
100
+ - Tests: 878 → **~930** (+50, token_estimation 13 + context_budget 22 + ingress header 5 + metrics 6 + prometheus 3)
101
+ - Runtime deps: 5 → 5 (**35 sub-release 連続据え置き**)
102
+ - Backward compat: 完全互換、`context_budget_action` default は `"off"` — opt-in するまで既存挙動完全一致
103
+
104
+ ### 設定例
105
+
106
+ ```yaml
107
+ profiles:
108
+ - name: long-session
109
+ providers: [ollama-qwen3]
110
+ context_budget_action: trim # off | warn | trim
111
+ context_budget_warn_threshold: 0.80 # ratio で警告
112
+ context_budget_trim_threshold: 0.90 # ratio で自動 trim
113
+ context_budget_trim_target: 0.75 # trim 後の目標充填率
114
+ context_budget_preserve_last_n: 4 # 直近 N messages は必ず保持
115
+
116
+ providers:
117
+ - name: ollama-qwen3
118
+ base_url: http://localhost:11434/v1
119
+ model: qwen3:30b-a3b
120
+ max_context_tokens: 32768 # 明示 (registry に乗ってない場合)
121
+ ```
122
+
123
+ ### Files touched (主要)
124
+
125
+ ```
126
+ A coderouter/token_estimation.py
127
+ A coderouter/guards/context_budget.py
128
+ M coderouter/config/schemas.py
129
+ M coderouter/routing/fallback.py
130
+ M coderouter/routing/auto_router.py
131
+ M coderouter/ingress/anthropic_routes.py
132
+ M coderouter/logging.py
133
+ M coderouter/metrics/collector.py
134
+ M coderouter/metrics/prometheus.py
135
+ M coderouter/cli_stats.py
136
+ M coderouter/data/model-capabilities.yaml
137
+ A tests/test_token_estimation.py
138
+ A tests/test_context_budget.py
139
+ M tests/test_ingress_anthropic.py
140
+ M tests/test_metrics_collector.py
141
+ M tests/test_metrics_prometheus.py
142
+ A docs/inside/v2.0-F-context-budget-plan.md
143
+ ```
144
+
145
+ ---
146
+
9
147
  ## [v1.10.1] — 2026-05-04 (Patch — tool-aware auto routing + Raspberry Pi starter)
10
148
 
11
149
  **Theme: 「ローカル小型モデルでは tool calling できないので tool-laden な request だけクラウドに逃がしたい」というユースケース (OpenClaw + Pi 8GB シナリオ) を declarative に解決。** v1.10.0 で feature complete を宣言した auto_router の 6 matcher を 7 matcher に拡張、`has_tools` を追加して「tools[] を宣言したリクエストか否か」で profile を分岐できるように。併せて Raspberry Pi 8GB 向けの starter YAML (`examples/providers.raspberrypi.yaml`) を同梱、SBC 上で OpenClaw / Claude Code 互換 agent を回すユーザーが yaml 1 個 copy するだけで動く状態にした。
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 1.10.1
3
+ Version: 2.1.0
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
60
60
  <p align="center">
61
61
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
62
62
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
63
- <a href=""><img src="https://img.shields.io/badge/version-1.10.0-blue" alt="version"></a>
63
+ <a href=""><img src="https://img.shields.io/badge/version-2.1.0-blue" alt="version"></a>
64
64
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
65
65
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
66
66
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -90,7 +90,9 @@ Description-Content-Type: text/markdown
90
90
  - v1.9.0 から **adaptive routing** で「いま遅い provider」を自動降格(profile に `adaptive: true` を付けるだけ)、**tool-loop guard** で stuck loop を検出(`warn` / `inject` / `break` の 3 段階 policy)
91
91
  - **v1.10.0 で Long-run reliability pillar が完成**: `cost.monthly_budget_usd` で provider 月次 USD 予算を強制、**L2 memory pressure detector**(Ollama / LM Studio が VRAM 切れで OOM になった時に自動クールダウン)、**L5 backend health 状態機械**(連続失敗で UNHEALTHY → chain 末尾に降格、1 回成功で即復帰)
92
92
  - **v1.10.0 で auto-router が 6 matcher に揃う**: `has_image` / `code_fence_ratio_min` / `content_contains` / `content_regex` / `model_pattern`(Opus/Sonnet/Haiku 分岐)/ `content_token_count_min`(長文 → 1M ctx Gemini Flash 等へ自動切替)
93
- - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— Python、MIT、テスト 871 本緑
93
+ - **v2.0.0 Context Budget Management (L1 overflow 防止) を搭載**: 長時間 agent session で messages が context window に漸近 → backend 400 エラーで session 死亡する問題を根本解決。warn (80%) → auto trim (90%) の 2 段階 guard で **context overflow ゼロ**を実現。tool_use / tool_result ペアは atomic 保全、`X-CodeRouter-Context-Budget` ヘッダで状態通知、Prometheus メトリクス完備
94
+ - **v2.1.0 で Long-run Reliability 3 機能を追加搭載**: **Drift Detection** (L4 品質劣化検知 — 5 シグナル rolling window + warn/promote/reload 3 段階アクション)、**Partial Stitching** (L6 mid-stream 失敗時の蓄積テキスト返却)、**Continuous Probing** (P3 idle 時 1-token 定期 probe + model drift 検知 + backend health 自動更新)
95
+ - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— 純 Python、MIT、テスト 950 本緑
94
96
 
95
97
  → **Claude Code / gemini-cli / codex + Ollama / llama.cpp / NVIDIA NIM で、破綻しない local-first agent が組める**
96
98
 
@@ -102,11 +104,12 @@ Description-Content-Type: text/markdown
102
104
  | **使いこなす** | [利用ガイド](./docs/usage-guide.md) | HW 別モデル選定・チューニング既定値・OS ごとの起動フロー・`doctor` / `verify` の読み方 |
103
105
  | **無料で回す** | [無料枠ガイド](./docs/free-tier-guide.md) | NVIDIA NIM 40 req/min × OpenRouter 無料枠の使い分け・live 検証済みモデル表・地雷 5 点 |
104
106
  | **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
107
+ | **長時間 session** | [Context Budget](./docs/context-budget.md) | v2.0.0 新機能。長時間 agent session の context overflow を自動防止する guard の設定・仕組み・可観測性 |
105
108
  | **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
106
109
  | **llama.cpp 直叩き** | [llama.cpp 直叩きガイド](./docs/llamacpp-direct.md) | Qwen3.6 を Ollama 詰みから救出する経路。`llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step で(v1.8.3 実機検証済)|
107
110
  | **LM Studio 直接** | [LM Studio 直接ガイド](./docs/lmstudio-direct.md) | `qwen35` / `qwen35moe` を救う第 2 経路。LM Studio 0.4.12+ Local Server 経由で OpenAI 互換 + Anthropic 互換 (`/v1/messages`) 両対応、prompt caching 透過(v1.8.4 実機検証済)|
108
111
  | **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
109
- | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.10.0 — Cost enforcement (`monthly_budget_usd`) + Long-run reliability completion (L2 memory pressure / L5 backend health) + Auto-router feature complete (6 matcher) を 1 minor で出荷) |
112
+ | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v2.0.0 — Context Budget Management (L1 overflow 防止) で長時間 agent session の安定性を根本改善) |
110
113
  | **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
111
114
 
112
115
  English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [llama.cpp direct](./docs/llamacpp-direct.en.md) · [LM Studio direct](./docs/lmstudio-direct.en.md) · [Security](./docs/security.en.md)
@@ -122,7 +125,7 @@ CodeRouter は、コーディングエージェント(Claude Code / gemini-cli
122
125
  - **うっかり課金しない。** `ALLOW_PAID=false` が既定。有料プロバイダをチェーンから外したときは理由を 1 行ログに出すので、なぜ使われなかったかが後で grep できます。
123
126
  - **ローカル Ollama の上で Claude Code / gemini-cli / codex が動く。** Claude Code は Anthropic のワイアフォーマット、Ollama / llama.cpp / LM Studio は OpenAI。CodeRouter が双方向に変換し、小さいローカルモデルがテキストで吐いてしまう `{"name":..., "arguments":...}` を tool_use ブロックへ復元してからエージェントに渡します。
124
127
  - **「なぜか動かない」の原因を教えてくれる。** `coderouter doctor --check-model <provider>` が 6 種類の典型的な失敗モード(コンテキスト切り詰め / ストリーム早期終了 / ツール呼び出し欠落 / reasoning フィールド漏れ / 認証 / Anthropic `thinking`)を実地プローブし、コピペ可能な YAML パッチを出します。
125
- - **監査しやすい。** ランタイム依存 5 個(LiteLLM は 100+)。Pure Python、MIT、テスト 871 本緑。
128
+ - **監査しやすい。** ランタイム依存 5 個(LiteLLM は 100+)。Pure Python、MIT、テスト 930 本緑。
126
129
 
127
130
  ```
128
131
  クライアント (Claude Code / OpenAI SDK / gemini-cli / codex / curl)
@@ -196,7 +199,7 @@ CodeRouter / Voice Bridge ともに独立した repo で進化していて、HTT
196
199
 
197
200
  ## クイックスタート(3 コマンド)
198
201
 
199
- **v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**、**v1.9.0 で Cache observability / Adaptive routing / Cost-aware dashboard / Tool-loop guard を pillar 化**、**v1.10.0 Cost enforcement (`monthly_budget_usd`) / Long-run reliability (L2 memory pressure + L5 backend health) / Auto-router 6 matcher feature complete を出荷**しました。`uvx` 一発で動きます (Python 3.12 以上必須):
202
+ **v2.1.0 で Long-run Reliability pillar 完成** — Context Budget (L1) + Drift Detection (L4) + Partial Stitching (L6) + Continuous Probing (P3)。`uvx` 一発で動きます (Python 3.12 以上必須):
200
203
 
201
204
  ```bash
202
205
  # 1. サンプル設定を置く
@@ -264,9 +267,9 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
264
267
 
265
268
  注意点や「ローカル GPU なし」向けレシピを含むフル版マトリクス: [利用ガイド §1](./docs/usage-guide.md#1-os-互換性)
266
269
 
267
- ## ステータス — v1.10.0 minor (2026-05)
270
+ ## ステータス — v2.1.0 (2026-05)
268
271
 
269
- **テスト 871 本通過。ランタイム依存 5 個 (33 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** ルーターは日常的な Claude Code 用途で安定し、v1.10.0 **Vision pillar P2 Long-run Reliability** が完成 (L2/L3/L5)、**Cost pillar** が観測 → 制約まで閉じる、**Auto-router** 6 matcher で feature complete に到達しました。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
272
+ **テスト 950 本通過。ランタイム依存 5 個 (39 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** v2.1.0 **Long-run Reliability pillar が完成** — Context Budget (L1) / Drift Detection (L4) / Partial Stitching (L6) / Continuous Probing (P3) の 4 sub-release を統合出荷。v1.10.0 までに **Long-run Reliability** (L2/L3/L5)、**Cost pillar**、**Auto-router 6 matcher** が完成済み。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
270
273
 
271
274
  今日の CodeRouter が届ける価値:
272
275
 
@@ -20,7 +20,7 @@
20
20
  <p align="center">
21
21
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
22
22
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
23
- <a href=""><img src="https://img.shields.io/badge/version-1.10.0-blue" alt="version"></a>
23
+ <a href=""><img src="https://img.shields.io/badge/version-2.0.0-blue" alt="version"></a>
24
24
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
25
25
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
26
26
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -49,7 +49,8 @@
49
49
  - v1.9.0 ships **adaptive routing** that auto-demotes a temporarily-slow provider (set `adaptive: true` on a profile) and a **tool-loop guard** that catches stuck-loop patterns with a 3-tier policy (`warn` / `inject` / `break`)
50
50
  - **v1.10.0 completes the long-run reliability pillar**: `cost.monthly_budget_usd` enforces a per-provider monthly USD cap, the **L2 memory-pressure detector** automatically cools down a backend when Ollama / LM Studio reports VRAM exhaustion, and the **L5 backend-health state machine** demotes UNHEALTHY providers to the back of the chain (consecutive-failure threshold, single-success recovery)
51
51
  - **v1.10.0 brings the auto-router to 6 matchers**: `has_image` / `code_fence_ratio_min` / `content_contains` / `content_regex` / `model_pattern` (Opus/Sonnet/Haiku branching) / `content_token_count_min` (long prompts → 1M-ctx Gemini Flash, etc.)
52
- - Five runtime dependencies (`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`) — pure Python, MIT, 871 tests green
52
+ - **v2.0.0 ships Context Budget Management (L1 overflow prevention)**: long-running agent sessions approaching the context window are automatically trimmed — warn at 80%, auto-trim at 90%, tool_use/tool_result pairs preserved atomically, `X-CodeRouter-Context-Budget` response header, Prometheus metrics included
53
+ - Five runtime dependencies (`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`) — pure Python, MIT, 930 tests green
53
54
 
54
55
  → **Claude Code / gemini-cli / codex on top of Ollama / llama.cpp / NVIDIA NIM, without the agent falling apart.**
55
56
 
@@ -65,7 +66,7 @@
65
66
  | **llama.cpp direct** | [llama.cpp direct guide](./docs/llamacpp-direct.en.md) | Rescue path for Qwen3.6 (Ollama is brittle). 7-step recipe: `llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter wiring. Real-machine verified in v1.8.3. |
66
67
  | **LM Studio direct** | [LM Studio direct guide](./docs/lmstudio-direct.en.md) | Second rescue path for `qwen35` / `qwen35moe`. LM Studio 0.4.12+ Local Server with both OpenAI-compatible and Anthropic-compatible (`/v1/messages`) routes — prompt caching survives end-to-end. Real-machine verified in v1.8.4. |
67
68
  | **Operate safely** | [Security](./docs/security.en.md) | Threat model, secret handling, vulnerability reporting |
68
- | **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.10.0 — Cost enforcement (`monthly_budget_usd`) + Long-run reliability completion (L2 memory pressure / L5 backend health) + Auto-router feature complete (6 matchers) shipped in one minor) |
69
+ | **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v2.0.0 — Context Budget Management (L1 overflow prevention) for rock-solid long-running agent sessions) |
69
70
  | **Track the design** | [plan.md](./plan.md) | Design invariants, milestones, roadmap |
70
71
 
71
72
  日本語版: [Quickstart](./docs/quickstart.md) · [利用ガイド](./docs/usage-guide.md) · [無料枠ガイド](./docs/free-tier-guide.md) · [要否判定](./docs/when-do-i-need-coderouter.md) · [トラブルシューティング](./docs/troubleshooting.md) · [LM Studio 直接](./docs/lmstudio-direct.md) · [Security](./docs/security.md)
@@ -81,7 +82,7 @@ Concretely, it takes care of things most beginners hit the hard way:
81
82
  - **No surprise bill.** `ALLOW_PAID=false` is the default; when CodeRouter drops a paid provider from the chain it logs one clear line so you can see why.
82
83
  - **Use Claude Code / gemini-cli / codex on top of local Ollama.** Claude Code speaks Anthropic wire format, Ollama / llama.cpp / LM Studio speak OpenAI. CodeRouter translates both directions, and repairs the malformed `{"name":..., "arguments":...}` JSON that small local models emit as plain text.
83
84
  - **Know *why* your local model is acting weird.** `coderouter doctor --check-model <provider>` probes six common failure modes (context truncation, streaming cutoff, missing tool-use, reasoning leaks, auth, Anthropic `thinking`) and prints a copy-paste YAML patch.
84
- - **Auditable.** 5 runtime dependencies (vs. 100+ for LiteLLM). Pure Python, MIT, 871 tests passing.
85
+ - **Auditable.** 5 runtime dependencies (vs. 100+ for LiteLLM). Pure Python, MIT, 930 tests passing.
85
86
 
86
87
  ```
87
88
  Client (Claude Code / OpenAI SDK / gemini-cli / codex / curl)
@@ -155,7 +156,7 @@ CodeRouter and Voice Bridge live in separate repos and evolve independently, con
155
156
 
156
157
  ## Quickstart (2 commands)
157
158
 
158
- **v1.7.0 published to PyPI**, **v1.8.0 added use-case-aware 4 profiles + Z.AI/GLM integration**, **v1.9.0 promoted cache observability / adaptive routing / cost-aware dashboard / tool-loop guard to first-class pillars**, **v1.10.0 shipped Cost enforcement (`monthly_budget_usd`) / Long-run reliability completion (L2 memory pressure + L5 backend health) / Auto-router feature complete (6 matchers)**. `uvx` installs and runs in one shot (Python 3.12+ required):
159
+ **v2.0.0 ships Context Budget Management (L1 overflow prevention)** — long-running agent sessions that approach the context window are automatically trimmed to prevent session death. `uvx` installs and runs in one shot (Python 3.12+ required):
159
160
 
160
161
  ```bash
161
162
  # 1. Drop a sample config
@@ -224,9 +225,9 @@ CodeRouter is pure Python 3.12+; OS support is effectively `min(coderouter, olla
224
225
 
225
226
  Full matrix with caveats and the "no local GPU" recipe: [usage guide §1](./docs/usage-guide.en.md#1-os-compatibility).
226
227
 
227
- ## Status — v1.10.0 minor (2026-05)
228
+ ## Status — v2.0.0 (2026-05)
228
229
 
229
- **871 tests pass. 5 runtime dependencies (33 sub-releases held to the same 5).** Works on macOS / Linux / Windows WSL2. The router is stable for day-to-day Claude Code use, and v1.10.0 closes out the **Vision pillar P2 (Long-run reliability, L2/L3/L5)**, the **Cost pillar (observation → enforcement)**, and **Auto-router feature complete (6 matchers)**. The v1.0 wrap-up is in [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md).
230
+ **930 tests pass. 5 runtime dependencies (36 sub-releases held to the same 5).** Works on macOS / Linux / Windows WSL2. v2.0.0 ships **Context Budget Management (L1 overflow prevention)** — the final piece for rock-solid long-running agent sessions. Previous milestones: **Long-run Reliability** (L2/L3/L5), **Cost pillar**, **Auto-router 6 matchers**. The v1.0 wrap-up is in [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md).
230
231
 
231
232
  What CodeRouter can do for you today:
232
233
 
@@ -19,7 +19,7 @@
19
19
  <p align="center">
20
20
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
21
21
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
22
- <a href=""><img src="https://img.shields.io/badge/version-1.10.0-blue" alt="version"></a>
22
+ <a href=""><img src="https://img.shields.io/badge/version-2.1.0-blue" alt="version"></a>
23
23
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
24
24
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
25
25
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -49,7 +49,9 @@
49
49
  - v1.9.0 から **adaptive routing** で「いま遅い provider」を自動降格(profile に `adaptive: true` を付けるだけ)、**tool-loop guard** で stuck loop を検出(`warn` / `inject` / `break` の 3 段階 policy)
50
50
  - **v1.10.0 で Long-run reliability pillar が完成**: `cost.monthly_budget_usd` で provider 月次 USD 予算を強制、**L2 memory pressure detector**(Ollama / LM Studio が VRAM 切れで OOM になった時に自動クールダウン)、**L5 backend health 状態機械**(連続失敗で UNHEALTHY → chain 末尾に降格、1 回成功で即復帰)
51
51
  - **v1.10.0 で auto-router が 6 matcher に揃う**: `has_image` / `code_fence_ratio_min` / `content_contains` / `content_regex` / `model_pattern`(Opus/Sonnet/Haiku 分岐)/ `content_token_count_min`(長文 → 1M ctx Gemini Flash 等へ自動切替)
52
- - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— 純 Python、MIT、テスト 871 本緑
52
+ - **v2.0.0 で Context Budget Management (L1 overflow 防止) を搭載**: 長時間 agent session で messages が context window に漸近 → backend 400 エラーで session 死亡する問題を根本解決。warn (80%) → auto trim (90%) の 2 段階 guard で **context overflow ゼロ**を実現。tool_use / tool_result ペアは atomic 保全、`X-CodeRouter-Context-Budget` ヘッダで状態通知、Prometheus メトリクス完備
53
+ - **v2.1.0 で Long-run Reliability 3 機能を追加搭載**: **Drift Detection** (L4 品質劣化検知 — 5 シグナル rolling window + warn/promote/reload 3 段階アクション)、**Partial Stitching** (L6 mid-stream 失敗時の蓄積テキスト返却)、**Continuous Probing** (P3 idle 時 1-token 定期 probe + model drift 検知 + backend health 自動更新)
54
+ - ランタイム依存 5 個(`fastapi` / `uvicorn` / `httpx` / `pydantic` / `pyyaml`)— 純 Python、MIT、テスト 950 本緑
53
55
 
54
56
  → **Claude Code / gemini-cli / codex + Ollama / llama.cpp / NVIDIA NIM で、破綻しない local-first agent が組める**
55
57
 
@@ -61,11 +63,12 @@
61
63
  | **使いこなす** | [利用ガイド](./docs/usage-guide.md) | HW 別モデル選定・チューニング既定値・OS ごとの起動フロー・`doctor` / `verify` の読み方 |
62
64
  | **無料で回す** | [無料枠ガイド](./docs/free-tier-guide.md) | NVIDIA NIM 40 req/min × OpenRouter 無料枠の使い分け・live 検証済みモデル表・地雷 5 点 |
63
65
  | **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
66
+ | **長時間 session** | [Context Budget](./docs/context-budget.md) | v2.0.0 新機能。長時間 agent session の context overflow を自動防止する guard の設定・仕組み・可観測性 |
64
67
  | **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
65
68
  | **llama.cpp 直叩き** | [llama.cpp 直叩きガイド](./docs/llamacpp-direct.md) | Qwen3.6 を Ollama 詰みから救出する経路。`llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step で(v1.8.3 実機検証済)|
66
69
  | **LM Studio 直接** | [LM Studio 直接ガイド](./docs/lmstudio-direct.md) | `qwen35` / `qwen35moe` を救う第 2 経路。LM Studio 0.4.12+ Local Server 経由で OpenAI 互換 + Anthropic 互換 (`/v1/messages`) 両対応、prompt caching 透過(v1.8.4 実機検証済)|
67
70
  | **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
68
- | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.10.0 — Cost enforcement (`monthly_budget_usd`) + Long-run reliability completion (L2 memory pressure / L5 backend health) + Auto-router feature complete (6 matcher) を 1 minor で出荷) |
71
+ | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v2.0.0 — Context Budget Management (L1 overflow 防止) で長時間 agent session の安定性を根本改善) |
69
72
  | **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
70
73
 
71
74
  English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [llama.cpp direct](./docs/llamacpp-direct.en.md) · [LM Studio direct](./docs/lmstudio-direct.en.md) · [Security](./docs/security.en.md)
@@ -81,7 +84,7 @@ CodeRouter は、コーディングエージェント(Claude Code / gemini-cli
81
84
  - **うっかり課金しない。** `ALLOW_PAID=false` が既定。有料プロバイダをチェーンから外したときは理由を 1 行ログに出すので、なぜ使われなかったかが後で grep できます。
82
85
  - **ローカル Ollama の上で Claude Code / gemini-cli / codex が動く。** Claude Code は Anthropic のワイアフォーマット、Ollama / llama.cpp / LM Studio は OpenAI。CodeRouter が双方向に変換し、小さいローカルモデルがテキストで吐いてしまう `{"name":..., "arguments":...}` を tool_use ブロックへ復元してからエージェントに渡します。
83
86
  - **「なぜか動かない」の原因を教えてくれる。** `coderouter doctor --check-model <provider>` が 6 種類の典型的な失敗モード(コンテキスト切り詰め / ストリーム早期終了 / ツール呼び出し欠落 / reasoning フィールド漏れ / 認証 / Anthropic `thinking`)を実地プローブし、コピペ可能な YAML パッチを出します。
84
- - **監査しやすい。** ランタイム依存 5 個(LiteLLM は 100+)。Pure Python、MIT、テスト 871 本緑。
87
+ - **監査しやすい。** ランタイム依存 5 個(LiteLLM は 100+)。Pure Python、MIT、テスト 930 本緑。
85
88
 
86
89
  ```
87
90
  クライアント (Claude Code / OpenAI SDK / gemini-cli / codex / curl)
@@ -155,7 +158,7 @@ CodeRouter / Voice Bridge ともに独立した repo で進化していて、HTT
155
158
 
156
159
  ## クイックスタート(3 コマンド)
157
160
 
158
- **v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**、**v1.9.0 で Cache observability / Adaptive routing / Cost-aware dashboard / Tool-loop guard を pillar 化**、**v1.10.0 で Cost enforcement (`monthly_budget_usd`) / Long-run reliability (L2 memory pressure + L5 backend health) / Auto-router 6 matcher feature complete を出荷**しました。`uvx` 一発で動きます (Python 3.12 以上必須):
161
+ **v2.1.0 で Long-run Reliability pillar 完成** — Context Budget (L1) + Drift Detection (L4) + Partial Stitching (L6) + Continuous Probing (P3)。`uvx` 一発で動きます (Python 3.12 以上必須):
159
162
 
160
163
  ```bash
161
164
  # 1. サンプル設定を置く
@@ -223,9 +226,9 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
223
226
 
224
227
  注意点や「ローカル GPU なし」向けレシピを含むフル版マトリクス: [利用ガイド §1](./docs/usage-guide.md#1-os-互換性)
225
228
 
226
- ## ステータス — v1.10.0 minor (2026-05)
229
+ ## ステータス — v2.1.0 (2026-05)
227
230
 
228
- **テスト 871 本通過。ランタイム依存 5 個 (33 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** ルーターは日常的な Claude Code 用途で安定し、v1.10.0 で **Vision pillar P2 Long-run Reliability** が完成 (L2/L3/L5)、**Cost pillar** が観測 → 制約まで閉じる、**Auto-router** 6 matcher で feature complete に到達しました。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
231
+ **テスト 950 本通過。ランタイム依存 5 個 (39 sub-release 連続据え置き)。macOS / Linux / Windows WSL2 で動作。** v2.1.0 で **Long-run Reliability pillar が完成** — Context Budget (L1) / Drift Detection (L4) / Partial Stitching (L6) / Continuous Probing (P3) の 4 sub-release を統合出荷。v1.10.0 までに **Long-run Reliability** (L2/L3/L5)、**Cost pillar**、**Auto-router 6 matcher** が完成済み。v1.0 の総まとめは [`docs/retrospectives/v1.0.md`](./docs/retrospectives/v1.0.md)。
229
232
 
230
233
  今日の CodeRouter が届ける価値:
231
234
 
@@ -112,6 +112,10 @@ class GatesSummary:
112
112
  degraded_breakdown: dict[str, int] # capability → count
113
113
  filters_applied_total: int
114
114
  filters_breakdown: dict[str, int] # filter name → count
115
+ # v2.0-F (L1): context budget guard summary
116
+ context_budget_warnings: int = 0
117
+ context_budget_trims: int = 0
118
+ context_budget_latest_ratio: dict[str, float] | None = None
115
119
 
116
120
 
117
121
  @dataclass(frozen=True)
@@ -252,6 +256,8 @@ def build_gates_summary(snapshot: dict[str, Any]) -> GatesSummary:
252
256
  )
253
257
  degraded_breakdown = dict(counters.get("capability_degraded", {}) or {})
254
258
  filters_breakdown = dict(counters.get("output_filter_applied", {}) or {})
259
+ # v2.0-F (L1): context budget guard counters
260
+ ctx_budget_latest = counters.get("context_budget_latest_ratio") or {}
255
261
  return GatesSummary(
256
262
  total_requests=total_requests,
257
263
  total_failed=total_failed,
@@ -261,6 +267,13 @@ def build_gates_summary(snapshot: dict[str, Any]) -> GatesSummary:
261
267
  degraded_breakdown=degraded_breakdown,
262
268
  filters_applied_total=sum(filters_breakdown.values()),
263
269
  filters_breakdown=filters_breakdown,
270
+ context_budget_warnings=int(
271
+ counters.get("context_budget_warnings_total", 0)
272
+ ),
273
+ context_budget_trims=int(
274
+ counters.get("context_budget_trims_total", 0)
275
+ ),
276
+ context_budget_latest_ratio=ctx_budget_latest if ctx_budget_latest else None,
264
277
  )
265
278
 
266
279
 
@@ -397,6 +410,19 @@ def format_text(snapshot: dict[str, Any], *, width: int = 80) -> str:
397
410
  else ""
398
411
  )
399
412
  )
413
+ # v2.0-F (L1): context budget guard stats
414
+ if gates.context_budget_warnings or gates.context_budget_trims:
415
+ ratio_str = ""
416
+ if gates.context_budget_latest_ratio:
417
+ top_profile = max(
418
+ gates.context_budget_latest_ratio,
419
+ key=gates.context_budget_latest_ratio.get, # type: ignore[arg-type]
420
+ )
421
+ ratio_str = f" (latest: {gates.context_budget_latest_ratio[top_profile]:.0%} {top_profile})"
422
+ lines.append(
423
+ f" context-budget warn: {gates.context_budget_warnings} "
424
+ f"trim: {gates.context_budget_trims}{ratio_str}"
425
+ )
400
426
  lines.append("")
401
427
  lines.append("Recent")
402
428
  if not recent:
@@ -633,7 +659,28 @@ def _draw_frame( # pragma: no cover - curses-only
633
659
  + (f" ({_fmt_breakdown(gates.filters_breakdown)})" if gates.filters_breakdown else ""),
634
660
  width,
635
661
  )
636
- row += 2
662
+ row += 1
663
+ # v2.0-F (L1): context budget guard line
664
+ if gates.context_budget_warnings or gates.context_budget_trims:
665
+ ratio_str = ""
666
+ if gates.context_budget_latest_ratio:
667
+ top_profile = max(
668
+ gates.context_budget_latest_ratio,
669
+ key=gates.context_budget_latest_ratio.get, # type: ignore[arg-type]
670
+ )
671
+ ratio_str = f" (latest: {gates.context_budget_latest_ratio[top_profile]:.0%} {top_profile})"
672
+ budget_line = (
673
+ f" context-budget warn: {gates.context_budget_warnings} "
674
+ f"trim: {gates.context_budget_trims}{ratio_str}"
675
+ )
676
+ budget_color = (
677
+ _COLOR_YELLOW_PAIR
678
+ if gates.context_budget_trims == 0
679
+ else _COLOR_RED_PAIR
680
+ )
681
+ stdscr.addnstr(row, 0, budget_line, width, int(curses.color_pair(budget_color)))
682
+ row += 1
683
+ row += 1
637
684
 
638
685
  if row >= height - 2:
639
686
  return