coderouter-cli 2.5.0__tar.gz → 2.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/CHANGELOG.md +67 -0
  2. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/PKG-INFO +17 -17
  3. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/README.en.md +16 -16
  4. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/README.md +16 -16
  5. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/cli.py +2 -2
  6. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/config/capability_registry.py +1 -1
  7. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/data/model-capabilities.yaml +2 -2
  8. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/launcher_routes.py +54 -12
  9. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/logging.py +2 -2
  10. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/routing/capability.py +1 -1
  11. coderouter_cli-2.5.2/docs/README.md +94 -0
  12. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/hf-ollama-models.md +2 -2
  13. coderouter_cli-2.5.2/docs/backends/install-backends.en.md +208 -0
  14. coderouter_cli-2.5.2/docs/backends/install-backends.md +208 -0
  15. coderouter_cli-2.5.2/docs/backends/launcher-quickstart.md +143 -0
  16. coderouter_cli-2.5.2/docs/backends/launcher.md +323 -0
  17. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/llamacpp-direct.en.md +3 -3
  18. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/llamacpp-direct.md +4 -4
  19. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/lmstudio-direct.en.md +3 -3
  20. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/lmstudio-direct.md +3 -3
  21. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/concepts}/architecture.md +2 -2
  22. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/designs/v1.6-auto-router.md +1 -1
  23. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/free-tier-guide.en.md +5 -5
  24. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/free-tier-guide.md +7 -7
  25. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/troubleshooting.en.md +1 -1
  26. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/troubleshooting.md +1 -1
  27. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/usage-guide.en.md +5 -5
  28. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/usage-guide.md +4 -4
  29. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/start}/quickstart.en.md +9 -9
  30. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/start}/quickstart.md +9 -9
  31. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/start}/when-do-i-need-coderouter.en.md +1 -1
  32. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/start}/when-do-i-need-coderouter.md +1 -1
  33. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.nvidia-nim.yaml +1 -1
  34. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.yaml +3 -3
  35. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/pyproject.toml +1 -1
  36. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_examples_yaml.py +1 -1
  37. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_setup_sh.py +1 -1
  38. coderouter_cli-2.5.0/docs/launcher-gui.md +0 -200
  39. coderouter_cli-2.5.0/docs/launcher-quickstart.md +0 -187
  40. coderouter_cli-2.5.0/docs/launcher.md +0 -288
  41. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/.gitignore +0 -0
  42. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/LICENSE +0 -0
  43. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/__init__.py +0 -0
  44. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/__main__.py +0 -0
  45. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/adapters/__init__.py +0 -0
  46. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/adapters/anthropic_native.py +0 -0
  47. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/adapters/base.py +0 -0
  48. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/adapters/openai_compat.py +0 -0
  49. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/adapters/registry.py +0 -0
  50. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/cli_stats.py +0 -0
  51. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/config/__init__.py +0 -0
  52. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/config/env_file.py +0 -0
  53. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/config/loader.py +0 -0
  54. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/config/schemas.py +0 -0
  55. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/cost.py +0 -0
  56. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/data/__init__.py +0 -0
  57. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/doctor.py +0 -0
  58. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/doctor_apply.py +0 -0
  59. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/env_security.py +0 -0
  60. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/errors.py +0 -0
  61. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/__init__.py +0 -0
  62. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/_fingerprint.py +0 -0
  63. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/backend_health.py +0 -0
  64. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/context_budget.py +0 -0
  65. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/continuous_probe.py +0 -0
  66. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/drift_actions.py +0 -0
  67. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/drift_detection.py +0 -0
  68. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/memory_pressure.py +0 -0
  69. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/self_healing.py +0 -0
  70. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/guards/tool_loop.py +0 -0
  71. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/__init__.py +0 -0
  72. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/anthropic_routes.py +0 -0
  73. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/app.py +0 -0
  74. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/dashboard_routes.py +0 -0
  75. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/metrics_routes.py +0 -0
  76. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/ingress/openai_routes.py +0 -0
  77. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/metrics/__init__.py +0 -0
  78. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/metrics/collector.py +0 -0
  79. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/metrics/prometheus.py +0 -0
  80. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/output_filters.py +0 -0
  81. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/plugins/__init__.py +0 -0
  82. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/plugins/base.py +0 -0
  83. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/plugins/loader.py +0 -0
  84. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/plugins/registry.py +0 -0
  85. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/routing/__init__.py +0 -0
  86. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/routing/adaptive.py +0 -0
  87. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/routing/auto_router.py +0 -0
  88. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/routing/budget.py +0 -0
  89. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/routing/fallback.py +0 -0
  90. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/state/__init__.py +0 -0
  91. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/state/audit_log.py +0 -0
  92. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/state/replay.py +0 -0
  93. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/state/request_log.py +0 -0
  94. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/state/store.py +0 -0
  95. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/state/suggest_rules.py +0 -0
  96. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/token_estimation.py +0 -0
  97. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/translation/__init__.py +0 -0
  98. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/translation/anthropic.py +0 -0
  99. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/translation/convert.py +0 -0
  100. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/coderouter/translation/tool_repair.py +0 -0
  101. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/assets/dashboard-demo.png +0 -0
  102. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/gguf_dl.md +0 -0
  103. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/backends}/verify-ollama-0.23.1.md +0 -0
  104. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/concepts}/context-budget.md +0 -0
  105. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/concepts}/continuous-probing.md +0 -0
  106. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/concepts}/drift-detection.md +0 -0
  107. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/concepts}/partial-stitch.md +0 -0
  108. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/designs/v1.5-dashboard-mockup.html +0 -0
  109. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/designs/v1.6-auto-router-verification.md +0 -0
  110. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/security.en.md +0 -0
  111. {coderouter_cli-2.5.0/docs → coderouter_cli-2.5.2/docs/guides}/security.md +0 -0
  112. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/openrouter-roster/CHANGES.md +0 -0
  113. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/openrouter-roster/README.md +0 -0
  114. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/openrouter-roster/latest.json +0 -0
  115. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v0.4.md +0 -0
  116. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v0.5-verify.md +0 -0
  117. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v0.5.md +0 -0
  118. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v0.6.md +0 -0
  119. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v0.7.md +0 -0
  120. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v1.0-verify.md +0 -0
  121. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/docs/retrospectives/v1.0.md +0 -0
  122. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/.env.example +0 -0
  123. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.auto-custom.yaml +0 -0
  124. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.auto.yaml +0 -0
  125. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.llama-cpp-vllm.yaml +0 -0
  126. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.note-2026.yaml +0 -0
  127. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.raspberrypi.yaml +0 -0
  128. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/examples/providers.v2-context-budget.yaml +0 -0
  129. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/demo_traffic.sh +0 -0
  130. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/openrouter_roster_diff.py +0 -0
  131. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/smoke_v2_2.sh +0 -0
  132. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/verify-providers.yaml +0 -0
  133. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/verify_ollama_0_23.py +0 -0
  134. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/verify_v0_5.sh +0 -0
  135. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/scripts/verify_v1_0.sh +0 -0
  136. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/__init__.py +0 -0
  137. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/conftest.py +0 -0
  138. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_adapter_anthropic.py +0 -0
  139. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_audit_log.py +0 -0
  140. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_auto_router.py +0 -0
  141. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_backend_health.py +0 -0
  142. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_budget.py +0 -0
  143. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_capability.py +0 -0
  144. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_capability_degraded_payload.py +0 -0
  145. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_capability_registry.py +0 -0
  146. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_capability_registry_cache_control.py +0 -0
  147. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_claude_code_suitability.py +0 -0
  148. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_cli.py +0 -0
  149. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_cli_stats.py +0 -0
  150. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_config.py +0 -0
  151. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_context_budget.py +0 -0
  152. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_continuous_probe.py +0 -0
  153. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_dashboard_endpoint.py +0 -0
  154. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_doctor.py +0 -0
  155. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_doctor_apply.py +0 -0
  156. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_doctor_cache_probe.py +0 -0
  157. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_drift_actions.py +0 -0
  158. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_drift_detection.py +0 -0
  159. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_drift_detection_integration.py +0 -0
  160. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_env_file.py +0 -0
  161. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_env_security.py +0 -0
  162. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_errors.py +0 -0
  163. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback.py +0 -0
  164. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback_anthropic.py +0 -0
  165. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback_cache_control.py +0 -0
  166. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback_cache_observed.py +0 -0
  167. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback_misconfig_warn.py +0 -0
  168. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback_paid_gate.py +0 -0
  169. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_fallback_thinking.py +0 -0
  170. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_guards_tool_loop.py +0 -0
  171. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_ingress_anthropic.py +0 -0
  172. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_ingress_profile.py +0 -0
  173. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_memory_pressure.py +0 -0
  174. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_cache.py +0 -0
  175. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_collector.py +0 -0
  176. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_cost.py +0 -0
  177. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_endpoint.py +0 -0
  178. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_jsonl.py +0 -0
  179. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_prometheus.py +0 -0
  180. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_metrics_prometheus_cache.py +0 -0
  181. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_openai_compat.py +0 -0
  182. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_openrouter_roster_diff.py +0 -0
  183. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_output_filters.py +0 -0
  184. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_output_filters_adapters.py +0 -0
  185. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_partial_stitch.py +0 -0
  186. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_plugins_integration.py +0 -0
  187. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_plugins_loader.py +0 -0
  188. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_plugins_registry.py +0 -0
  189. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_reasoning_strip.py +0 -0
  190. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_request_log.py +0 -0
  191. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_routing_adaptive.py +0 -0
  192. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_self_healing.py +0 -0
  193. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_state_store.py +0 -0
  194. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_token_estimation.py +0 -0
  195. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_tool_repair.py +0 -0
  196. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_translation_anthropic.py +0 -0
  197. {coderouter_cli-2.5.0 → coderouter_cli-2.5.2}/tests/test_translation_reverse.py +0 -0
@@ -6,6 +6,73 @@ versioning follows [SemVer](https://semver.org/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [v2.5.2] — 2026-05-22 (Backend-aware Launcher suggestions + backend install guide)
10
+
11
+ Patch release: a Launcher bug fix and documentation improvements.
12
+
13
+ ### Fixed
14
+
15
+ - **Launcher "suggest values" (`⚙ 推奨値`) is now backend-aware.**
16
+ Previously the button emitted llama.cpp flags
17
+ (`-ngl` / `--ctx-size` / `--threads`) for every backend, but vLLM and
18
+ MLX reject those. Now:
19
+ - **llama.cpp** — the same flags are still suggested, as before.
20
+ - **vLLM** — empty; `--max-model-len` etc. depend on the model's real
21
+ context length, so the engine's auto-derivation is left to do its job.
22
+ - **MLX** — empty; it assumes unified memory and takes no launch-time
23
+ tuning flags.
24
+
25
+ Fixed in both the desktop GUI (`launcher_gui.py`) and the Web launcher
26
+ (`coderouter/ingress/launcher_routes.py`); the `/api/launcher/suggest`
27
+ endpoint now accepts a `backend` parameter.
28
+
29
+ ### Documentation
30
+
31
+ - New **`docs/backends/install-backends.md`** (+ `.en.md`) — an
32
+ installation guide for llama.cpp / vLLM / MLX covering macOS / Linux /
33
+ Windows, with per-backend verification steps and common pitfalls.
34
+ - **Launcher docs consolidated from 3 files to 2**: `launcher-gui.md` is
35
+ merged into a unified `launcher.md` (Web + Desktop GUI in one guide,
36
+ shared reference documented once); `launcher-quickstart.md` is slimmed
37
+ to delegate installation to the new guide.
38
+ - **Backend venv convention documented**: vLLM / MLX virtual
39
+ environments live under `~/.coderouter/backends/<backend>/`, one venv
40
+ per backend.
41
+
42
+ ---
43
+
44
+ ## [v2.5.1] — 2026-05-22 (MLX backend + docs reorganization)
45
+
46
+ Patch release: a third Launcher backend, a reorganized documentation
47
+ tree, and a security fix.
48
+
49
+ ### Added
50
+
51
+ - **MLX backend** for the Launcher (`launcher_gui.py` and
52
+ `coderouter/ingress/launcher_routes.py`): `mlx` joins `llama.cpp` and
53
+ `vllm`, aimed at Apple Silicon users. Launches
54
+ `python -m mlx_lm.server --model <m> --port <p>`. The backend
55
+ selectors (desktop GUI combobox / Web `<select>`) gain an `mlx`
56
+ option, and the binary-not-found error messages are now
57
+ backend-agnostic.
58
+
59
+ ### Changed
60
+
61
+ - **`docs/` reorganized** into role-based folders — `start/`, `guides/`,
62
+ `backends/`, `concepts/` — with a new bilingual (JA/EN) master index
63
+ at `docs/README.md` including a quick "what to read" table. Internal
64
+ cross-links, `README.md` / `README.en.md`, and code/config path
65
+ references were updated to the new layout.
66
+ - **`plan.md` restructured**: deduplicated, version ordering fixed,
67
+ sections compressed (1747 → 721 lines).
68
+
69
+ ### Security
70
+
71
+ - **starlette `1.0.0` → `1.0.1`** (`uv.lock`): fixes PYSEC-2026-161,
72
+ which failed the `cve-audit` CI job.
73
+
74
+ ---
75
+
9
76
  ## [v2.5.0] — 2026-05-22 (Launcher — llama.cpp / vllm GUI)
10
77
 
11
78
  Browser-based process manager for local inference backends, integrated
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 2.5.0
3
+ Version: 2.5.2
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -54,7 +54,7 @@ Description-Content-Type: text/markdown
54
54
  </p>
55
55
 
56
56
  <p align="center">
57
- <a href="./README.en.md">English</a> · <strong>日本語</strong> · <a href="./docs/quickstart.md">10 分で動かす</a> · <a href="./docs/architecture.md">設計詳細</a>
57
+ <a href="./README.en.md">English</a> · <strong>日本語</strong> · <a href="./docs/start/quickstart.md">10 分で動かす</a> · <a href="./docs/concepts/architecture.md">設計詳細</a>
58
58
  </p>
59
59
 
60
60
  ---
@@ -121,7 +121,7 @@ ANTHROPIC_BASE_URL=http://localhost:8088 ANTHROPIC_AUTH_TOKEN=dummy claude
121
121
  | codex / gemini-cli + Ollama 直繋ぎで動いてる | オプション — フォールバックが欲しいなら |
122
122
  | Claude API を直接叩いてて問題ない | 不要 |
123
123
 
124
- 詳細は → [要否判定ガイド](./docs/when-do-i-need-coderouter.md)
124
+ 詳細は → [要否判定ガイド](./docs/start/when-do-i-need-coderouter.md)
125
125
 
126
126
  ---
127
127
 
@@ -186,7 +186,7 @@ launcher:
186
186
  "--max-model-len": 4096
187
187
  ```
188
188
 
189
- 詳細 → [Launcher ガイド](./docs/launcher.md)
189
+ 詳細 → [Launcher ガイド](./docs/backends/launcher.md)
190
190
 
191
191
  ---
192
192
 
@@ -213,7 +213,7 @@ providers:
213
213
  api_key_env: OPENROUTER_API_KEY
214
214
  ```
215
215
 
216
- もっと詳しい設定 → [利用ガイド](./docs/usage-guide.md) · [設計詳細](./docs/architecture.md)
216
+ もっと詳しい設定 → [利用ガイド](./docs/guides/usage-guide.md) · [設計詳細](./docs/concepts/architecture.md)
217
217
 
218
218
  ---
219
219
 
@@ -221,15 +221,15 @@ providers:
221
221
 
222
222
  | やりたいこと | ドキュメント |
223
223
  |---|---|
224
- | すぐ動かす | [Quickstart](./docs/quickstart.md) |
225
- | 使いこなす | [利用ガイド](./docs/usage-guide.md) |
226
- | 無料で回す | [無料枠ガイド](./docs/free-tier-guide.md) |
227
- | llama.cpp / vllm を GUI で起動 | [Launcher ガイド](./docs/launcher.md) |
228
- | 詰まった | [トラブルシューティング](./docs/troubleshooting.md) |
229
- | 設計を知りたい | [アーキテクチャ詳細](./docs/architecture.md) |
224
+ | すぐ動かす | [Quickstart](./docs/start/quickstart.md) |
225
+ | 使いこなす | [利用ガイド](./docs/guides/usage-guide.md) |
226
+ | 無料で回す | [無料枠ガイド](./docs/guides/free-tier-guide.md) |
227
+ | llama.cpp / vllm を GUI で起動 | [Launcher ガイド](./docs/backends/launcher.md) |
228
+ | 詰まった | [トラブルシューティング](./docs/guides/troubleshooting.md) |
229
+ | 設計を知りたい | [アーキテクチャ詳細](./docs/concepts/architecture.md) |
230
230
  | 全リリース履歴 | [CHANGELOG](./CHANGELOG.md) |
231
231
 
232
- English: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier](./docs/free-tier-guide.en.md) · [Troubleshooting](./docs/troubleshooting.en.md)
232
+ English: [Quickstart](./docs/start/quickstart.en.md) · [Usage guide](./docs/guides/usage-guide.en.md) · [Free-tier](./docs/guides/free-tier-guide.en.md) · [Troubleshooting](./docs/guides/troubleshooting.en.md)
233
233
 
234
234
  ---
235
235
 
@@ -239,10 +239,10 @@ English: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-gui
239
239
 
240
240
  | 症状 | 原因 | 詳細 |
241
241
  |---|---|---|
242
- | 401 エラー | API キー未設定 / `.env` に `export` 忘れ | [§1](./docs/troubleshooting.md#1-起動設定で踏みやすい-5-つの罠-v162-追加) |
243
- | 返信が空 / 意味不明 | Ollama の `num_ctx` が 2048 に切り詰め | [§3](./docs/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
244
- | `<think>` タグが漏れる | `output_filters: [strip_thinking]` を付ける | [§3](./docs/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
245
- | Claude Code でツール呼び出しがおかしい | tool-call 修復が効いてない | [§4](./docs/troubleshooting.md#4-claude-code-連携で踏みやすい罠-v162-追加) |
242
+ | 401 エラー | API キー未設定 / `.env` に `export` 忘れ | [§1](./docs/guides/troubleshooting.md#1-起動設定で踏みやすい-5-つの罠-v162-追加) |
243
+ | 返信が空 / 意味不明 | Ollama の `num_ctx` が 2048 に切り詰め | [§3](./docs/guides/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
244
+ | `<think>` タグが漏れる | `output_filters: [strip_thinking]` を付ける | [§3](./docs/guides/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
245
+ | Claude Code でツール呼び出しがおかしい | tool-call 修復が効いてない | [§4](./docs/guides/troubleshooting.md#4-claude-code-連携で踏みやすい罠-v162-追加) |
246
246
 
247
247
  `http://localhost:8088/dashboard` を開いておくと、ほとんどの問題が見て 10 秒でわかります。
248
248
 
@@ -268,7 +268,7 @@ CodeRouter は backend ルーター層として独立して動きます。`OPENA
268
268
 
269
269
  ## Security
270
270
 
271
- シークレットは環境変数に置きます。[`docs/security.md`](./docs/security.md) に完全な方針と報告手順があります。
271
+ シークレットは環境変数に置きます。[`docs/guides/security.md`](./docs/guides/security.md) に完全な方針と報告手順があります。
272
272
 
273
273
  ## License
274
274
 
@@ -13,7 +13,7 @@
13
13
  </p>
14
14
 
15
15
  <p align="center">
16
- <strong>English</strong> · <a href="./README.md">日本語</a> · <a href="./docs/quickstart.en.md">Get started in 10 min</a> · <a href="./docs/architecture.md">Architecture</a>
16
+ <strong>English</strong> · <a href="./README.md">日本語</a> · <a href="./docs/start/quickstart.en.md">Get started in 10 min</a> · <a href="./docs/concepts/architecture.md">Architecture</a>
17
17
  </p>
18
18
 
19
19
  ---
@@ -80,7 +80,7 @@ That's it. Claude Code works as usual, but your local Ollama is answering behind
80
80
  | codex / gemini-cli + Ollama works fine | Optional — if you want fallback |
81
81
  | Using Claude API directly, no issues | Not needed |
82
82
 
83
- Full decision matrix → [Do I need CodeRouter?](./docs/when-do-i-need-coderouter.en.md)
83
+ Full decision matrix → [Do I need CodeRouter?](./docs/start/when-do-i-need-coderouter.en.md)
84
84
 
85
85
  ---
86
86
 
@@ -145,7 +145,7 @@ launcher:
145
145
  "--max-model-len": 4096
146
146
  ```
147
147
 
148
- Details → [Launcher guide](./docs/launcher.md)
148
+ Details → [Launcher guide](./docs/backends/launcher.md)
149
149
 
150
150
  ---
151
151
 
@@ -172,7 +172,7 @@ providers:
172
172
  api_key_env: OPENROUTER_API_KEY
173
173
  ```
174
174
 
175
- More detail → [Usage guide](./docs/usage-guide.en.md) · [Architecture](./docs/architecture.md)
175
+ More detail → [Usage guide](./docs/guides/usage-guide.en.md) · [Architecture](./docs/concepts/architecture.md)
176
176
 
177
177
  ---
178
178
 
@@ -180,15 +180,15 @@ More detail → [Usage guide](./docs/usage-guide.en.md) · [Architecture](./docs
180
180
 
181
181
  | Goal | Document |
182
182
  |---|---|
183
- | Get running fast | [Quickstart](./docs/quickstart.en.md) |
184
- | Use it well | [Usage guide](./docs/usage-guide.en.md) |
185
- | Run for free | [Free-tier guide](./docs/free-tier-guide.en.md) |
186
- | Launch llama.cpp / vllm via GUI | [Launcher guide](./docs/launcher.md) |
187
- | Stuck? | [Troubleshooting](./docs/troubleshooting.en.md) |
188
- | Understand the design | [Architecture](./docs/architecture.md) |
183
+ | Get running fast | [Quickstart](./docs/start/quickstart.en.md) |
184
+ | Use it well | [Usage guide](./docs/guides/usage-guide.en.md) |
185
+ | Run for free | [Free-tier guide](./docs/guides/free-tier-guide.en.md) |
186
+ | Launch llama.cpp / vllm via GUI | [Launcher guide](./docs/backends/launcher.md) |
187
+ | Stuck? | [Troubleshooting](./docs/guides/troubleshooting.en.md) |
188
+ | Understand the design | [Architecture](./docs/concepts/architecture.md) |
189
189
  | Full release history | [CHANGELOG](./CHANGELOG.md) |
190
190
 
191
- 日本語: [Quickstart](./docs/quickstart.md) · [利用ガイド](./docs/usage-guide.md) · [無料枠ガイド](./docs/free-tier-guide.md) · [トラブルシューティング](./docs/troubleshooting.md)
191
+ 日本語: [Quickstart](./docs/start/quickstart.md) · [利用ガイド](./docs/guides/usage-guide.md) · [無料枠ガイド](./docs/guides/free-tier-guide.md) · [トラブルシューティング](./docs/guides/troubleshooting.md)
192
192
 
193
193
  ---
194
194
 
@@ -198,10 +198,10 @@ More detail → [Usage guide](./docs/usage-guide.en.md) · [Architecture](./docs
198
198
 
199
199
  | Symptom | Cause | Details |
200
200
  |---|---|---|
201
- | 401 error | API key not set / missing `export` in `.env` | [§1](./docs/troubleshooting.en.md#1-five-startup--config-gotchas-added-in-v162) |
202
- | Empty / garbage replies | Ollama `num_ctx` truncated to 2048 | [§3](./docs/troubleshooting.en.md#3-ollama-beginner--5-silent-fail-symptoms-v07-c) |
203
- | `<think>` tags leaking | Add `output_filters: [strip_thinking]` | [§3](./docs/troubleshooting.en.md#3-ollama-beginner--5-silent-fail-symptoms-v07-c) |
204
- | Tool calls misbehaving in Claude Code | Tool-call repair not kicking in | [§4](./docs/troubleshooting.en.md#4-claude-code-integration-gotchas-added-in-v162) |
201
+ | 401 error | API key not set / missing `export` in `.env` | [§1](./docs/guides/troubleshooting.en.md#1-five-startup--config-gotchas-added-in-v162) |
202
+ | Empty / garbage replies | Ollama `num_ctx` truncated to 2048 | [§3](./docs/guides/troubleshooting.en.md#3-ollama-beginner--5-silent-fail-symptoms-v07-c) |
203
+ | `<think>` tags leaking | Add `output_filters: [strip_thinking]` | [§3](./docs/guides/troubleshooting.en.md#3-ollama-beginner--5-silent-fail-symptoms-v07-c) |
204
+ | Tool calls misbehaving in Claude Code | Tool-call repair not kicking in | [§4](./docs/guides/troubleshooting.en.md#4-claude-code-integration-gotchas-added-in-v162) |
205
205
 
206
206
  Open `http://localhost:8088/dashboard` while debugging — most issues become visible in 10 seconds.
207
207
 
@@ -227,7 +227,7 @@ CodeRouter runs as an independent backend router layer. Point any project's `OPE
227
227
 
228
228
  ## Security
229
229
 
230
- Secrets go in env vars, not config files. See [`docs/security.en.md`](./docs/security.en.md) for the full policy and reporting instructions.
230
+ Secrets go in env vars, not config files. See [`docs/guides/security.en.md`](./docs/guides/security.en.md) for the full policy and reporting instructions.
231
231
 
232
232
  ## License
233
233
 
@@ -13,7 +13,7 @@
13
13
  </p>
14
14
 
15
15
  <p align="center">
16
- <a href="./README.en.md">English</a> · <strong>日本語</strong> · <a href="./docs/quickstart.md">10 分で動かす</a> · <a href="./docs/architecture.md">設計詳細</a>
16
+ <a href="./README.en.md">English</a> · <strong>日本語</strong> · <a href="./docs/start/quickstart.md">10 分で動かす</a> · <a href="./docs/concepts/architecture.md">設計詳細</a>
17
17
  </p>
18
18
 
19
19
  ---
@@ -80,7 +80,7 @@ ANTHROPIC_BASE_URL=http://localhost:8088 ANTHROPIC_AUTH_TOKEN=dummy claude
80
80
  | codex / gemini-cli + Ollama 直繋ぎで動いてる | オプション — フォールバックが欲しいなら |
81
81
  | Claude API を直接叩いてて問題ない | 不要 |
82
82
 
83
- 詳細は → [要否判定ガイド](./docs/when-do-i-need-coderouter.md)
83
+ 詳細は → [要否判定ガイド](./docs/start/when-do-i-need-coderouter.md)
84
84
 
85
85
  ---
86
86
 
@@ -145,7 +145,7 @@ launcher:
145
145
  "--max-model-len": 4096
146
146
  ```
147
147
 
148
- 詳細 → [Launcher ガイド](./docs/launcher.md)
148
+ 詳細 → [Launcher ガイド](./docs/backends/launcher.md)
149
149
 
150
150
  ---
151
151
 
@@ -172,7 +172,7 @@ providers:
172
172
  api_key_env: OPENROUTER_API_KEY
173
173
  ```
174
174
 
175
- もっと詳しい設定 → [利用ガイド](./docs/usage-guide.md) · [設計詳細](./docs/architecture.md)
175
+ もっと詳しい設定 → [利用ガイド](./docs/guides/usage-guide.md) · [設計詳細](./docs/concepts/architecture.md)
176
176
 
177
177
  ---
178
178
 
@@ -180,15 +180,15 @@ providers:
180
180
 
181
181
  | やりたいこと | ドキュメント |
182
182
  |---|---|
183
- | すぐ動かす | [Quickstart](./docs/quickstart.md) |
184
- | 使いこなす | [利用ガイド](./docs/usage-guide.md) |
185
- | 無料で回す | [無料枠ガイド](./docs/free-tier-guide.md) |
186
- | llama.cpp / vllm を GUI で起動 | [Launcher ガイド](./docs/launcher.md) |
187
- | 詰まった | [トラブルシューティング](./docs/troubleshooting.md) |
188
- | 設計を知りたい | [アーキテクチャ詳細](./docs/architecture.md) |
183
+ | すぐ動かす | [Quickstart](./docs/start/quickstart.md) |
184
+ | 使いこなす | [利用ガイド](./docs/guides/usage-guide.md) |
185
+ | 無料で回す | [無料枠ガイド](./docs/guides/free-tier-guide.md) |
186
+ | llama.cpp / vllm を GUI で起動 | [Launcher ガイド](./docs/backends/launcher.md) |
187
+ | 詰まった | [トラブルシューティング](./docs/guides/troubleshooting.md) |
188
+ | 設計を知りたい | [アーキテクチャ詳細](./docs/concepts/architecture.md) |
189
189
  | 全リリース履歴 | [CHANGELOG](./CHANGELOG.md) |
190
190
 
191
- English: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier](./docs/free-tier-guide.en.md) · [Troubleshooting](./docs/troubleshooting.en.md)
191
+ English: [Quickstart](./docs/start/quickstart.en.md) · [Usage guide](./docs/guides/usage-guide.en.md) · [Free-tier](./docs/guides/free-tier-guide.en.md) · [Troubleshooting](./docs/guides/troubleshooting.en.md)
192
192
 
193
193
  ---
194
194
 
@@ -198,10 +198,10 @@ English: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-gui
198
198
 
199
199
  | 症状 | 原因 | 詳細 |
200
200
  |---|---|---|
201
- | 401 エラー | API キー未設定 / `.env` に `export` 忘れ | [§1](./docs/troubleshooting.md#1-起動設定で踏みやすい-5-つの罠-v162-追加) |
202
- | 返信が空 / 意味不明 | Ollama の `num_ctx` が 2048 に切り詰め | [§3](./docs/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
203
- | `<think>` タグが漏れる | `output_filters: [strip_thinking]` を付ける | [§3](./docs/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
204
- | Claude Code でツール呼び出しがおかしい | tool-call 修復が効いてない | [§4](./docs/troubleshooting.md#4-claude-code-連携で踏みやすい罠-v162-追加) |
201
+ | 401 エラー | API キー未設定 / `.env` に `export` 忘れ | [§1](./docs/guides/troubleshooting.md#1-起動設定で踏みやすい-5-つの罠-v162-追加) |
202
+ | 返信が空 / 意味不明 | Ollama の `num_ctx` が 2048 に切り詰め | [§3](./docs/guides/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
203
+ | `<think>` タグが漏れる | `output_filters: [strip_thinking]` を付ける | [§3](./docs/guides/troubleshooting.md#3-ollama-初心者--サイレント失敗-5-症状-v07-c) |
204
+ | Claude Code でツール呼び出しがおかしい | tool-call 修復が効いてない | [§4](./docs/guides/troubleshooting.md#4-claude-code-連携で踏みやすい罠-v162-追加) |
205
205
 
206
206
  `http://localhost:8088/dashboard` を開いておくと、ほとんどの問題が見て 10 秒でわかります。
207
207
 
@@ -227,7 +227,7 @@ CodeRouter は backend ルーター層として独立して動きます。`OPENA
227
227
 
228
228
  ## Security
229
229
 
230
- シークレットは環境変数に置きます。[`docs/security.md`](./docs/security.md) に完全な方針と報告手順があります。
230
+ シークレットは環境変数に置きます。[`docs/guides/security.md`](./docs/guides/security.md) に完全な方針と報告手順があります。
231
231
 
232
232
  ## License
233
233
 
@@ -61,7 +61,7 @@ def _build_parser() -> argparse.ArgumentParser:
61
61
  "binding the server. Repeat to layer multiple files. By "
62
62
  "default, file values do NOT override variables already in "
63
63
  "the environment (the shell `export` wins). See "
64
- "docs/troubleshooting.md §5 for 1Password / direnv / sops "
64
+ "docs/guides/troubleshooting.md §5 for 1Password / direnv / sops "
65
65
  "integration recipes."
66
66
  ),
67
67
  )
@@ -116,7 +116,7 @@ def _build_parser() -> argparse.ArgumentParser:
116
116
  "POSIX file mode (0600 expected), .gitignore coverage, "
117
117
  "and git-tracking state. Bare `--check-env` (no PATH) "
118
118
  "looks for `./.env` then `~/.coderouter/.env`. "
119
- "See docs/troubleshooting.md §5 for the threat model."
119
+ "See docs/guides/troubleshooting.md §5 for the threat model."
120
120
  ),
121
121
  )
122
122
  doctor.add_argument(
@@ -109,7 +109,7 @@ class RegistryCapabilities(BaseModel):
109
109
  "harness. ``degraded`` = the model over-eagerly invokes "
110
110
  "tools/skills when given Claude Code's system prompt — e.g. "
111
111
  "Llama-3.3-70B treating small talk like ``こんにちは`` as "
112
- "``Skill(hello)`` invocations (see docs/troubleshooting.md "
112
+ "``Skill(hello)`` invocations (see docs/guides/troubleshooting.md "
113
113
  "§4-1 for the symptom log). ``ok`` = explicitly verified "
114
114
  "clean. ``None`` = no opinion (treated as ``ok`` at the "
115
115
  "startup check)."
@@ -35,7 +35,7 @@
35
35
  # Claude Code's agentic-coding harness;
36
36
  # "degraded" triggers a startup WARN when
37
37
  # the provider is on a `claude-code-*`
38
- # chain. See docs/troubleshooting.md §4-1.
38
+ # chain. See docs/guides/troubleshooting.md §4-1.
39
39
  #
40
40
  # First-match semantics: rules within a file are evaluated top-to-bottom
41
41
  # per flag; the first rule whose glob matches AND declares that flag
@@ -153,7 +153,7 @@ rules:
153
153
  # Llama-3.3-70B (verified 2026-04-24 against NVIDIA NIM) rewrites
154
154
  # ``こんにちは`` into ``Skill(hello)`` invocations and fabricates
155
155
  # ``AskUserQuestion("What is your name?")`` elicitations — see
156
- # docs/articles/note-nvidia-nim.md §6-2 + docs/troubleshooting.md §4-1.
156
+ # docs/articles/note/note-nvidia-nim.md §6-2 + docs/guides/troubleshooting.md §4-1.
157
157
  #
158
158
  # Glob coverage: NIM uses ``meta/llama-3.3-70b-instruct``, OpenRouter
159
159
  # uses ``meta-llama/llama-3.3-70b-instruct``, some local servers use
@@ -1,13 +1,13 @@
1
1
  """Launcher routes — ``GET /launcher`` + ``/api/launcher/*``.
2
2
 
3
- llama.cpp / vllm プロセス管理 UI。
3
+ llama.cpp / vllm / mlx プロセス管理 UI。
4
4
 
5
5
  設計方針:
6
6
  - ダッシュボードと同じ "1ファイル完結" スタイル (Tailwind CDN + inline JS)
7
7
  - プロセスレジストリは app.state.launcher に持たせる (再起動で消えるが意図通り)
8
8
  - option_profiles は providers.yaml の launcher: セクションで管理 → コード変更不要で拡張可
9
9
  - 複数プロセスの同時起動に対応 (UUID ベースの ID 管理)
10
- - llama.cpp / vllm どちらも同じ key-value args スキーマで統一
10
+ - llama.cpp / vllm / mlx いずれも同じ key-value args スキーマで統一
11
11
 
12
12
  エンドポイント:
13
13
  GET /launcher → HTML UI
@@ -62,7 +62,7 @@ class ManagedProcess:
62
62
 
63
63
  id: str
64
64
  name: str
65
- backend: str # "llama.cpp" | "vllm"
65
+ backend: str # "llama.cpp" | "vllm" | "mlx"
66
66
  model_path: str
67
67
  port: int
68
68
  options: dict[str, Any]
@@ -150,6 +150,7 @@ def _scan_models(model_dirs: list[str]) -> list[dict[str, Any]]:
150
150
  _BACKEND_DEFAULTS: dict[str, str] = {
151
151
  "llama.cpp": "llama-server",
152
152
  "vllm": "python",
153
+ "mlx": "python", # mlx_lm.server (Apple Silicon 向け)
153
154
  }
154
155
 
155
156
 
@@ -252,14 +253,32 @@ def _model_recommendation(size_gb: float, hw: dict[str, Any]) -> dict[str, str]:
252
253
  return {"level": "warn", "label": "メモリ厳しい"}
253
254
 
254
255
 
255
- def _suggest_launch_flags(size_gb: float, hw: dict[str, Any]) -> str:
256
- """選択モデル + ハードから -ngl / --ctx-size / --threads を提案する。
256
+ def _suggest_launch_flags(backend: str, size_gb: float,
257
+ hw: dict[str, Any]) -> str:
258
+ """選択モデル + ハード + バックエンドから推奨起動フラグを提案する。
257
259
 
260
+ バックエンドごとにフラグ体系が違うため分岐する:
261
+ - llama.cpp : -ngl / --ctx-size / --threads を算出
262
+ - vllm : モデル config からの自動導出に任せる (空文字)
263
+ - mlx : 統合メモリ前提で起動時フラグ不要 (空文字)
258
264
  あくまで目安。他プロセスのメモリ使用や量子化方式までは考慮しない。
259
265
  """
260
- threads = max(1, int(hw.get("cpu_count", 4)) - 2)
266
+ if backend == "mlx":
267
+ # MLX は統合メモリ + Metal 前提。llama.cpp の -ngl に相当する
268
+ # レイヤーオフロードの概念がなく、mlx_lm.server は起動時の
269
+ # 性能チューニングフラグを取らない。
270
+ return ""
271
+ if backend == "vllm":
272
+ # vllm の --max-model-len はモデルの実コンテキスト長に依存する。
273
+ # メモリ量だけのヒューリスティックで値を出すと、モデルの上限を
274
+ # 超えたときに vllm が起動を拒否する。空にしてエンジンの
275
+ # 自動導出 (モデル config) に任せるのが安全。
276
+ return ""
277
+
278
+ # llama.cpp (デフォルト)
261
279
  usable = _usable_memory_gb(hw)
262
280
  weights = size_gb * 1.15 # 重み + オーバーヘッド概算
281
+ threads = max(1, int(hw.get("cpu_count", 4)) - 2)
263
282
  if hw.get("gpu") == "cpu":
264
283
  ngl = 0
265
284
  elif usable >= weights + 1.0:
@@ -312,8 +331,17 @@ def _build_cmd(
312
331
  "--model", model_path,
313
332
  "--port", str(port),
314
333
  ]
334
+ elif backend == "mlx":
335
+ cmd = [
336
+ exe, "-m", "mlx_lm.server",
337
+ "--model", model_path,
338
+ "--port", str(port),
339
+ ]
315
340
  else:
316
- raise ValueError(f"Unknown backend: {backend!r}. Expected 'llama.cpp' or 'vllm'.")
341
+ raise ValueError(
342
+ f"Unknown backend: {backend!r}. "
343
+ "Expected 'llama.cpp', 'vllm' or 'mlx'."
344
+ )
317
345
 
318
346
  for flag, val in options.items():
319
347
  if isinstance(val, bool):
@@ -610,17 +638,20 @@ async def api_logs(proc_id: str, request: Request, n: int = 100) -> dict[str, An
610
638
 
611
639
 
612
640
  @router.get("/api/launcher/suggest")
613
- async def api_suggest(model_path: str = "") -> dict[str, Any]:
641
+ async def api_suggest(model_path: str = "",
642
+ backend: str = "llama.cpp") -> dict[str, Any]:
614
643
  """Suggest launch flags for the given model based on detected hardware.
615
644
 
616
645
  クライアントの「推奨値」ボタンから呼ばれる。値はあくまで目安。
646
+ バックエンドごとにフラグ体系が違うため backend も受け取る。
617
647
  """
618
648
  hw = await asyncio.to_thread(_detect_hardware)
619
649
  size_gb = 0.0
620
650
  if model_path:
621
651
  size_gb = await asyncio.to_thread(_model_size_gb, model_path)
622
652
  return {
623
- "extra_args": _suggest_launch_flags(size_gb, hw),
653
+ "extra_args": _suggest_launch_flags(backend, size_gb, hw),
654
+ "backend": backend,
624
655
  "hardware": hw,
625
656
  "size_gb": round(size_gb, 2),
626
657
  }
@@ -720,6 +751,7 @@ _LAUNCHER_HTML = r"""<!doctype html>
720
751
  <select id="f-backend" onchange="onBackendChange()">
721
752
  <option value="llama.cpp">llama.cpp</option>
722
753
  <option value="vllm">vllm</option>
754
+ <option value="mlx">mlx</option>
723
755
  </select>
724
756
  <div id="binary-hint" class="mt-1 text-xs text-slate-500 min-h-[1.2rem]"></div>
725
757
  </div>
@@ -894,14 +926,24 @@ _LAUNCHER_HTML = r"""<!doctype html>
894
926
  window.suggestOptions = async () => {
895
927
  const model = document.getElementById("f-model").value.trim();
896
928
  if (!model) { showLaunchErr("先にモデルを選択してください"); return; }
929
+ const backend = document.getElementById("f-backend").value;
897
930
  try {
898
931
  const r = await fetch("/api/launcher/suggest?model_path="
899
- + encodeURIComponent(model));
932
+ + encodeURIComponent(model)
933
+ + "&backend=" + encodeURIComponent(backend));
900
934
  const d = await r.json();
901
935
  if (!r.ok) { showLaunchErr(d.detail || "推奨値の取得に失敗"); return; }
902
936
  document.getElementById("f-extra").value = d.extra_args;
903
937
  showLaunchErr("");
904
- statusMsg("推奨値を設定(目安): " + d.extra_args);
938
+ if (d.extra_args) {
939
+ statusMsg("推奨値を設定(目安): " + d.extra_args);
940
+ } else if (backend === "mlx") {
941
+ statusMsg("MLX は起動時の調整フラグ不要です(統合メモリで自動)");
942
+ } else if (backend === "vllm") {
943
+ statusMsg("vllm は起動時フラグ不要です(モデル設定から自動導出)");
944
+ } else {
945
+ statusMsg("このバックエンドは推奨フラグの自動設定対象外です");
946
+ }
905
947
  } catch (e) {
906
948
  showLaunchErr(e.message);
907
949
  }
@@ -961,7 +1003,7 @@ _LAUNCHER_HTML = r"""<!doctype html>
961
1003
  // Enable/disable launch button based on binary availability
962
1004
  if (!info.found) {
963
1005
  btn.disabled = true;
964
- showLaunchErr(`⚠ "${esc(info.resolved)}" が見つかりません。llama.cpp をインストールするか、providers.yaml の launcher.backends.llama\\.cpp.binary にフルパスを設定してください。`);
1006
+ showLaunchErr(`⚠ "${esc(info.resolved)}" が見つかりません。選択中のバックエンド (${esc(backend)}) をインストールするか、providers.yaml の launcher.backends.${esc(backend)}.binary にフルパスを設定してください。`);
965
1007
  } else {
966
1008
  btn.disabled = false;
967
1009
  // Clear error only if it was a binary-not-found error
@@ -779,7 +779,7 @@ def log_output_filter_applied(
779
779
  # v1.7-B: chain-claude-code-suitability-degraded log shape
780
780
  #
781
781
  # Motivation (plan.md §11.B.4 #2):
782
- # v1.6.2 documented in docs/troubleshooting.md §4-1 that putting
782
+ # v1.6.2 documented in docs/guides/troubleshooting.md §4-1 that putting
783
783
  # Llama-3.3-70B at the head of a Claude-Code-facing chain causes
784
784
  # over-eager tool invocation (small talk like ``こんにちは`` getting
785
785
  # rewritten to ``Skill(hello)`` calls). Docs alone require the operator
@@ -805,7 +805,7 @@ def log_output_filter_applied(
805
805
  _DEFAULT_CLAUDE_CODE_SUITABILITY_HINT: str = (
806
806
  "move the degraded provider(s) to the tail of the chain or replace "
807
807
  "with an agentic-coding-tuned model (e.g. qwen3-coder-480b-a35b-instruct); "
808
- "see docs/troubleshooting.md §4-1"
808
+ "see docs/guides/troubleshooting.md §4-1"
809
809
  )
810
810
 
811
811
 
@@ -312,7 +312,7 @@ def anthropic_request_has_cache_control(request: AnthropicRequest) -> bool:
312
312
  # v1.7-B: claude_code_suitability startup check
313
313
  #
314
314
  # Motivation (plan.md §11.B.4 #2):
315
- # v1.6.2 documented in docs/troubleshooting.md §4-1 the "Llama-3.3-70B
315
+ # v1.6.2 documented in docs/guides/troubleshooting.md §4-1 the "Llama-3.3-70B
316
316
  # over-eagerly invokes Skill() for small talk under Claude Code"
317
317
  # symptom. v1.7-B promotes that hint from prose-only to a structured
318
318
  # automatic startup WARN: at app startup we scan every profile whose