coderouter-cli 2.5.1__tar.gz → 2.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/CHANGELOG.md +35 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/PKG-INFO +1 -1
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/launcher_routes.py +39 -8
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/README.md +2 -2
- coderouter_cli-2.5.2/docs/backends/install-backends.en.md +208 -0
- coderouter_cli-2.5.2/docs/backends/install-backends.md +208 -0
- coderouter_cli-2.5.2/docs/backends/launcher-quickstart.md +143 -0
- coderouter_cli-2.5.2/docs/backends/launcher.md +323 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/pyproject.toml +1 -1
- coderouter_cli-2.5.1/docs/backends/launcher-gui.md +0 -200
- coderouter_cli-2.5.1/docs/backends/launcher-quickstart.md +0 -187
- coderouter_cli-2.5.1/docs/backends/launcher.md +0 -288
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/.gitignore +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/LICENSE +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/README.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/README.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/__main__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/adapters/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/adapters/anthropic_native.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/adapters/base.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/adapters/openai_compat.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/adapters/registry.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/cli.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/cli_stats.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/config/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/config/capability_registry.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/config/env_file.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/config/loader.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/config/schemas.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/cost.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/data/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/data/model-capabilities.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/doctor.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/doctor_apply.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/env_security.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/errors.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/_fingerprint.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/backend_health.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/context_budget.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/continuous_probe.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/drift_actions.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/drift_detection.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/memory_pressure.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/self_healing.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/guards/tool_loop.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/anthropic_routes.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/app.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/dashboard_routes.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/metrics_routes.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/ingress/openai_routes.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/logging.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/metrics/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/metrics/collector.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/metrics/prometheus.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/output_filters.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/plugins/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/plugins/base.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/plugins/loader.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/plugins/registry.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/routing/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/routing/adaptive.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/routing/auto_router.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/routing/budget.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/routing/capability.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/routing/fallback.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/state/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/state/audit_log.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/state/replay.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/state/request_log.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/state/store.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/state/suggest_rules.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/token_estimation.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/translation/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/translation/anthropic.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/translation/convert.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/coderouter/translation/tool_repair.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/assets/dashboard-demo.png +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/gguf_dl.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/hf-ollama-models.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/llamacpp-direct.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/llamacpp-direct.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/lmstudio-direct.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/lmstudio-direct.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/backends/verify-ollama-0.23.1.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/concepts/architecture.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/concepts/context-budget.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/concepts/continuous-probing.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/concepts/drift-detection.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/concepts/partial-stitch.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/designs/v1.5-dashboard-mockup.html +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/designs/v1.6-auto-router-verification.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/designs/v1.6-auto-router.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/free-tier-guide.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/free-tier-guide.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/security.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/security.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/troubleshooting.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/troubleshooting.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/usage-guide.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/guides/usage-guide.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/openrouter-roster/CHANGES.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/openrouter-roster/README.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/openrouter-roster/latest.json +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v0.4.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v0.5-verify.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v0.5.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v0.6.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v0.7.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v1.0-verify.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/retrospectives/v1.0.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/start/quickstart.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/start/quickstart.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/start/when-do-i-need-coderouter.en.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/docs/start/when-do-i-need-coderouter.md +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/.env.example +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.auto-custom.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.auto.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.llama-cpp-vllm.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.note-2026.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.nvidia-nim.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.raspberrypi.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.v2-context-budget.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/examples/providers.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/demo_traffic.sh +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/openrouter_roster_diff.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/smoke_v2_2.sh +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/verify-providers.yaml +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/verify_ollama_0_23.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/verify_v0_5.sh +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/scripts/verify_v1_0.sh +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/__init__.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/conftest.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_adapter_anthropic.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_audit_log.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_auto_router.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_backend_health.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_budget.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_capability.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_capability_degraded_payload.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_capability_registry.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_capability_registry_cache_control.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_claude_code_suitability.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_cli.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_cli_stats.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_config.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_context_budget.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_continuous_probe.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_dashboard_endpoint.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_doctor.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_doctor_apply.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_doctor_cache_probe.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_drift_actions.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_drift_detection.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_drift_detection_integration.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_env_file.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_env_security.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_errors.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_examples_yaml.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback_anthropic.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback_cache_control.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback_cache_observed.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback_misconfig_warn.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback_paid_gate.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_fallback_thinking.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_guards_tool_loop.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_ingress_anthropic.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_ingress_profile.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_memory_pressure.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_cache.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_collector.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_cost.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_endpoint.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_jsonl.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_prometheus.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_metrics_prometheus_cache.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_openai_compat.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_openrouter_roster_diff.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_output_filters.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_output_filters_adapters.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_partial_stitch.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_plugins_integration.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_plugins_loader.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_plugins_registry.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_reasoning_strip.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_request_log.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_routing_adaptive.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_self_healing.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_setup_sh.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_state_store.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_token_estimation.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_tool_repair.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_translation_anthropic.py +0 -0
- {coderouter_cli-2.5.1 → coderouter_cli-2.5.2}/tests/test_translation_reverse.py +0 -0
|
@@ -6,6 +6,41 @@ versioning follows [SemVer](https://semver.org/).
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [v2.5.2] — 2026-05-22 (Backend-aware Launcher suggestions + backend install guide)
|
|
10
|
+
|
|
11
|
+
Patch release: a Launcher bug fix and documentation improvements.
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
|
|
15
|
+
- **Launcher "suggest values" (`⚙ 推奨値`) is now backend-aware.**
|
|
16
|
+
Previously the button emitted llama.cpp flags
|
|
17
|
+
(`-ngl` / `--ctx-size` / `--threads`) for every backend, but vLLM and
|
|
18
|
+
MLX reject those. Now:
|
|
19
|
+
- **llama.cpp** — the flags, as before.
|
|
20
|
+
- **vLLM** — empty; `--max-model-len` etc. depend on the model's real
|
|
21
|
+
context length, so the engine's auto-derivation is left to do its job.
|
|
22
|
+
- **MLX** — empty; it assumes unified memory and takes no launch-time
|
|
23
|
+
tuning flags.
|
|
24
|
+
|
|
25
|
+
Fixed in both the desktop GUI (`launcher_gui.py`) and the Web launcher
|
|
26
|
+
(`coderouter/ingress/launcher_routes.py`); the `/api/launcher/suggest`
|
|
27
|
+
endpoint now accepts a `backend` parameter.
|
|
28
|
+
|
|
29
|
+
### Documentation
|
|
30
|
+
|
|
31
|
+
- New **`docs/backends/install-backends.md`** (+ `.en.md`) — an
|
|
32
|
+
installation guide for llama.cpp / vLLM / MLX covering macOS / Linux /
|
|
33
|
+
Windows, with per-backend verification steps and common pitfalls.
|
|
34
|
+
- **Launcher docs consolidated from 3 files to 2**: `launcher-gui.md` is
|
|
35
|
+
merged into a unified `launcher.md` (Web + Desktop GUI in one guide,
|
|
36
|
+
shared reference documented once); `launcher-quickstart.md` is slimmed
|
|
37
|
+
to delegate installation to the new guide.
|
|
38
|
+
- **Backend venv convention documented**: vLLM / MLX virtual
|
|
39
|
+
environments live under `~/.coderouter/backends/<backend>/`, one venv
|
|
40
|
+
per backend.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
9
44
|
## [v2.5.1] — 2026-05-22 (MLX backend + docs reorganization)
|
|
10
45
|
|
|
11
46
|
Patch release: a third Launcher backend, a reorganized documentation
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.5.1
|
|
3
|
+
Version: 2.5.2
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -253,14 +253,32 @@ def _model_recommendation(size_gb: float, hw: dict[str, Any]) -> dict[str, str]:
|
|
|
253
253
|
return {"level": "warn", "label": "メモリ厳しい"}
|
|
254
254
|
|
|
255
255
|
|
|
256
|
-
def _suggest_launch_flags(size_gb: float, hw: dict[str, Any]) -> str:
|
|
257
|
-
|
|
258
|
-
|
|
256
|
+
def _suggest_launch_flags(backend: str, size_gb: float,
|
|
257
|
+
hw: dict[str, Any]) -> str:
|
|
258
|
+
"""選択モデル + ハード + バックエンドから推奨起動フラグを提案する。
|
|
259
|
+
|
|
260
|
+
バックエンドごとにフラグ体系が違うため分岐する:
|
|
261
|
+
- llama.cpp : -ngl / --ctx-size / --threads を算出
|
|
262
|
+
- vllm : モデル config からの自動導出に任せる (空文字)
|
|
263
|
+
- mlx : 統合メモリ前提で起動時フラグ不要 (空文字)
|
|
259
264
|
あくまで目安。他プロセスのメモリ使用や量子化方式までは考慮しない。
|
|
260
265
|
"""
|
|
261
|
-
|
|
266
|
+
if backend == "mlx":
|
|
267
|
+
# MLX は統合メモリ + Metal 前提。llama.cpp の -ngl に相当する
|
|
268
|
+
# レイヤーオフロードの概念がなく、mlx_lm.server は起動時の
|
|
269
|
+
# 性能チューニングフラグを取らない。
|
|
270
|
+
return ""
|
|
271
|
+
if backend == "vllm":
|
|
272
|
+
# vllm の --max-model-len はモデルの実コンテキスト長に依存する。
|
|
273
|
+
# メモリ量だけのヒューリスティックで値を出すと、モデルの上限を
|
|
274
|
+
# 超えたときに vllm が起動を拒否する。空にしてエンジンの
|
|
275
|
+
# 自動導出 (モデル config) に任せるのが安全。
|
|
276
|
+
return ""
|
|
277
|
+
|
|
278
|
+
# llama.cpp (デフォルト)
|
|
262
279
|
usable = _usable_memory_gb(hw)
|
|
263
280
|
weights = size_gb * 1.15 # 重み + オーバーヘッド概算
|
|
281
|
+
threads = max(1, int(hw.get("cpu_count", 4)) - 2)
|
|
264
282
|
if hw.get("gpu") == "cpu":
|
|
265
283
|
ngl = 0
|
|
266
284
|
elif usable >= weights + 1.0:
|
|
@@ -620,17 +638,20 @@ async def api_logs(proc_id: str, request: Request, n: int = 100) -> dict[str, An
|
|
|
620
638
|
|
|
621
639
|
|
|
622
640
|
@router.get("/api/launcher/suggest")
|
|
623
|
-
async def api_suggest(model_path: str = "") -> dict[str, Any]:
|
|
641
|
+
async def api_suggest(model_path: str = "",
|
|
642
|
+
backend: str = "llama.cpp") -> dict[str, Any]:
|
|
624
643
|
"""Suggest launch flags for the given model based on detected hardware.
|
|
625
644
|
|
|
626
645
|
クライアントの「推奨値」ボタンから呼ばれる。値はあくまで目安。
|
|
646
|
+
バックエンドごとにフラグ体系が違うため backend も受け取る。
|
|
627
647
|
"""
|
|
628
648
|
hw = await asyncio.to_thread(_detect_hardware)
|
|
629
649
|
size_gb = 0.0
|
|
630
650
|
if model_path:
|
|
631
651
|
size_gb = await asyncio.to_thread(_model_size_gb, model_path)
|
|
632
652
|
return {
|
|
633
|
-
"extra_args": _suggest_launch_flags(size_gb, hw),
|
|
653
|
+
"extra_args": _suggest_launch_flags(backend, size_gb, hw),
|
|
654
|
+
"backend": backend,
|
|
634
655
|
"hardware": hw,
|
|
635
656
|
"size_gb": round(size_gb, 2),
|
|
636
657
|
}
|
|
@@ -905,14 +926,24 @@ _LAUNCHER_HTML = r"""<!doctype html>
|
|
|
905
926
|
window.suggestOptions = async () => {
|
|
906
927
|
const model = document.getElementById("f-model").value.trim();
|
|
907
928
|
if (!model) { showLaunchErr("先にモデルを選択してください"); return; }
|
|
929
|
+
const backend = document.getElementById("f-backend").value;
|
|
908
930
|
try {
|
|
909
931
|
const r = await fetch("/api/launcher/suggest?model_path="
|
|
910
|
-
+ encodeURIComponent(model));
|
|
932
|
+
+ encodeURIComponent(model)
|
|
933
|
+
+ "&backend=" + encodeURIComponent(backend));
|
|
911
934
|
const d = await r.json();
|
|
912
935
|
if (!r.ok) { showLaunchErr(d.detail || "推奨値の取得に失敗"); return; }
|
|
913
936
|
document.getElementById("f-extra").value = d.extra_args;
|
|
914
937
|
showLaunchErr("");
|
|
915
|
-
|
|
938
|
+
if (d.extra_args) {
|
|
939
|
+
statusMsg("推奨値を設定(目安): " + d.extra_args);
|
|
940
|
+
} else if (backend === "mlx") {
|
|
941
|
+
statusMsg("MLX は起動時の調整フラグ不要です(統合メモリで自動)");
|
|
942
|
+
} else if (backend === "vllm") {
|
|
943
|
+
statusMsg("vllm は起動時フラグ不要です(モデル設定から自動導出)");
|
|
944
|
+
} else {
|
|
945
|
+
statusMsg("このバックエンドは推奨フラグの自動設定対象外です");
|
|
946
|
+
}
|
|
916
947
|
} catch (e) {
|
|
917
948
|
showLaunchErr(e.message);
|
|
918
949
|
}
|
|
@@ -63,9 +63,9 @@ Many documents have a Japanese version (`.md`) and an English version (`.en.md`)
|
|
|
63
63
|
|
|
64
64
|
ローカル推論バックエンドの導入・起動・接続。 / Installing, launching, and connecting local inference backends.
|
|
65
65
|
|
|
66
|
+
- **install-backends** — llama.cpp / vLLM / MLX のインストール手順 / Installing the three backends · [日本語](backends/install-backends.md) · [English](backends/install-backends.en.md)
|
|
66
67
|
- **launcher-quickstart** — バックエンド導入から起動までの最短手順 / Install a backend and launch · [日本語](backends/launcher-quickstart.md)
|
|
67
|
-
- **launcher** —
|
|
68
|
-
- **launcher-gui** — デスクトップ GUI ランチャー / Desktop GUI launcher · [日本語](backends/launcher-gui.md)
|
|
68
|
+
- **launcher** — Launcher ガイド(Web版・デスクトップGUI版) / Launcher guide (Web & Desktop GUI) · [日本語](backends/launcher.md)
|
|
69
69
|
- **llamacpp-direct** — llama.cpp に直結する / Connect llama.cpp directly · [日本語](backends/llamacpp-direct.md) · [English](backends/llamacpp-direct.en.md)
|
|
70
70
|
- **lmstudio-direct** — LM Studio に直結する / Connect LM Studio directly · [日本語](backends/lmstudio-direct.md) · [English](backends/lmstudio-direct.en.md)
|
|
71
71
|
- **hf-ollama-models** — HuggingFace 配布モデルを Ollama で使う / Use HF models via Ollama · [日本語](backends/hf-ollama-models.md)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Backend Installation Guide — llama.cpp / vLLM / MLX
|
|
2
|
+
|
|
3
|
+
How to install the three local inference backends that the CodeRouter Launcher starts and manages: **llama.cpp**, **vLLM**, and **MLX**. Installing any one of them is enough to get started.
|
|
4
|
+
|
|
5
|
+
For Launcher configuration and startup after installation, see the [Launcher Quickstart](./launcher-quickstart.md).
|
|
6
|
+
|
|
7
|
+
> 日本語版: [install-backends.md](./install-backends.md)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Which backend to choose
|
|
12
|
+
|
|
13
|
+
| Backend | OS | Model format | Best for |
|
|
14
|
+
|---|---|---|---|
|
|
15
|
+
| **llama.cpp** | macOS / Linux / Windows | GGUF | Anyone starting out. The most portable and lightweight option |
|
|
16
|
+
| **vLLM** | Linux (NVIDIA CUDA) | Hugging Face (safetensors) | High throughput on Linux + GPU |
|
|
17
|
+
| **MLX** | macOS (Apple Silicon) | MLX format | Fast inference on M-series Macs |
|
|
18
|
+
|
|
19
|
+
**When in doubt, use llama.cpp.** It runs on macOS, Linux, and Windows, has a huge selection of `.gguf` models, and is the lightest to set up. On an Apple Silicon Mac, MLX is a notch faster; on Linux with an NVIDIA GPU, vLLM gives the highest throughput.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 1. llama.cpp
|
|
24
|
+
|
|
25
|
+
Provides `llama-server`, which exposes an OpenAI-compatible API.
|
|
26
|
+
|
|
27
|
+
### Supported environment
|
|
28
|
+
|
|
29
|
+
macOS, Linux, and Windows — all three. GPU acceleration uses Metal on macOS and NVIDIA CUDA on Linux/Windows.
|
|
30
|
+
|
|
31
|
+
### Option A — Homebrew (macOS / Linux, easiest)
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
brew install llama.cpp
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
`llama-server` lands on your PATH. Done.
|
|
38
|
+
|
|
39
|
+
### Option B — winget (Windows)
|
|
40
|
+
|
|
41
|
+
```powershell
|
|
42
|
+
winget install ggml.llamacpp
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Option C — Prebuilt binaries (any OS)
|
|
46
|
+
|
|
47
|
+
From the [llama.cpp Releases page](https://github.com/ggml-org/llama.cpp/releases), download the archive matching your OS and backend (CPU / CUDA / Metal) and extract it. Use the `llama-server` inside directly.
|
|
48
|
+
|
|
49
|
+
### Option D — Build from source (latest version / GPU tuning)
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
git clone https://github.com/ggml-org/llama.cpp
|
|
53
|
+
cd llama.cpp
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**macOS (Apple Silicon)** — Metal is enabled by default:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
cmake -B build
|
|
60
|
+
cmake --build build --config Release -j
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Linux (NVIDIA CUDA)** — requires the CUDA Toolkit:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
cmake -B build -DGGML_CUDA=ON
|
|
67
|
+
cmake --build build --config Release -j
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
After building, the server binary is at `build/bin/llama-server`. You will point the Launcher at this full path later.
|
|
71
|
+
|
|
72
|
+
### Verify
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
llama-server --version
|
|
76
|
+
# Start with a model, then check connectivity from another terminal
|
|
77
|
+
llama-server -m ./model.gguf --port 8080
|
|
78
|
+
curl http://localhost:8080/v1/models
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Common pitfalls
|
|
82
|
+
|
|
83
|
+
- **CUDA and Metal cannot coexist in one binary.** Build/download for the machine you will run on.
|
|
84
|
+
- If `llama-server` is not found, set the **full path** of the binary (from option B/C/D) in the Launcher's `backends.llama.cpp.binary`.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 2. vLLM
|
|
89
|
+
|
|
90
|
+
### Supported environment
|
|
91
|
+
|
|
92
|
+
A high-performance inference server for **Linux + NVIDIA GPU (CUDA)**.
|
|
93
|
+
|
|
94
|
+
- **macOS**: CPU-only backend, not practical. Use llama.cpp or MLX on a Mac.
|
|
95
|
+
- **Windows**: no native support. Run the Linux steps inside WSL2 (Ubuntu).
|
|
96
|
+
|
|
97
|
+
### Install
|
|
98
|
+
|
|
99
|
+
Create the venv under `~/.coderouter/backends/` — the same place as the CodeRouter config — **with a separate venv per backend**. vLLM goes in `~/.coderouter/backends/vllm/` (vLLM and MLX have completely different dependency trees, so always keep their venvs separate). A fixed path lets you write the `binary:` directly in `providers.yaml`.
|
|
100
|
+
|
|
101
|
+
Installation via `uv` (a fast Python environment manager) is recommended:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv venv ~/.coderouter/backends/vllm --python 3.12 --seed
|
|
105
|
+
source ~/.coderouter/backends/vllm/bin/activate
|
|
106
|
+
uv pip install vllm --torch-backend=auto
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`pip` also works:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
python3.12 -m venv ~/.coderouter/backends/vllm
|
|
113
|
+
source ~/.coderouter/backends/vllm/bin/activate
|
|
114
|
+
pip install vllm
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Verify
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
python -c "import vllm; print(vllm.__version__)"
|
|
121
|
+
# Start with a model (first run downloads from Hugging Face)
|
|
122
|
+
python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-7B-Instruct --port 8080
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
> The modern CLI `vllm serve Qwen/Qwen2.5-7B-Instruct --port 8080` starts the same OpenAI-compatible server. The Launcher uses the environment-independent `python -m vllm.entrypoints.openai.api_server` form (both are the same thing).
|
|
126
|
+
|
|
127
|
+
### Launcher integration
|
|
128
|
+
|
|
129
|
+
The Launcher starts vLLM as `<python> -m vllm.entrypoints.openai.api_server`. Set `backends.vllm.binary` in `providers.yaml` to the python of the venv created above:
|
|
130
|
+
|
|
131
|
+
```yaml
|
|
132
|
+
backends:
|
|
133
|
+
vllm:
|
|
134
|
+
binary: ~/.coderouter/backends/vllm/bin/python
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Common pitfalls
|
|
138
|
+
|
|
139
|
+
- A **CUDA driver / Toolkit version mismatch** can break installation or startup. Check your GPU and driver with `nvidia-smi`.
|
|
140
|
+
- Being slow or non-functional on macOS is expected — use llama.cpp / MLX on a Mac.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 3. MLX
|
|
145
|
+
|
|
146
|
+
Provides `mlx_lm.server`, an inference server built on Apple's MLX machine-learning framework. It runs noticeably faster on Apple Silicon Macs.
|
|
147
|
+
|
|
148
|
+
### Supported environment
|
|
149
|
+
|
|
150
|
+
- **macOS 14.0 or later**, **Apple Silicon (M1 or newer)** only.
|
|
151
|
+
- A **native (arm64) Python 3.10+** is required. It will not work with Intel Macs or an x86 Python running under Rosetta.
|
|
152
|
+
|
|
153
|
+
### Install
|
|
154
|
+
|
|
155
|
+
Create the venv at `~/.coderouter/backends/mlx/` (a separate venv from vLLM — one per backend):
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
python3 -m venv ~/.coderouter/backends/mlx
|
|
159
|
+
source ~/.coderouter/backends/mlx/bin/activate
|
|
160
|
+
pip install mlx-lm
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Verify
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# Confirm a native (arm) Python — should print "arm"
|
|
167
|
+
python -c "import platform; print(platform.processor())"
|
|
168
|
+
# Confirm the install
|
|
169
|
+
python -c "import mlx_lm; print('mlx-lm OK')"
|
|
170
|
+
# Start with a model (first run downloads from Hugging Face)
|
|
171
|
+
mlx_lm.server --model mlx-community/Qwen2.5-7B-Instruct-4bit --port 8080
|
|
172
|
+
curl http://localhost:8080/v1/models
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Note on model format
|
|
176
|
+
|
|
177
|
+
**MLX cannot read GGUF.** `.gguf` files (for llama.cpp) do not work with MLX. Use **MLX-format models**, such as those published by [`mlx-community`](https://huggingface.co/mlx-community) on Hugging Face. Passing a repository ID directly — `mlx_lm.server --model mlx-community/<name>` — downloads it automatically on first use.
|
|
178
|
+
|
|
179
|
+
### Launcher integration
|
|
180
|
+
|
|
181
|
+
The Launcher starts MLX as `<python> -m mlx_lm.server`. Set `backends.mlx.binary` in `providers.yaml` to the python of the venv created above:
|
|
182
|
+
|
|
183
|
+
```yaml
|
|
184
|
+
backends:
|
|
185
|
+
mlx:
|
|
186
|
+
binary: ~/.coderouter/backends/mlx/bin/python
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Common pitfalls
|
|
190
|
+
|
|
191
|
+
- If **`platform.processor()` prints something other than `arm`** (`i386` / `x86_64`), your Python is an x86 build running under Rosetta. In Finder, Get Info on Terminal.app, uncheck "Open using Rosetta", and reinstall a native Python.
|
|
192
|
+
- On macOS older than 14.0, the PyPI package cannot be installed — update your OS.
|
|
193
|
+
- The "suggest values" button's flags such as `-ngl` are llama.cpp-only. MLX assumes unified memory and needs no launch-time tuning flags.
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## After installation — start it from the Launcher
|
|
198
|
+
|
|
199
|
+
Once a backend is installed, you can pick a model and start it from the CodeRouter Launcher. The `launcher:` block in `providers.yaml`, plus the full path from Launcher startup to connecting Claude Code, is covered in the [Launcher Quickstart](./launcher-quickstart.md).
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Related documents
|
|
204
|
+
|
|
205
|
+
- [Launcher Quickstart](./launcher-quickstart.md) — configuration and startup after installation
|
|
206
|
+
- [Launcher Guide (Web & Desktop GUI)](./launcher.md)
|
|
207
|
+
- [llama.cpp direct connection guide](./llamacpp-direct.en.md)
|
|
208
|
+
- [CodeRouter Quickstart](../start/quickstart.en.md)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# バックエンド インストール手順書 — llama.cpp / vLLM / MLX
|
|
2
|
+
|
|
3
|
+
CodeRouter Launcher が起動・管理する 3 つのローカル推論バックエンド ── **llama.cpp** / **vLLM** / **MLX** ── の導入手順です。いずれか 1 つを入れれば始められます。
|
|
4
|
+
|
|
5
|
+
導入後の Launcher 設定・起動については [Launcher クイックスタート](./launcher-quickstart.md) を参照してください。
|
|
6
|
+
|
|
7
|
+
> English version: [install-backends.en.md](./install-backends.en.md)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## どのバックエンドを選ぶか
|
|
12
|
+
|
|
13
|
+
| バックエンド | 対応 OS | モデル形式 | 向いている人 |
|
|
14
|
+
|---|---|---|---|
|
|
15
|
+
| **llama.cpp** | macOS / Linux / Windows | GGUF | まず試したい人。最も汎用的で軽量 |
|
|
16
|
+
| **vLLM** | Linux (NVIDIA CUDA) | Hugging Face (safetensors) | Linux + GPU で高スループットを出したい人 |
|
|
17
|
+
| **MLX** | macOS (Apple Silicon) | MLX 形式 | M シリーズ Mac で速く動かしたい人 |
|
|
18
|
+
|
|
19
|
+
**迷ったら llama.cpp。** macOS・Linux・Windows のどれでも動き、`.gguf` モデルが豊富で、セットアップが最も軽量です。Apple Silicon の Mac なら MLX が一段速く、Linux + NVIDIA GPU なら vLLM が高スループットです。
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 1. llama.cpp
|
|
24
|
+
|
|
25
|
+
OpenAI 互換 API を提供する `llama-server` を用意します。
|
|
26
|
+
|
|
27
|
+
### 対応環境
|
|
28
|
+
|
|
29
|
+
macOS / Linux / Windows のすべて。GPU は macOS で Metal、Linux/Windows で NVIDIA CUDA に対応します。
|
|
30
|
+
|
|
31
|
+
### 方法 A — Homebrew(macOS / Linux、最も簡単)
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
brew install llama.cpp
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
`llama-server` が PATH に入ります。これで完了です。
|
|
38
|
+
|
|
39
|
+
### 方法 B — winget(Windows)
|
|
40
|
+
|
|
41
|
+
```powershell
|
|
42
|
+
winget install ggml.llamacpp
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### 方法 C — プレビルドバイナリ(全 OS)
|
|
46
|
+
|
|
47
|
+
[llama.cpp の Releases ページ](https://github.com/ggml-org/llama.cpp/releases) から、OS とバックエンド(CPU / CUDA / Metal)に合ったアーカイブをダウンロードして展開します。中の `llama-server` をそのまま使えます。
|
|
48
|
+
|
|
49
|
+
### 方法 D — ソースからビルド(最新版・GPU 最適化したい場合)
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
git clone https://github.com/ggml-org/llama.cpp
|
|
53
|
+
cd llama.cpp
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**macOS (Apple Silicon)** — Metal は既定で有効:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
cmake -B build
|
|
60
|
+
cmake --build build --config Release -j
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Linux (NVIDIA CUDA)** — CUDA Toolkit が必要:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
cmake -B build -DGGML_CUDA=ON
|
|
67
|
+
cmake --build build --config Release -j
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
ビルド後、サーバーバイナリは `build/bin/llama-server` に生成されます。このフルパスを後で Launcher に設定します。
|
|
71
|
+
|
|
72
|
+
### 動作確認
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
llama-server --version
|
|
76
|
+
# モデルを指定して起動 → 別ターミナルで疎通確認
|
|
77
|
+
llama-server -m ./model.gguf --port 8080
|
|
78
|
+
curl http://localhost:8080/v1/models
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### よくあるつまずき
|
|
82
|
+
|
|
83
|
+
- **CUDA と Metal は同一バイナリに同梱できません。** 実行マシンに合わせてビルド/ダウンロードしてください。
|
|
84
|
+
- `llama-server` が見つからない場合は、方法 B/C/D で入れたバイナリの**フルパス**を Launcher の `backends.llama.cpp.binary` に設定します。
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 2. vLLM
|
|
89
|
+
|
|
90
|
+
### 対応環境
|
|
91
|
+
|
|
92
|
+
**Linux + NVIDIA GPU (CUDA)** 向けの高速推論サーバーです。
|
|
93
|
+
|
|
94
|
+
- **macOS**: CPU バックエンドのみで実用的ではありません。Mac では llama.cpp か MLX を使ってください。
|
|
95
|
+
- **Windows**: ネイティブ対応はありません。WSL2(Ubuntu)上で Linux 手順を実行してください。
|
|
96
|
+
|
|
97
|
+
### インストール
|
|
98
|
+
|
|
99
|
+
venv は CodeRouter 設定と同じ `~/.coderouter/backends/` 配下に、**バックエンドごとに分けて**作ります。vLLM は `~/.coderouter/backends/vllm/` です(vLLM と MLX は依存関係がまったく違うため、venv は必ず分けます)。場所を固定すると `providers.yaml` の `binary:` にそのまま書けます。
|
|
100
|
+
|
|
101
|
+
`uv`(高速な Python 環境管理ツール)での導入が推奨です:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv venv ~/.coderouter/backends/vllm --python 3.12 --seed
|
|
105
|
+
source ~/.coderouter/backends/vllm/bin/activate
|
|
106
|
+
uv pip install vllm --torch-backend=auto
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`pip` でも可:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
python3.12 -m venv ~/.coderouter/backends/vllm
|
|
113
|
+
source ~/.coderouter/backends/vllm/bin/activate
|
|
114
|
+
pip install vllm
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### 動作確認
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
python -c "import vllm; print(vllm.__version__)"
|
|
121
|
+
# モデルを指定して起動(初回は Hugging Face からダウンロード)
|
|
122
|
+
python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-7B-Instruct --port 8080
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
> 新しい CLI の `vllm serve Qwen/Qwen2.5-7B-Instruct --port 8080` でも同じ OpenAI 互換サーバーが起動します。Launcher は環境に依存しない `python -m vllm.entrypoints.openai.api_server` 形式を使います(どちらも実体は同じ)。
|
|
126
|
+
|
|
127
|
+
### Launcher との連携
|
|
128
|
+
|
|
129
|
+
Launcher は vLLM を `<python> -m vllm.entrypoints.openai.api_server` の形で起動します。`providers.yaml` の `backends.vllm.binary` には、上で作った venv の python を指定します:
|
|
130
|
+
|
|
131
|
+
```yaml
|
|
132
|
+
backends:
|
|
133
|
+
  vllm:
|
|
134
|
+
    binary: ~/.coderouter/backends/vllm/bin/python
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### よくあるつまずき
|
|
138
|
+
|
|
139
|
+
- **CUDA ドライバ / Toolkit のバージョン不一致**でインストールや起動に失敗することがあります。`nvidia-smi` で GPU とドライバを確認してください。
|
|
140
|
+
- macOS で遅い・動かないのは仕様です。Mac では llama.cpp / MLX を使ってください。
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 3. MLX
|
|
145
|
+
|
|
146
|
+
Apple 製の機械学習フレームワーク MLX を使った推論サーバー `mlx_lm.server` を用意します。Apple Silicon の Mac で一段速く動きます。
|
|
147
|
+
|
|
148
|
+
### 対応環境
|
|
149
|
+
|
|
150
|
+
- **macOS 14.0 以降**、**Apple Silicon (M1 以降)** のみ。
|
|
151
|
+
- **ネイティブ(arm64)の Python 3.10 以降**が必要です。Intel Mac・Rosetta 経由の x86 Python では動きません。
|
|
152
|
+
|
|
153
|
+
### インストール
|
|
154
|
+
|
|
155
|
+
venv は `~/.coderouter/backends/mlx/` に作ります(vLLM とは別の venv。バックエンドごとに分けます):
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
python3 -m venv ~/.coderouter/backends/mlx
|
|
159
|
+
source ~/.coderouter/backends/mlx/bin/activate
|
|
160
|
+
pip install mlx-lm
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### 動作確認
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# native (arm) Python であることを確認 — "arm" と出れば OK
|
|
167
|
+
python -c "import platform; print(platform.processor())"
|
|
168
|
+
# インストール確認
|
|
169
|
+
python -c "import mlx_lm; print('mlx-lm OK')"
|
|
170
|
+
# モデルを指定して起動(初回は Hugging Face からダウンロード)→ 別ターミナルで疎通確認
|
|
171
|
+
mlx_lm.server --model mlx-community/Qwen2.5-7B-Instruct-4bit --port 8080
|
|
172
|
+
curl http://localhost:8080/v1/models
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### モデル形式の注意
|
|
176
|
+
|
|
177
|
+
**MLX は GGUF を読めません。** llama.cpp 用の `.gguf` ファイルは MLX では使えません。Hugging Face の [`mlx-community`](https://huggingface.co/mlx-community) が配布する **MLX 形式のモデル**を使ってください。`mlx_lm.server --model mlx-community/<モデル名>` のようにリポジトリ ID を直接指定すると、初回に自動ダウンロードされます。
|
|
178
|
+
|
|
179
|
+
### Launcher との連携
|
|
180
|
+
|
|
181
|
+
Launcher は MLX を `<python> -m mlx_lm.server` の形で起動します。`providers.yaml` の `backends.mlx.binary` には、上で作った venv の python を指定します:
|
|
182
|
+
|
|
183
|
+
```yaml
|
|
184
|
+
backends:
|
|
185
|
+
  mlx:
|
|
186
|
+
    binary: ~/.coderouter/backends/mlx/bin/python
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### よくあるつまずき
|
|
190
|
+
|
|
191
|
+
- **`platform.processor()` が `arm` 以外**(`i386` / `x86_64`)の場合、Rosetta 経由の x86 Python です。Finder でターミナル.app の「情報を見る」を開き、「Rosetta を使用して開く」のチェックを外したうえで、ネイティブの Python を入れ直してください。
|
|
192
|
+
- macOS が 14.0 未満では PyPI 版がインストールできません。OS を更新してください。
|
|
193
|
+
- 推奨値ボタンの `-ngl` などは llama.cpp 専用フラグです。MLX は統合メモリ前提のため起動時の調整フラグは不要です。
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## インストール後 — Launcher で起動する
|
|
198
|
+
|
|
199
|
+
バックエンドが入ったら、CodeRouter Launcher からモデルを選んで起動できます。`providers.yaml` の `launcher:` ブロック設定と Launcher の起動・Claude Code 接続までの通し手順は、[Launcher クイックスタート](./launcher-quickstart.md) にまとめてあります。
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## 関連ドキュメント
|
|
204
|
+
|
|
205
|
+
- [Launcher クイックスタート](./launcher-quickstart.md) — 導入後の設定〜起動
|
|
206
|
+
- [Launcher ガイド(Web版・デスクトップGUI版)](./launcher.md)
|
|
207
|
+
- [llama.cpp 直接接続ガイド](./llamacpp-direct.md)
|
|
208
|
+
- [CodeRouter クイックスタート](../start/quickstart.md)
|