coderouter-cli 2.5.1__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/ingress/launcher_routes.py +39 -8
- {coderouter_cli-2.5.1.dist-info → coderouter_cli-2.5.2.dist-info}/METADATA +1 -1
- {coderouter_cli-2.5.1.dist-info → coderouter_cli-2.5.2.dist-info}/RECORD +6 -6
- {coderouter_cli-2.5.1.dist-info → coderouter_cli-2.5.2.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.5.1.dist-info → coderouter_cli-2.5.2.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.5.1.dist-info → coderouter_cli-2.5.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -253,14 +253,32 @@ def _model_recommendation(size_gb: float, hw: dict[str, Any]) -> dict[str, str]:
|
|
|
253
253
|
return {"level": "warn", "label": "メモリ厳しい"}
|
|
254
254
|
|
|
255
255
|
|
|
256
|
-
def _suggest_launch_flags(size_gb: float, hw: dict[str, Any]) -> str:
|
|
257
|
-
|
|
258
|
-
|
|
256
|
+
def _suggest_launch_flags(backend: str, size_gb: float,
|
|
257
|
+
hw: dict[str, Any]) -> str:
|
|
258
|
+
"""選択モデル + ハード + バックエンドから推奨起動フラグを提案する。
|
|
259
|
+
|
|
260
|
+
バックエンドごとにフラグ体系が違うため分岐する:
|
|
261
|
+
- llama.cpp : -ngl / --ctx-size / --threads を算出
|
|
262
|
+
- vllm : モデル config からの自動導出に任せる (空文字)
|
|
263
|
+
- mlx : 統合メモリ前提で起動時フラグ不要 (空文字)
|
|
259
264
|
あくまで目安。他プロセスのメモリ使用や量子化方式までは考慮しない。
|
|
260
265
|
"""
|
|
261
|
-
|
|
266
|
+
if backend == "mlx":
|
|
267
|
+
# MLX は統合メモリ + Metal 前提。llama.cpp の -ngl に相当する
|
|
268
|
+
# レイヤーオフロードの概念がなく、mlx_lm.server は起動時の
|
|
269
|
+
# 性能チューニングフラグを取らない。
|
|
270
|
+
return ""
|
|
271
|
+
if backend == "vllm":
|
|
272
|
+
# vllm の --max-model-len はモデルの実コンテキスト長に依存する。
|
|
273
|
+
# メモリ量だけのヒューリスティックで値を出すと、モデルの上限を
|
|
274
|
+
# 超えたときに vllm が起動を拒否する。空にしてエンジンの
|
|
275
|
+
# 自動導出 (モデル config) に任せるのが安全。
|
|
276
|
+
return ""
|
|
277
|
+
|
|
278
|
+
# llama.cpp (デフォルト)
|
|
262
279
|
usable = _usable_memory_gb(hw)
|
|
263
280
|
weights = size_gb * 1.15 # 重み + オーバーヘッド概算
|
|
281
|
+
threads = max(1, int(hw.get("cpu_count", 4)) - 2)
|
|
264
282
|
if hw.get("gpu") == "cpu":
|
|
265
283
|
ngl = 0
|
|
266
284
|
elif usable >= weights + 1.0:
|
|
@@ -620,17 +638,20 @@ async def api_logs(proc_id: str, request: Request, n: int = 100) -> dict[str, An
|
|
|
620
638
|
|
|
621
639
|
|
|
622
640
|
@router.get("/api/launcher/suggest")
|
|
623
|
-
async def api_suggest(model_path: str = "") -> dict[str, Any]:
|
|
641
|
+
async def api_suggest(model_path: str = "",
|
|
642
|
+
backend: str = "llama.cpp") -> dict[str, Any]:
|
|
624
643
|
"""Suggest launch flags for the given model based on detected hardware.
|
|
625
644
|
|
|
626
645
|
クライアントの「推奨値」ボタンから呼ばれる。値はあくまで目安。
|
|
646
|
+
バックエンドごとにフラグ体系が違うため backend も受け取る。
|
|
627
647
|
"""
|
|
628
648
|
hw = await asyncio.to_thread(_detect_hardware)
|
|
629
649
|
size_gb = 0.0
|
|
630
650
|
if model_path:
|
|
631
651
|
size_gb = await asyncio.to_thread(_model_size_gb, model_path)
|
|
632
652
|
return {
|
|
633
|
-
"extra_args": _suggest_launch_flags(size_gb, hw),
|
|
653
|
+
"extra_args": _suggest_launch_flags(backend, size_gb, hw),
|
|
654
|
+
"backend": backend,
|
|
634
655
|
"hardware": hw,
|
|
635
656
|
"size_gb": round(size_gb, 2),
|
|
636
657
|
}
|
|
@@ -905,14 +926,24 @@ _LAUNCHER_HTML = r"""<!doctype html>
|
|
|
905
926
|
window.suggestOptions = async () => {
|
|
906
927
|
const model = document.getElementById("f-model").value.trim();
|
|
907
928
|
if (!model) { showLaunchErr("先にモデルを選択してください"); return; }
|
|
929
|
+
const backend = document.getElementById("f-backend").value;
|
|
908
930
|
try {
|
|
909
931
|
const r = await fetch("/api/launcher/suggest?model_path="
|
|
910
|
-
+ encodeURIComponent(model));
|
|
932
|
+
+ encodeURIComponent(model)
|
|
933
|
+
+ "&backend=" + encodeURIComponent(backend));
|
|
911
934
|
const d = await r.json();
|
|
912
935
|
if (!r.ok) { showLaunchErr(d.detail || "推奨値の取得に失敗"); return; }
|
|
913
936
|
document.getElementById("f-extra").value = d.extra_args;
|
|
914
937
|
showLaunchErr("");
|
|
915
|
-
|
|
938
|
+
if (d.extra_args) {
|
|
939
|
+
statusMsg("推奨値を設定(目安): " + d.extra_args);
|
|
940
|
+
} else if (backend === "mlx") {
|
|
941
|
+
statusMsg("MLX は起動時の調整フラグ不要です(統合メモリで自動)");
|
|
942
|
+
} else if (backend === "vllm") {
|
|
943
|
+
statusMsg("vllm は起動時フラグ不要です(モデル設定から自動導出)");
|
|
944
|
+
} else {
|
|
945
|
+
statusMsg("このバックエンドは推奨フラグの自動設定対象外です");
|
|
946
|
+
}
|
|
916
947
|
} catch (e) {
|
|
917
948
|
showLaunchErr(e.message);
|
|
918
949
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.5.1
|
|
3
|
+
Version: 2.5.2
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -36,7 +36,7 @@ coderouter/ingress/__init__.py,sha256=WQsCH2CGJCAhy0mS6GSEdeYZRkkQu2OHDsP4CJWTLu
|
|
|
36
36
|
coderouter/ingress/anthropic_routes.py,sha256=It2f7XGe3fgKQX01J2F5JOCoZr96t_Tx_kY2om99MVo,16894
|
|
37
37
|
coderouter/ingress/app.py,sha256=PcuTvUFNjr04EbsUOu8qdyKTdBzxkIJYB4xpz8dFfMo,12635
|
|
38
38
|
coderouter/ingress/dashboard_routes.py,sha256=rscoj89weHTfc8QmYk-fof-7062rhKFHVHRA8cDImDI,21931
|
|
39
|
-
coderouter/ingress/launcher_routes.py,sha256=
|
|
39
|
+
coderouter/ingress/launcher_routes.py,sha256=Jh-E6qFmHnr7ON4W6QanafxQIoojT4F034mybLvhTyQ,47548
|
|
40
40
|
coderouter/ingress/metrics_routes.py,sha256=M22dwOGn24P05Ge4W3c7d7mYytSGWjIR-pPSPOAiHJY,3965
|
|
41
41
|
coderouter/ingress/openai_routes.py,sha256=Zw1efPw9DI6GgV8ZcLrzS6Cda0KLrFkKn2GBZWSe6Vo,6322
|
|
42
42
|
coderouter/metrics/__init__.py,sha256=7Es351DPS7yLM0yVF_F0eesmiD83n7Zzhie44chht38,1465
|
|
@@ -62,8 +62,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
|
|
|
62
62
|
coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
|
|
63
63
|
coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
|
|
64
64
|
coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
|
|
65
|
-
coderouter_cli-2.5.
|
|
66
|
-
coderouter_cli-2.5.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
67
|
-
coderouter_cli-2.5.1.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
68
|
-
coderouter_cli-2.5.1.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
69
|
-
coderouter_cli-2.5.1.dist-info/RECORD,,
|
|
65
|
+
coderouter_cli-2.5.2.dist-info/METADATA,sha256=FIs0I95zZ_y40oz1Vby5cFmlZMABs559jQyBBgikMq8,11521
|
|
66
|
+
coderouter_cli-2.5.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
67
|
+
coderouter_cli-2.5.2.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
68
|
+
coderouter_cli-2.5.2.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
69
|
+
coderouter_cli-2.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|