coderouter-cli 2.3.0a4__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +31 -0
- coderouter/config/schemas.py +157 -0
- coderouter/guards/__init__.py +2 -0
- coderouter/guards/_fingerprint.py +125 -0
- coderouter/guards/drift_detection.py +55 -0
- coderouter/ingress/app.py +11 -0
- coderouter/ingress/dashboard_routes.py +1 -0
- coderouter/ingress/launcher_routes.py +1176 -0
- coderouter/routing/fallback.py +33 -3
- coderouter/state/__init__.py +15 -11
- coderouter/state/suggest_rules.py +413 -0
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/METADATA +36 -4
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/RECORD +16 -13
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1176 @@
|
|
|
1
|
+
"""Launcher routes — ``GET /launcher`` + ``/api/launcher/*``.
|
|
2
|
+
|
|
3
|
+
Process-management UI for llama.cpp / vllm backends.

Design notes:
- Same "single self-contained file" style as the dashboard (Tailwind CDN + inline JS).
- The process registry is kept on app.state.launcher (it is lost on restart, by design).
- option_profiles are managed in the launcher: section of providers.yaml, so they can
  be extended without code changes.
- Several processes can run at the same time (UUID-based ID management).
- llama.cpp and vllm share the same key-value args schema.

Endpoints:
  GET    /launcher                        → HTML UI
  GET    /api/launcher/models             → scan model_dirs and return the list
  GET    /api/launcher/option-profiles    → option_profiles from providers.yaml
  GET    /api/launcher/config-debug       → effective launcher config (troubleshooting)
  GET    /api/launcher/processes          → running and stopped processes
  GET    /api/launcher/backends           → resolved executable for each backend
  POST   /api/launcher/start              → start a process
  POST   /api/launcher/stop/{id}          → stop a process (SIGTERM → SIGKILL)
  DELETE /api/launcher/processes/{id}     → remove from the registry (stopped only)
  GET    /api/launcher/logs/{id}          → last N log lines
  GET    /api/launcher/suggest            → suggested launch flags for a model
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import asyncio
|
|
26
|
+
import contextlib
|
|
27
|
+
import os
|
|
28
|
+
import platform
|
|
29
|
+
import shlex
|
|
30
|
+
import shutil
|
|
31
|
+
import subprocess
|
|
32
|
+
import uuid
|
|
33
|
+
from collections import deque
|
|
34
|
+
from dataclasses import dataclass, field
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Any
|
|
37
|
+
|
|
38
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
39
|
+
from fastapi.responses import HTMLResponse
|
|
40
|
+
from pydantic import BaseModel
|
|
41
|
+
|
|
42
|
+
router = APIRouter()
|
|
43
|
+
|
|
44
|
+
# 背景タスクへの強参照を保持する (create_task の戻り値が GC されるのを防ぐ)
|
|
45
|
+
_background_tasks: set[asyncio.Task[Any]] = set()
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Model file extensions to scan
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
_MODEL_EXTS = {".gguf", ".ggml", ".safetensors", ".bin", ".pt", ".pth"}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
# ManagedProcess — 起動したプロセスの状態を保持
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class ManagedProcess:
    """Registry record describing one backend process started by the launcher.

    The first seven fields have no default and must be supplied when the
    record is created; the remaining fields describe runtime state.
    """

    # --- launch parameters (required) ---
    id: str
    name: str
    backend: str  # "llama.cpp" | "vllm"
    model_path: str
    port: int
    options: dict[str, Any]
    extra_args: str

    # --- runtime state ---
    status: str = "starting"  # "starting" | "running" | "stopped" | "error"
    pid: int | None = None
    returncode: int | None = None
    # Bounded buffer: only the 200 most recent log lines are retained.
    log_tail: deque[str] = field(default_factory=lambda: deque(maxlen=200))
    # asyncio subprocess handle; excluded from repr() and from equality checks.
    _proc: Any = field(default=None, repr=False, compare=False)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# LauncherRegistry — app.state に格納するレジストリ
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class LauncherRegistry:
    """Hold ManagedProcess entries in memory, keyed by their ``id``.

    Nothing is persisted: the registry is a plain dict wrapper.
    """

    def __init__(self) -> None:
        self._procs: dict[str, ManagedProcess] = {}

    def add(self, proc: ManagedProcess) -> None:
        """Store ``proc`` under ``proc.id``, replacing any entry with that id."""
        self._procs[proc.id] = proc

    def get(self, proc_id: str) -> ManagedProcess:
        """Return the entry for ``proc_id``; raise ``KeyError(proc_id)`` if absent."""
        if proc_id not in self._procs:
            raise KeyError(proc_id)
        return self._procs[proc_id]

    def remove(self, proc_id: str) -> None:
        """Drop the entry for ``proc_id``; raise ``KeyError`` if it is absent."""
        self._procs.pop(proc_id)

    def all(self) -> list[ManagedProcess]:
        """Return a new list of all entries in insertion order."""
        return [*self._procs.values()]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _registry(request: Request) -> LauncherRegistry:
    """Return the app-wide LauncherRegistry, creating it on first access.

    The registry is stored as ``request.app.state.launcher``.
    """
    state = request.app.state
    if hasattr(state, "launcher"):
        return state.launcher
    state.launcher = LauncherRegistry()
    return state.launcher
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Model scanning
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _scan_models(model_dirs: list[str]) -> list[dict[str, Any]]:
|
|
118
|
+
"""Walk model_dirs and return metadata for each discovered model file."""
|
|
119
|
+
found: list[dict[str, Any]] = []
|
|
120
|
+
for raw in model_dirs:
|
|
121
|
+
base = Path(raw).expanduser().resolve()
|
|
122
|
+
if not base.exists():
|
|
123
|
+
continue
|
|
124
|
+
for p in sorted(base.rglob("*")):
|
|
125
|
+
if not p.is_file():
|
|
126
|
+
continue
|
|
127
|
+
if p.suffix.lower() not in _MODEL_EXTS:
|
|
128
|
+
continue
|
|
129
|
+
try:
|
|
130
|
+
size = p.stat().st_size
|
|
131
|
+
except OSError:
|
|
132
|
+
continue
|
|
133
|
+
found.append(
|
|
134
|
+
{
|
|
135
|
+
"path": str(p),
|
|
136
|
+
"name": p.name,
|
|
137
|
+
"dir": str(p.parent),
|
|
138
|
+
"size_gb": round(size / (1024**3), 2),
|
|
139
|
+
"ext": p.suffix.lower(),
|
|
140
|
+
}
|
|
141
|
+
)
|
|
142
|
+
return found
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# Command builder
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
_BACKEND_DEFAULTS: dict[str, str] = {
|
|
151
|
+
"llama.cpp": "llama-server",
|
|
152
|
+
"vllm": "python",
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _resolve_binary(backend: str, configured: str | None) -> str:
|
|
157
|
+
"""Return the executable to use, expanding ~ and env vars."""
|
|
158
|
+
raw = configured or _BACKEND_DEFAULTS.get(backend, backend)
|
|
159
|
+
return str(Path(raw).expanduser())
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _resolve_backends_sync(
    backends_cfg: dict[str, Any] | None,
) -> dict[str, dict[str, Any]]:
    """Report, for every known backend, which executable would be launched.

    Args:
        backends_cfg: Mapping of backend name to a config object exposing a
            ``binary`` attribute, or ``None`` / empty when nothing is
            configured.

    Returns:
        ``{backend: {"resolved", "configured", "default", "is_custom",
        "found"}}`` for each key of ``_BACKEND_DEFAULTS``. ``found`` is true
        when the resolved path is an existing file or can be located with
        ``shutil.which``.

    Does blocking filesystem I/O (``is_file`` / ``shutil.which``); async
    routes must call it through ``asyncio.to_thread``.
    """
    cfg = backends_cfg or {}
    report: dict[str, dict[str, Any]] = {}
    for backend, default_bin in _BACKEND_DEFAULTS.items():
        entry = cfg.get(backend)
        configured: str | None = entry.binary if (entry and entry.binary) else None
        resolved = _resolve_binary(backend, configured)
        candidate = Path(resolved).expanduser()
        if candidate.is_file():
            # Full path given and the file exists.
            found = True
        else:
            # Otherwise it must be resolvable through PATH.
            found = shutil.which(str(candidate)) is not None
        report[backend] = {
            "resolved": resolved,
            "configured": configured or "",
            "default": default_bin,
            "is_custom": configured is not None,
            "found": found,
        }
    return report
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# Hardware detection + model recommendation (luna-go /models 互換の発想)
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _detect_hardware() -> dict[str, Any]:
|
|
199
|
+
"""ハードウェアを best-effort で検出する。
|
|
200
|
+
|
|
201
|
+
ブロッキング I/O (sysctl / nvidia-smi) を含むため、async ルートからは
|
|
202
|
+
``asyncio.to_thread`` 経由で呼ぶこと。
|
|
203
|
+
"""
|
|
204
|
+
cpu = os.cpu_count() or 4
|
|
205
|
+
ram_gb = 0.0
|
|
206
|
+
with contextlib.suppress(ValueError, OSError, AttributeError):
|
|
207
|
+
ram_gb = (os.sysconf("SC_PHYS_PAGES")
|
|
208
|
+
* os.sysconf("SC_PAGE_SIZE") / (1024 ** 3))
|
|
209
|
+
if ram_gb <= 0:
|
|
210
|
+
try:
|
|
211
|
+
out = subprocess.run(["sysctl", "-n", "hw.memsize"],
|
|
212
|
+
capture_output=True, text=True, timeout=3)
|
|
213
|
+
ram_gb = int(out.stdout.strip()) / (1024 ** 3)
|
|
214
|
+
except (ValueError, OSError, subprocess.SubprocessError):
|
|
215
|
+
pass
|
|
216
|
+
gpu, vram_gb = "cpu", 0.0
|
|
217
|
+
if platform.system() == "Darwin" and platform.machine() == "arm64":
|
|
218
|
+
gpu, vram_gb = "metal", ram_gb # ユニファイドメモリ
|
|
219
|
+
elif shutil.which("nvidia-smi"):
|
|
220
|
+
try:
|
|
221
|
+
out = subprocess.run(
|
|
222
|
+
["nvidia-smi", "--query-gpu=memory.total",
|
|
223
|
+
"--format=csv,noheader,nounits"],
|
|
224
|
+
capture_output=True, text=True, timeout=5)
|
|
225
|
+
mb = max((int(x) for x in out.stdout.split() if x.strip().isdigit()),
|
|
226
|
+
default=0)
|
|
227
|
+
if mb > 0:
|
|
228
|
+
gpu, vram_gb = "cuda", mb / 1024
|
|
229
|
+
except (ValueError, OSError, subprocess.SubprocessError):
|
|
230
|
+
pass
|
|
231
|
+
return {"ram_gb": round(ram_gb, 1), "vram_gb": round(vram_gb, 1),
|
|
232
|
+
"gpu": gpu, "cpu_count": cpu}
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _usable_memory_gb(hw: dict[str, Any]) -> float:
|
|
236
|
+
"""モデルの重み + KV キャッシュに使えるメモリ量。"""
|
|
237
|
+
if hw.get("gpu") == "cuda":
|
|
238
|
+
return float(hw.get("vram_gb", 0.0))
|
|
239
|
+
return float(hw.get("ram_gb", 0.0)) # metal (ユニファイド) / cpu
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _model_recommendation(size_gb: float, hw: dict[str, Any]) -> dict[str, str]:
    """Judge whether a model of ``size_gb`` fits the detected memory.

    Returns:
        A dict with ``level`` and a display ``label``:
        ``"unknown"`` when either the model size or the usable memory is not
        positive, ``"ok"`` when ``size_gb * 1.2 + 2.0`` fits into the usable
        memory, and ``"warn"`` otherwise.
    """
    usable = _usable_memory_gb(hw)
    if size_gb <= 0 or usable <= 0:
        return {"level": "unknown", "label": "—"}
    fits = size_gb * 1.2 + 2.0 <= usable
    level, label = ("ok", "推奨") if fits else ("warn", "メモリ厳しい")
    return {"level": level, "label": label}
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _suggest_launch_flags(size_gb: float, hw: dict[str, Any]) -> str:
    """Suggest ``-ngl`` / ``--ctx-size`` / ``--threads`` for a model.

    The values are a rough guide only: memory used by other processes and
    the quantisation scheme are not taken into account.

    Args:
        size_gb: Model file size in GB (0 when unknown).
        hw: Hardware description as produced by ``_detect_hardware``.

    Returns:
        A flag string of the form ``"-ngl N --ctx-size N --threads N"``.
    """
    # Leave two cores for the rest of the system, but use at least one thread.
    threads = max(1, int(hw.get("cpu_count", 4)) - 2)
    usable = _usable_memory_gb(hw)
    weights = size_gb * 1.15  # weights plus a rough overhead estimate

    ngl = 0  # CPU-only, or too little memory to offload anything
    if hw.get("gpu") != "cpu":
        if usable >= weights + 1.0:
            ngl = 99  # every layer fits on the GPU
        elif usable > 1.5:
            # Partial offload, proportional to the share of weights that fits.
            ngl = max(0, min(99, int(99 * (usable - 0.7) / max(weights, 0.1))))

    # Context size grows with the memory left after weights and 1 GB reserve.
    headroom = usable - weights - 1.0
    ctx = 4096
    for min_headroom, ctx_size in ((8, 32768), (4, 16384), (2, 8192)):
        if headroom >= min_headroom:
            ctx = ctx_size
            break

    return f"-ngl {ngl} --ctx-size {ctx} --threads {threads}"
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _model_size_gb(path: str) -> float:
|
|
284
|
+
"""モデルファイルのサイズ (GB)。失敗時は 0.0 (ブロッキング — to_thread 推奨)。"""
|
|
285
|
+
try:
|
|
286
|
+
return Path(path).expanduser().stat().st_size / (1024 ** 3)
|
|
287
|
+
except OSError:
|
|
288
|
+
return 0.0
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _build_cmd(
    backend: str,
    model_path: str,
    port: int,
    options: dict[str, Any],
    extra_args: str,
    binary: str | None = None,
) -> list[str]:
    """Assemble the argv list used to launch ``backend``.

    Args:
        backend: ``"llama.cpp"`` or ``"vllm"``.
        model_path: Passed as ``-m`` (llama.cpp) or ``--model`` (vllm).
        port: Passed as ``--port``.
        options: Flag → value mapping. ``True`` emits the bare flag,
            ``False`` omits it, any other value is emitted as
            ``flag str(value)``.
        extra_args: Free-form argument string, tokenised with ``shlex.split``.
        binary: Overrides the default executable (``llama-server`` /
            ``python``). When ``None`` the default is used and PATH
            resolution is left to the OS.

    Raises:
        ValueError: For an unknown backend (``shlex.split`` can also raise
            it for malformed quoting in ``extra_args``).
    """
    exe = _resolve_binary(backend, binary)

    if backend == "llama.cpp":
        argv: list[str] = [exe, "-m", model_path]
    elif backend == "vllm":
        argv = [exe, "-m", "vllm.entrypoints.openai.api_server", "--model", model_path]
    else:
        raise ValueError(f"Unknown backend: {backend!r}. Expected 'llama.cpp' or 'vllm'.")
    argv += ["--port", str(port)]

    for flag, val in options.items():
        if val is True:
            argv.append(flag)
        elif val is not False:
            # NOTE(review): a None value ends up as the literal string "None".
            argv += [flag, str(val)]

    extra_tokens = shlex.split(extra_args) if extra_args.strip() else []
    argv.extend(extra_tokens)
    return argv
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# ---------------------------------------------------------------------------
|
|
332
|
+
# Log reader background task
|
|
333
|
+
# ---------------------------------------------------------------------------
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
async def _tail_logs(proc: ManagedProcess) -> None:
|
|
337
|
+
"""Read stdout+stderr into proc.log_tail until the process exits."""
|
|
338
|
+
p = proc._proc
|
|
339
|
+
if p is None:
|
|
340
|
+
return
|
|
341
|
+
|
|
342
|
+
async def _drain(stream: asyncio.StreamReader | None) -> None:
|
|
343
|
+
if stream is None:
|
|
344
|
+
return
|
|
345
|
+
while True:
|
|
346
|
+
line = await stream.readline()
|
|
347
|
+
if not line:
|
|
348
|
+
break
|
|
349
|
+
proc.log_tail.append(line.decode(errors="replace").rstrip())
|
|
350
|
+
|
|
351
|
+
await asyncio.gather(_drain(p.stdout), _drain(p.stderr))
|
|
352
|
+
await p.wait()
|
|
353
|
+
proc.returncode = p.returncode
|
|
354
|
+
proc.pid = None
|
|
355
|
+
proc.status = "stopped" if (p.returncode or 0) == 0 else "error"
|
|
356
|
+
proc.log_tail.append(
|
|
357
|
+
f"[launcher] process exited with code {p.returncode}"
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
async def shutdown_launcher(app: Any, *, grace_seconds: float = 5.0) -> None:
    """Terminate all managed child processes on CodeRouter shutdown.

    Intended to be called from the FastAPI lifespan so that llama.cpp / vllm
    processes started via the Launcher are not left as orphans.

    Every live child first receives ``terminate()``. All children are then
    awaited concurrently; one that has not exited within ``grace_seconds``
    is killed and awaited once more so that it is reaped.

    Args:
        app: Application object; the registry is read from
            ``app.state.launcher``. Nothing happens when it is absent.
        grace_seconds: How long to wait after ``terminate()`` (and again
            after ``kill()``) for each child.
    """
    reg = getattr(app.state, "launcher", None)
    if reg is None:
        return

    live = [
        entry._proc
        for entry in reg.all()
        if entry._proc is not None and entry._proc.returncode is None
    ]
    if not live:
        return

    for child in live:
        # The child may exit between the returncode check and the signal.
        with contextlib.suppress(OSError):
            child.terminate()

    # asyncio.TimeoutError is only an alias of TimeoutError on Python 3.11+.
    timeout_errors = (asyncio.TimeoutError, TimeoutError)

    async def _reap(child: Any) -> None:
        try:
            await asyncio.wait_for(child.wait(), timeout=grace_seconds)
            return
        except timeout_errors:
            pass
        with contextlib.suppress(OSError):
            child.kill()
        # Collect the exit status so the killed child is not left unreaped.
        with contextlib.suppress(*timeout_errors):
            await asyncio.wait_for(child.wait(), timeout=grace_seconds)

    # Shutdown is best-effort: one failing child must not block the others.
    await asyncio.gather(*(_reap(child) for child in live), return_exceptions=True)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# ---------------------------------------------------------------------------
|
|
390
|
+
# Pydantic request models
|
|
391
|
+
# ---------------------------------------------------------------------------
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class StartRequest(BaseModel):
    """Request body accepted by ``POST /api/launcher/start``."""

    # Display name for the new process entry.
    name: str
    # Backend identifier handed to the command builder.
    backend: str
    # Model location handed to the backend.
    model_path: str
    # Port the backend is told to listen on.
    port: int
    # Flag → value mapping turned into command-line arguments.
    options: dict[str, Any] = {}
    # Free-form extra arguments appended to the command line.
    extra_args: str = ""
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# ---------------------------------------------------------------------------
|
|
404
|
+
# API routes
|
|
405
|
+
# ---------------------------------------------------------------------------
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@router.get("/api/launcher/models")
async def api_models(request: Request) -> dict[str, Any]:
    """Scan the configured model directories and describe what was found.

    Returns:
        ``{"models", "model_dirs", "hardware"}``. Every model entry carries
        an extra ``recommendation`` computed from its size and the detected
        hardware.
    """
    launcher_cfg = getattr(request.app.state.config, "launcher", None)
    model_dirs: list[str] = []
    if launcher_cfg:
        model_dirs = launcher_cfg.model_dirs

    # Directory walking and hardware probing are blocking I/O; run them in a
    # worker thread so the event loop (and thus the whole proxy) keeps going.
    models = await asyncio.to_thread(_scan_models, model_dirs)
    hw = await asyncio.to_thread(_detect_hardware)

    for entry in models:
        entry["recommendation"] = _model_recommendation(entry.get("size_gb", 0.0), hw)

    return {"models": models, "model_dirs": model_dirs, "hardware": hw}
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
@router.get("/api/launcher/option-profiles")
async def api_option_profiles(request: Request) -> dict[str, Any]:
    """Return the option profiles defined in the launcher config.

    Returns:
        ``{"profiles": {backend: [{"name", "args"}, ...]}}``. When the
        launcher block or its ``option_profiles`` is missing or empty,
        ``profiles`` is empty and a ``_note`` explains why.
    """
    launcher_cfg = getattr(request.app.state.config, "launcher", None)
    if not launcher_cfg:
        return {"profiles": {}, "_note": "launcher: block not found in providers.yaml"}

    configured = launcher_cfg.option_profiles
    if not configured:
        return {"profiles": {}, "_note": "option_profiles is empty — add option_profiles: under launcher: in providers.yaml"}

    by_backend: dict[str, list[dict]] = {
        backend: [{"name": profile.name, "args": profile.args} for profile in profiles]
        for backend, profiles in configured.items()
    }
    return {"profiles": by_backend}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
@router.get("/api/launcher/config-debug")
async def api_launcher_config_debug(request: Request) -> dict[str, Any]:
    """Return the effective launcher config for troubleshooting.

    Returns:
        ``{"launcher": None, "message": ...}`` when no launcher block is
        configured; otherwise ``{"launcher": {...}}`` with ``model_dirs``,
        the configured binary per backend and the profile names per backend.
    """
    cfg = request.app.state.config
    launcher_cfg = getattr(cfg, "launcher", None)
    if not launcher_cfg:
        return {"launcher": None, "message": "launcher: block not found in providers.yaml"}

    # The other launcher routes treat ``backends`` and ``option_profiles`` as
    # possibly empty or None; do the same here so this diagnostic endpoint
    # does not fail on a partially filled launcher block.
    backends = launcher_cfg.backends or {}
    option_profiles = launcher_cfg.option_profiles or {}
    return {
        "launcher": {
            "model_dirs": launcher_cfg.model_dirs,
            "backends": {k: {"binary": v.binary} for k, v in backends.items()},
            "option_profiles": {
                k: [p.name for p in v]
                for k, v in option_profiles.items()
            },
        },
    }
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
@router.get("/api/launcher/processes")
async def api_processes(request: Request) -> dict[str, Any]:
    """List every process held in the launcher registry.

    Returns:
        ``{"processes": [...]}`` with one summary dict per registry entry.
    """

    def _summary(entry: ManagedProcess) -> dict[str, Any]:
        return {
            "id": entry.id,
            "name": entry.name,
            "backend": entry.backend,
            "model_path": entry.model_path,
            "port": entry.port,
            "status": entry.status,
            "pid": entry.pid,
            "returncode": entry.returncode,
        }

    entries = _registry(request).all()
    return {"processes": [_summary(entry) for entry in entries]}
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
@router.get("/api/launcher/backends")
async def api_backends(request: Request) -> dict[str, Any]:
    """Return the resolved executable for each backend.

    The UI uses this to show which binary will be invoked: the path
    configured in providers.yaml, or the default that is looked up on PATH.
    """
    launcher_cfg = getattr(request.app.state.config, "launcher", None)
    backends_cfg = None
    if launcher_cfg and launcher_cfg.backends:
        backends_cfg = launcher_cfg.backends

    # The availability check touches the filesystem (is_file / shutil.which),
    # so it runs in a worker thread instead of on the event loop.
    resolved = await asyncio.to_thread(_resolve_backends_sync, backends_cfg)
    return {"backends": resolved}
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
@router.post("/api/launcher/start")
async def api_start(req: StartRequest, request: Request) -> dict[str, Any]:
    """Start a new backend process and register it.

    The executable comes from ``launcher.backends`` in the config when one is
    configured for the requested backend, otherwise the backend default is
    used. The child's stdout and stderr are piped and tailed by a background
    task.

    Returns:
        ``{"id", "pid", "command"}`` for the started process.

    Raises:
        HTTPException: 400 when the command cannot be built or the
            executable is not found; 500 for any other spawn failure.
    """
    # Resolve the binary path from providers.yaml launcher.backends.
    cfg = request.app.state.config
    launcher_cfg = getattr(cfg, "launcher", None)
    configured_binary: str | None = None
    if launcher_cfg and launcher_cfg.backends:
        bc = launcher_cfg.backends.get(req.backend)
        if bc and bc.binary:
            configured_binary = bc.binary

    try:
        cmd = _build_cmd(
            req.backend, req.model_path, req.port,
            req.options, req.extra_args,
            binary=configured_binary,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    proc_id = uuid.uuid4().hex[:8]
    proc = ManagedProcess(
        id=proc_id,
        name=req.name,
        backend=req.backend,
        model_path=req.model_path,
        port=req.port,
        options=req.options,
        extra_args=req.extra_args,
        status="starting",
    )
    # shlex.join quotes arguments containing spaces, so the logged command
    # is unambiguous and can be pasted into a shell.
    proc.log_tail.append(f"[launcher] cmd: {shlex.join(cmd)}")

    try:
        p = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError:
        raise HTTPException(
            status_code=400,
            detail=f"Executable not found: {cmd[0]!r}. Is {req.backend} installed?",
        ) from None
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    proc._proc = p
    proc.pid = p.pid
    proc.status = "running"
    proc.log_tail.append(f"[launcher] started PID {p.pid}")

    _registry(request).add(proc)
    # Keep a strong reference to the tail task until it finishes so that it
    # is not garbage-collected while running.
    _task = asyncio.create_task(_tail_logs(proc))
    _background_tasks.add(_task)
    _task.add_done_callback(_background_tasks.discard)

    return {"id": proc_id, "pid": p.pid, "command": cmd}
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
@router.post("/api/launcher/stop/{proc_id}")
async def api_stop(proc_id: str, request: Request) -> dict[str, Any]:
    """Terminate a running process (SIGTERM, then SIGKILL after 5s).

    Entries that are not in the ``"running"`` state are left untouched.

    Returns:
        ``{"id", "status"}`` with the entry's status after the call.

    Raises:
        HTTPException: 404 when ``proc_id`` is not in the registry.
    """
    try:
        proc = _registry(request).get(proc_id)
    except KeyError:
        raise HTTPException(
            status_code=404, detail=f"Process {proc_id!r} not found.") from None

    child = proc._proc
    if child is not None and proc.status == "running":
        try:
            child.terminate()
        except ProcessLookupError:
            # The child exited on its own before the status was updated;
            # there is nothing left to signal.
            pass
        else:
            proc.log_tail.append("[launcher] SIGTERM sent")
            try:
                await asyncio.wait_for(child.wait(), timeout=5.0)
            except (asyncio.TimeoutError, TimeoutError):
                # asyncio.TimeoutError is only an alias of the builtin
                # TimeoutError on Python 3.11+, so both are caught.
                with contextlib.suppress(ProcessLookupError):
                    child.kill()
                proc.log_tail.append("[launcher] SIGKILL sent (timeout)")
        proc.status = "stopped"
        proc.pid = None

    return {"id": proc_id, "status": proc.status}
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
@router.delete("/api/launcher/processes/{proc_id}")
async def api_delete(proc_id: str, request: Request) -> dict[str, Any]:
    """Remove a process entry that is no longer running from the registry.

    Raises:
        HTTPException: 404 when ``proc_id`` is unknown, 400 when the entry is
            still in the ``"running"`` state.
    """
    registry = _registry(request)
    try:
        entry = registry.get(proc_id)
    except KeyError:
        raise HTTPException(
            status_code=404, detail=f"Process {proc_id!r} not found.") from None

    still_running = entry.status == "running"
    if still_running:
        raise HTTPException(status_code=400, detail="Stop the process before deleting.")

    registry.remove(proc_id)
    return {"deleted": proc_id}
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
@router.get("/api/launcher/logs/{proc_id}")
async def api_logs(proc_id: str, request: Request, n: int = 100) -> dict[str, Any]:
    """Return the last ``n`` log lines for a process.

    Args:
        proc_id: Registry id of the process.
        n: Maximum number of lines to return; zero or a negative value
            yields an empty list.

    Returns:
        ``{"id", "logs", "total"}`` where ``total`` is the number of lines
        currently buffered for the process.

    Raises:
        HTTPException: 404 when ``proc_id`` is not in the registry.
    """
    try:
        proc = _registry(request).get(proc_id)
    except KeyError:
        raise HTTPException(
            status_code=404, detail=f"Process {proc_id!r} not found.") from None
    tail = list(proc.log_tail)
    # tail[-0:] is the whole list and a negative n would slice from the
    # front, so non-positive n is handled explicitly.
    recent = tail[-n:] if n > 0 else []
    return {"id": proc_id, "logs": recent, "total": len(tail)}
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
@router.get("/api/launcher/suggest")
async def api_suggest(model_path: str = "") -> dict[str, Any]:
    """Suggest launch flags for a model based on the detected hardware.

    Called from the client's "recommended values" button. The result is a
    rough guide only.

    Returns:
        ``{"extra_args", "hardware", "size_gb"}``; ``size_gb`` is 0.0 when no
        ``model_path`` is given or the file size cannot be read.
    """
    # Both probes block on subprocesses / the filesystem, hence to_thread.
    hw = await asyncio.to_thread(_detect_hardware)
    if model_path:
        size_gb = await asyncio.to_thread(_model_size_gb, model_path)
    else:
        size_gb = 0.0

    suggestion = _suggest_launch_flags(size_gb, hw)
    return {"extra_args": suggestion, "hardware": hw, "size_gb": round(size_gb, 2)}
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
# ---------------------------------------------------------------------------
|
|
630
|
+
# HTML UI
|
|
631
|
+
# ---------------------------------------------------------------------------
|
|
632
|
+
|
|
633
|
+
_LAUNCHER_HTML = r"""<!doctype html>
|
|
634
|
+
<html lang="ja">
|
|
635
|
+
<head>
|
|
636
|
+
<meta charset="utf-8" />
|
|
637
|
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
638
|
+
<title>CodeRouter Launcher</title>
|
|
639
|
+
<script src="https://cdn.tailwindcss.com"></script>
|
|
640
|
+
<style>
|
|
641
|
+
.dot { width:.5rem;height:.5rem;border-radius:9999px;display:inline-block; }
|
|
642
|
+
.tabnum { font-variant-numeric:tabular-nums; }
|
|
643
|
+
.log-box { font-family:monospace;font-size:.75rem;line-height:1.4;
|
|
644
|
+
overflow-y:auto;max-height:14rem;white-space:pre-wrap;word-break:break-all; }
|
|
645
|
+
.model-row:hover { background:rgba(255,255,255,.04);cursor:pointer; }
|
|
646
|
+
.model-row.selected { background:rgba(99,102,241,.15);border-left:2px solid #6366f1; }
|
|
647
|
+
input, select, textarea {
|
|
648
|
+
background:#1e293b;border:1px solid #334155;color:#f1f5f9;
|
|
649
|
+
border-radius:.375rem;padding:.35rem .6rem;width:100%;font-size:.875rem;
|
|
650
|
+
outline:none;
|
|
651
|
+
}
|
|
652
|
+
input:focus, select:focus, textarea:focus { border-color:#6366f1; }
|
|
653
|
+
.btn-primary {
|
|
654
|
+
background:#6366f1;color:#fff;padding:.4rem 1rem;border-radius:.375rem;
|
|
655
|
+
font-size:.875rem;font-weight:600;cursor:pointer;transition:background .15s;
|
|
656
|
+
}
|
|
657
|
+
.btn-primary:hover { background:#4f46e5; }
|
|
658
|
+
.btn-primary:disabled { background:#475569;cursor:not-allowed; }
|
|
659
|
+
.btn-sm {
|
|
660
|
+
padding:.25rem .6rem;border-radius:.25rem;font-size:.75rem;
|
|
661
|
+
cursor:pointer;font-weight:500;transition:background .15s;
|
|
662
|
+
}
|
|
663
|
+
.btn-red { background:#7f1d1d;color:#fca5a5; }
|
|
664
|
+
.btn-red:hover { background:#991b1b; }
|
|
665
|
+
.btn-slate { background:#334155;color:#94a3b8; }
|
|
666
|
+
.btn-slate:hover { background:#475569; }
|
|
667
|
+
.btn-indigo { background:#312e81;color:#a5b4fc; }
|
|
668
|
+
.btn-indigo:hover { background:#3730a3; }
|
|
669
|
+
</style>
|
|
670
|
+
</head>
|
|
671
|
+
<body class="bg-slate-950 text-slate-100 min-h-screen font-sans">
|
|
672
|
+
|
|
673
|
+
<!-- Header -->
|
|
674
|
+
<header class="border-b border-slate-800 px-6 py-3">
|
|
675
|
+
<div class="max-w-7xl mx-auto flex items-center gap-x-6 text-sm">
|
|
676
|
+
<span class="text-lg font-semibold tracking-tight">CodeRouter</span>
|
|
677
|
+
<a href="/dashboard" class="text-slate-400 hover:text-slate-200 transition-colors">Dashboard</a>
|
|
678
|
+
<span class="text-slate-100 font-medium border-b border-indigo-400 pb-0.5">Launcher</span>
|
|
679
|
+
<span id="status-msg" class="ml-auto text-xs text-slate-500"></span>
|
|
680
|
+
</div>
|
|
681
|
+
</header>
|
|
682
|
+
|
|
683
|
+
<main class="max-w-7xl mx-auto p-4 md:p-6 space-y-4">
|
|
684
|
+
|
|
685
|
+
<!-- Row 1: Models + Launch form -->
|
|
686
|
+
<div class="grid grid-cols-1 md:grid-cols-2 gap-4">
|
|
687
|
+
|
|
688
|
+
<!-- Models panel -->
|
|
689
|
+
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4 flex flex-col gap-3">
|
|
690
|
+
<div class="flex items-center justify-between">
|
|
691
|
+
<div class="flex items-baseline gap-2">
|
|
692
|
+
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400">Models</h2>
|
|
693
|
+
<span id="hw-info" class="text-xs text-slate-500"></span>
|
|
694
|
+
</div>
|
|
695
|
+
<button onclick="fetchModels()" class="btn-sm btn-slate">↻ スキャン</button>
|
|
696
|
+
</div>
|
|
697
|
+
<div id="model-dirs" class="text-xs text-slate-500 space-y-0.5"></div>
|
|
698
|
+
<div id="model-list" class="divide-y divide-slate-800 text-sm flex-1 overflow-y-auto max-h-64">
|
|
699
|
+
<div class="py-2 text-slate-500 text-xs">スキャン中…</div>
|
|
700
|
+
</div>
|
|
701
|
+
</section>
|
|
702
|
+
|
|
703
|
+
<!-- Launch form -->
|
|
704
|
+
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4 flex flex-col gap-3">
|
|
705
|
+
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400">Launch</h2>
|
|
706
|
+
|
|
707
|
+
<div class="grid grid-cols-2 gap-2">
|
|
708
|
+
<div>
|
|
709
|
+
<label class="block text-xs text-slate-400 mb-1">名前</label>
|
|
710
|
+
<input id="f-name" type="text" placeholder="my-qwen" />
|
|
711
|
+
</div>
|
|
712
|
+
<div>
|
|
713
|
+
<label class="block text-xs text-slate-400 mb-1">ポート</label>
|
|
714
|
+
<input id="f-port" type="number" value="8080" min="1024" max="65535" />
|
|
715
|
+
</div>
|
|
716
|
+
</div>
|
|
717
|
+
|
|
718
|
+
<div>
|
|
719
|
+
<label class="block text-xs text-slate-400 mb-1">バックエンド</label>
|
|
720
|
+
<select id="f-backend" onchange="onBackendChange()">
|
|
721
|
+
<option value="llama.cpp">llama.cpp</option>
|
|
722
|
+
<option value="vllm">vllm</option>
|
|
723
|
+
</select>
|
|
724
|
+
<div id="binary-hint" class="mt-1 text-xs text-slate-500 min-h-[1.2rem]"></div>
|
|
725
|
+
</div>
|
|
726
|
+
|
|
727
|
+
<div>
|
|
728
|
+
<label class="block text-xs text-slate-400 mb-1">モデルパス</label>
|
|
729
|
+
<input id="f-model" type="text" placeholder="← モデル一覧から選択 or 直接入力" />
|
|
730
|
+
</div>
|
|
731
|
+
|
|
732
|
+
<div>
|
|
733
|
+
<label class="block text-xs text-slate-400 mb-1">オプションプロファイル</label>
|
|
734
|
+
<select id="f-profile" onchange="onProfileChange()">
|
|
735
|
+
<option value="">-- なし --</option>
|
|
736
|
+
</select>
|
|
737
|
+
<div id="profile-args" class="mt-1 text-xs font-mono text-slate-400 bg-slate-800/50 rounded p-2 hidden"></div>
|
|
738
|
+
</div>
|
|
739
|
+
|
|
740
|
+
<div>
|
|
741
|
+
<div class="flex items-center justify-between mb-1">
|
|
742
|
+
<label class="block text-xs text-slate-400">追加オプション(自由入力)</label>
|
|
743
|
+
<button onclick="suggestOptions()" class="btn-sm btn-slate">⚙ 推奨値</button>
|
|
744
|
+
</div>
|
|
745
|
+
<input id="f-extra" type="text" placeholder="-ngl 99 --threads 8" />
|
|
746
|
+
</div>
|
|
747
|
+
|
|
748
|
+
<button id="btn-launch" onclick="launchProcess()" class="btn-primary w-full mt-1">
|
|
749
|
+
▶ 起動
|
|
750
|
+
</button>
|
|
751
|
+
<div id="launch-err" class="text-xs text-red-400 hidden"></div>
|
|
752
|
+
</section>
|
|
753
|
+
</div>
|
|
754
|
+
|
|
755
|
+
<!-- Row 2: Running processes -->
|
|
756
|
+
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4">
|
|
757
|
+
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Processes</h2>
|
|
758
|
+
<div class="overflow-x-auto">
|
|
759
|
+
<table class="w-full text-sm tabnum">
|
|
760
|
+
<thead class="text-slate-500 text-left">
|
|
761
|
+
<tr>
|
|
762
|
+
<th class="pb-2 font-medium">NAME</th>
|
|
763
|
+
<th class="pb-2 font-medium">BACKEND</th>
|
|
764
|
+
<th class="pb-2 font-medium">MODEL</th>
|
|
765
|
+
<th class="pb-2 font-medium text-right">PORT</th>
|
|
766
|
+
<th class="pb-2 font-medium text-right">PID</th>
|
|
767
|
+
<th class="pb-2 font-medium">STATUS</th>
|
|
768
|
+
<th class="pb-2 font-medium text-right">ACTIONS</th>
|
|
769
|
+
</tr>
|
|
770
|
+
</thead>
|
|
771
|
+
<tbody id="proc-table" class="divide-y divide-slate-800">
|
|
772
|
+
<tr><td colspan="7" class="py-3 text-slate-500 text-xs">プロセスなし</td></tr>
|
|
773
|
+
</tbody>
|
|
774
|
+
</table>
|
|
775
|
+
</div>
|
|
776
|
+
</section>
|
|
777
|
+
|
|
778
|
+
<!-- Row 3: Log viewer (hidden until a process is selected) -->
|
|
779
|
+
<section id="log-panel" class="bg-slate-900/60 border border-slate-800 rounded-lg p-4 hidden">
|
|
780
|
+
<div class="flex items-center justify-between mb-2">
|
|
781
|
+
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400">
|
|
782
|
+
Log: <span id="log-title" class="text-slate-200 normal-case">—</span>
|
|
783
|
+
</h2>
|
|
784
|
+
<div class="flex gap-2">
|
|
785
|
+
<button onclick="refreshLogs()" class="btn-sm btn-slate">↻ 更新</button>
|
|
786
|
+
<button onclick="closeLog()" class="btn-sm btn-slate">✕ 閉じる</button>
|
|
787
|
+
</div>
|
|
788
|
+
</div>
|
|
789
|
+
<div id="log-box" class="log-box bg-slate-950 rounded p-3 text-slate-300"></div>
|
|
790
|
+
</section>
|
|
791
|
+
|
|
792
|
+
</main>
|
|
793
|
+
|
|
794
|
+
<script>
|
|
795
|
+
(() => {
|
|
796
|
+
"use strict";
|
|
797
|
+
|
|
798
|
+
// Interval between process-table (and open log panel) refreshes, in ms.
const POLL_MS = 3000;
// Option profiles grouped by backend name: backend → [{name, args}].
let allProfiles = {};
// Models from the latest scan, keyed by row index: index → {path, name, dir, size_gb}.
const _modelCache = {};
// Registry id of the process whose log panel is open; null while it is closed.
let selectedLogId = null;
// When true, the log box is scrolled to the bottom after each refresh.
let logAutoScroll = true;
// Instance name that selectModel last filled in automatically.
let _lastAutoName = "";
|
|
804
|
+
|
|
805
|
+
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
806
|
+
|
|
807
|
+
// Escape a value for safe interpolation into HTML text or a quoted attribute.
// null/undefined become ""; everything else is stringified first.
// Each of the five HTML-significant characters maps to its entity.
const esc = (s) => String(s ?? "").replace(/[&<>"']/g, c =>
  ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c])
);
|
|
810
|
+
|
|
811
|
+
// Show a message in the header status area.
//   ok = true  → muted text that clears itself after 3 s, unless another
//                message has replaced it in the meantime.
//   ok = false → red text that stays until the next message.
const statusMsg = (msg, ok = true) => {
  const target = document.getElementById("status-msg");
  const tone = ok ? "text-slate-500" : "text-red-400";
  target.textContent = msg;
  target.className = "ml-auto text-xs " + tone;
  if (!ok) return;
  setTimeout(() => {
    if (target.textContent === msg) target.textContent = "";
  }, 3000);
};
|
|
817
|
+
|
|
818
|
+
// Show `msg` in the launch-form error banner, or hide the banner when `msg`
// is empty/falsy. The text is written via textContent (no HTML parsing).
const showLaunchErr = (msg) => {
  const banner = document.getElementById("launch-err");
  banner.textContent = msg ? msg : "";
  banner.classList.toggle("hidden", !msg);
};
|
|
823
|
+
|
|
824
|
+
// Build the HTML for a coloured status dot followed by the escaped status
// text. Statuses without a colour entry fall back to the grey dot.
const statusDot = (status) => {
  const colours = {
    running: "bg-green-500",
    starting: "bg-yellow-500",
    stopped: "bg-slate-500",
    error: "bg-red-500",
  };
  const dotClass = colours[status] || "bg-slate-500";
  return `<span class="dot ${dotClass} mr-1.5"></span>${esc(status)}`;
};
|
|
829
|
+
|
|
830
|
+
// ── Models ───────────────────────────────────────────────────────────────
|
|
831
|
+
|
|
832
|
+
// Scan the configured model directories via the API and re-render the
// directory list, hardware summary and model list.
// Failures (network error, non-2xx status, bad JSON) are reported in the
// status area instead of being thrown.
window.fetchModels = async () => {
  statusMsg("モデルスキャン中…");
  try {
    const r = await fetch("/api/launcher/models");
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const d = await r.json();
    // Normalise once so the count below cannot throw when `models` is absent.
    const models = d.models || [];
    renderModelDirs(d.model_dirs || []);
    renderHwInfo(d.hardware);
    renderModels(models);
    statusMsg(`モデル ${models.length} 件`);
  } catch (e) {
    statusMsg("モデルスキャン失敗: " + e.message, false);
  }
};
|
|
845
|
+
|
|
846
|
+
// Render the one-line hardware summary ("<GPU kind> · RAM nGB [· VRAM nGB]").
// Does nothing if the target element is missing; clears it when `hw` is falsy.
const renderHwInfo = (hw) => {
  const target = document.getElementById("hw-info");
  if (!target) return;
  if (!hw) {
    target.textContent = "";
    return;
  }
  const accelNames = {metal: "Metal", cuda: "CUDA", cpu: "CPU"};
  const parts = [accelNames[hw.gpu] || "CPU", `RAM ${hw.ram_gb}GB`];
  // VRAM is only shown for CUDA, and only when a non-zero value is reported.
  if (hw.gpu === "cuda" && hw.vram_gb) parts.push(`VRAM ${hw.vram_gb}GB`);
  target.textContent = parts.join(" · ");
};
|
|
855
|
+
|
|
856
|
+
// List the scanned model directories, or a hint when none are configured.
const renderModelDirs = (dirs) => {
  const container = document.getElementById("model-dirs");
  if (!dirs.length) {
    container.innerHTML = '<div class="text-slate-600">model_dirs 未設定 (providers.yaml)</div>';
    return;
  }
  const rows = dirs.map((dir) => `<div class="truncate">📂 ${esc(dir)}</div>`);
  container.innerHTML = rows.join("");
};
|
|
862
|
+
|
|
863
|
+
// Build the recommendation badge HTML for a model row.
// Returns "" when there is no recommendation/label or the level is neither
// "ok" nor "warn".
const recBadge = (rec) => {
  if (!rec || !rec.label) return "";
  let colour;
  let icon;
  switch (rec.level) {
    case "ok":
      colour = "#22c55e";
      icon = "✓";
      break;
    case "warn":
      colour = "#eab308";
      icon = "⚠";
      break;
    default:
      return "";
  }
  return `<span class="text-xs shrink-0" style="color:${colour}">${icon} ${esc(rec.label)}</span>`;
};
|
|
871
|
+
|
|
872
|
+
// Render the clickable model list and remember each model in _modelCache
// under its row index so selectModel(index) can look it up later.
const renderModels = (models) => {
  const list = document.getElementById("model-list");
  if (!models.length) {
    list.innerHTML = '<div class="py-2 text-slate-500 text-xs">モデルが見つかりません</div>';
    return;
  }
  const rows = [];
  models.forEach((model, index) => {
    _modelCache[index] = model;
    rows.push(`
      <div class="model-row px-1 py-2" onclick="selectModel(${index})">
        <div class="flex justify-between items-baseline gap-2">
          <span class="truncate">${esc(model.name)}</span>
          <span class="flex items-baseline gap-2 shrink-0">
            ${recBadge(model.recommendation)}
            <span class="text-slate-400 tabnum">${model.size_gb} GB</span>
          </span>
        </div>
        <div class="text-slate-500 text-xs truncate">${esc(model.dir)}</div>
      </div>`);
  });
  list.innerHTML = rows.join("");
};
|
|
893
|
+
|
|
894
|
+
// Ask the API for suggested extra arguments for the selected model and put
// them into the "extra args" field. Errors go to the launch error banner.
window.suggestOptions = async () => {
  const modelPath = document.getElementById("f-model").value.trim();
  if (!modelPath) {
    showLaunchErr("先にモデルを選択してください");
    return;
  }
  try {
    const url = "/api/launcher/suggest?model_path=" + encodeURIComponent(modelPath);
    const resp = await fetch(url);
    const payload = await resp.json();
    if (!resp.ok) {
      showLaunchErr(payload.detail || "推奨値の取得に失敗");
      return;
    }
    document.getElementById("f-extra").value = payload.extra_args;
    showLaunchErr("");
    statusMsg("推奨値を設定(目安): " + payload.extra_args);
  } catch (err) {
    showLaunchErr(err.message);
  }
};
|
|
909
|
+
|
|
910
|
+
// Select the model at row `idx`: copy its path into the form, auto-fill the
// instance name, and highlight the chosen row.
window.selectModel = (idx) => {
  const model = _modelCache[idx];
  if (!model) return;
  document.getElementById("f-model").value = model.path;

  // Only overwrite the name when it is empty or still the value we filled in
  // last time; a name typed by the user is left alone.
  const nameInput = document.getElementById("f-name");
  const userTyped = nameInput.value && nameInput.value !== _lastAutoName;
  if (!userTyped) {
    // File name without its last extension, capped at 30 characters.
    _lastAutoName = model.name.replace(/\.[^.]+$/, "").slice(0, 30);
    nameInput.value = _lastAutoName;
  }

  const rows = document.querySelectorAll(".model-row");
  for (let i = 0; i < rows.length; i++) {
    rows[i].classList.toggle("selected", i === idx);
  }
};
|
|
924
|
+
|
|
925
|
+
// ── Backends (binary paths) ───────────────────────────────────────────────
|
|
926
|
+
|
|
927
|
+
let allBackends = {};
|
|
928
|
+
|
|
929
|
+
// Load backend binary information into allBackends, then refresh the hint.
// A failed request is logged and leaves allBackends unchanged.
const fetchBackends = async () => {
  try {
    const resp = await fetch("/api/launcher/backends");
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    const payload = await resp.json();
    allBackends = payload.backends || {};
  } catch (err) {
    console.error("[Launcher] fetchBackends failed:", err);
  }
  // Deliberately outside the try/catch so rendering errors are not swallowed.
  renderBinaryHint();
};
|
|
940
|
+
|
|
941
|
+
// Show where the selected backend's binary resolves to and whether it exists,
// and enable/disable the launch button accordingly.
// While no info is available for the backend, a "checking…" placeholder is
// shown and the button state is left untouched.
const renderBinaryHint = () => {
  const backend = document.getElementById("f-backend").value;
  const hint = document.getElementById("binary-hint");
  const btn = document.getElementById("btn-launch");
  const info = allBackends[backend];
  if (!info) {
    hint.innerHTML = '<span class="text-slate-600 text-xs">バイナリ確認中…</span>';
    return;
  }
  const dotColor = info.found ? "#22c55e" : "#ef4444"; // green-500 / red-500
  const dot = `<svg style="display:inline;vertical-align:middle;margin-right:5px;flex-shrink:0" width="8" height="8" viewBox="0 0 8 8"><circle cx="4" cy="4" r="4" fill="${dotColor}"/></svg>`;
  const label = info.is_custom ? "カスタム設定" : "PATH";
  const statusText = info.found ? "利用可" : "見つかりません";
  const pathColor = info.found
    ? (info.is_custom ? "#818cf8" : "#4ade80") // indigo-400 / green-400
    : "#f87171"; // red-400
  hint.innerHTML = dot
    + `<span style="font-family:monospace;color:${pathColor};overflow:hidden;text-overflow:ellipsis;white-space:nowrap">${esc(info.resolved)}</span>`
    + `<span style="color:#64748b;margin-left:6px;white-space:nowrap;flex-shrink:0">(${label} — ${statusText})</span>`;
  hint.style.cssText = "display:flex;align-items:center;gap:0;overflow:hidden";

  if (!info.found) {
    btn.disabled = true;
    // showLaunchErr writes textContent, so the raw path is passed: escaping
    // it here would display literal entities. The message names the selected
    // backend rather than assuming llama.cpp.
    showLaunchErr(`⚠ "${info.resolved}" が見つかりません。${backend} をインストールするか、providers.yaml の launcher.backends["${backend}"].binary にフルパスを設定してください。`);
  } else {
    btn.disabled = false;
    // Only clear the banner if it currently shows a binary-not-found warning
    // (those start with "⚠"); other launch errors stay visible.
    const errEl = document.getElementById("launch-err");
    if (errEl.textContent.startsWith("⚠")) showLaunchErr("");
  }
};
|
|
972
|
+
|
|
973
|
+
// ── Option profiles ──────────────────────────────────────────────────────
|
|
974
|
+
|
|
975
|
+
// Load option profiles into allProfiles and rebuild the profile dropdown.
// A `_note` field in the response is surfaced as a console warning.
const fetchProfiles = async () => {
  try {
    const resp = await fetch("/api/launcher/option-profiles");
    const payload = await resp.json();
    allProfiles = payload.profiles || {};
    populateProfileSelect();
    if (payload._note) {
      console.warn("[Launcher] option-profiles:", payload._note);
    }
  } catch (err) {
    console.error("[Launcher] fetchProfiles error:", err);
  }
};
|
|
987
|
+
|
|
988
|
+
// Rebuild the profile <select> for the currently chosen backend, then refresh
// the argument preview. With no profiles, a disabled hint option is added.
const populateProfileSelect = () => {
  const backend = document.getElementById("f-backend").value;
  const select = document.getElementById("f-profile");
  const profiles = allProfiles[backend] || [];

  const options = ['<option value="">-- なし --</option>'];
  if (profiles.length === 0) {
    options.push('<option value="" disabled style="color:#64748b">providers.yaml に option_profiles を追加すると選べます</option>');
  }
  profiles.forEach((profile, index) => {
    options.push(`<option value="${index}">${esc(profile.name)}</option>`);
  });

  select.innerHTML = options.join("");
  renderProfileArgs();
};
|
|
999
|
+
|
|
1000
|
+
// Exposed on window as the backend change handler: refresh the profile list
// first, then the binary hint.
window.onBackendChange = function () {
  populateProfileSelect();
  renderBinaryHint();
};
|
|
1004
|
+
|
|
1005
|
+
// renderProfileArgs is a `const` declared further down. Wrapping the call in
// a function resolves the name at call time and avoids referencing it before
// its declaration (temporal dead zone).
window.onProfileChange = () => {
  return renderProfileArgs();
};
|
|
1008
|
+
|
|
1009
|
+
// Show a one-line preview of the selected profile's arguments.
// Boolean true renders as the bare flag, boolean false as a "disabled" note,
// anything else as "key value". With no (or an unknown) profile selected the
// preview box is hidden and emptied.
const renderProfileArgs = () => {
  const backend = document.getElementById("f-backend").value;
  const idx = document.getElementById("f-profile").value;
  const box = document.getElementById("profile-args");
  const profiles = allProfiles[backend] || [];
  const p = idx === "" ? undefined : profiles[parseInt(idx, 10)];
  if (!p) {
    // Clear the text in both cases so a stale preview never lingers.
    box.classList.add("hidden");
    box.textContent = "";
    return;
  }
  // A profile without an args mapping is treated as having no arguments.
  const lines = Object.entries(p.args || {}).map(([k, v]) =>
    typeof v === "boolean" ? (v ? k : `# ${k} (disabled)`) : `${k} ${v}`
  );
  box.textContent = lines.join(" ");
  box.classList.remove("hidden");
};
|
|
1023
|
+
|
|
1024
|
+
// Return the args object of the currently selected profile, or {} when no
// profile is selected or the selected index does not exist.
const selectedProfileArgs = () => {
  const backend = document.getElementById("f-backend").value;
  const choice = document.getElementById("f-profile").value;
  if (choice === "") return {};
  const candidates = allProfiles[backend] || [];
  const profile = candidates[parseInt(choice)];
  if (!profile) return {};
  return profile.args;
};
|
|
1032
|
+
|
|
1033
|
+
// ── Launch ───────────────────────────────────────────────────────────────
|
|
1034
|
+
|
|
1035
|
+
// Validate the launch form and POST it to /api/launcher/start.
// On success: report the PID, clear the name, suggest the next port and
// refresh the process table. On failure: show the reason in the banner.
window.launchProcess = async () => {
  showLaunchErr("");
  const name = document.getElementById("f-name").value.trim();
  const port = parseInt(document.getElementById("f-port").value, 10);
  const backend = document.getElementById("f-backend").value;
  const model = document.getElementById("f-model").value.trim();
  const extra = document.getElementById("f-extra").value.trim();

  if (!name) { showLaunchErr("名前を入力してください"); return; }
  if (!model) { showLaunchErr("モデルパスを入力してください"); return; }
  if (!port || port < 1024 || port > 65535) { showLaunchErr("ポートは 1024-65535"); return; }

  const btn = document.getElementById("btn-launch");
  btn.disabled = true;
  btn.textContent = "起動中…";

  try {
    const res = await fetch("/api/launcher/start", {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify({name, backend, model_path: model, port,
                            options: selectedProfileArgs(), extra_args: extra}),
    });
    // An error response may not carry a JSON body; fall back to {} so the
    // user sees the generic failure text instead of a JSON parse error.
    const d = await res.json().catch(() => ({}));
    if (!res.ok) {
      // `detail` is not always a string (validation errors can be a list of
      // objects); stringify it so the banner never shows "[object Object]".
      const detail = typeof d.detail === "string"
        ? d.detail
        : (d.detail ? JSON.stringify(d.detail) : "");
      showLaunchErr(detail || "起動失敗");
      return;
    }
    statusMsg(`起動: ${name} (PID ${d.pid})`);
    // Reset name/port only; keep the suggested port inside the valid range.
    document.getElementById("f-name").value = "";
    document.getElementById("f-port").value = String(Math.min(port + 1, 65535));
    // Show the new process right away, as stop/delete do, rather than
    // waiting for the next poll tick.
    await fetchProcesses();
  } catch (e) {
    showLaunchErr(e.message);
  } finally {
    // Re-enable the button unless the currently selected backend is known to
    // have no usable binary (the backend may have been switched meanwhile).
    const info = allBackends[document.getElementById("f-backend").value];
    btn.disabled = Boolean(info && !info.found);
    btn.textContent = "▶ 起動";
  }
};
|
|
1071
|
+
|
|
1072
|
+
// ── Processes ────────────────────────────────────────────────────────────
|
|
1073
|
+
|
|
1074
|
+
// Fetch the process registry and re-render the table.
// This runs on every poll tick, so failures are best-effort: the previous
// table is kept and the error is only logged, never thrown.
const fetchProcesses = async () => {
  try {
    const r = await fetch("/api/launcher/processes");
    // Without this check an error body would be rendered as "no processes".
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const d = await r.json();
    renderProcesses(d.processes || []);
  } catch (e) {
    console.warn("[Launcher] fetchProcesses failed:", e);
  }
};
|
|
1081
|
+
|
|
1082
|
+
// Render the process table: one row per registry entry with stop / log /
// delete buttons depending on status.
const renderProcesses = (procs) => {
  const tbody = document.getElementById("proc-table");
  if (!procs.length) {
    tbody.innerHTML = '<tr><td colspan="7" class="py-3 text-slate-500 text-xs">プロセスなし</td></tr>';
    return;
  }
  // Encode a value as a JS string literal that is safe inside a double-quoted
  // HTML attribute. HTML-escaping alone is not enough for inline handlers:
  // the browser decodes entities before the JS is parsed, so an escaped quote
  // in a name would still terminate the string. JSON.stringify produces the
  // JS literal; esc() then protects the attribute.
  const jsArg = (v) => esc(JSON.stringify(String(v ?? "")));
  tbody.innerHTML = procs.map(p => {
    // Tolerate a missing path and both "/" and "\" separators.
    const modelPath = String(p.model_path ?? "");
    const modelName = modelPath.split(/[\\/]/).pop();
    const idArg = jsArg(p.id);
    const stopBtn = p.status === "running"
      ? `<button onclick="stopProc(${idArg})" class="btn-sm btn-red">■ 停止</button>`
      : "";
    const delBtn = p.status !== "running"
      ? `<button onclick="deleteProc(${idArg})" class="btn-sm btn-slate ml-1">✕</button>`
      : "";
    const logBtn = `<button onclick="openLog(${idArg},${jsArg(p.name)})" class="btn-sm btn-indigo ml-1">📋 ログ</button>`;
    return `<tr>
      <td class="py-2 pr-3 font-medium">${esc(p.name)}</td>
      <td class="py-2 pr-3 text-slate-400">${esc(p.backend)}</td>
      <td class="py-2 pr-3 text-slate-400 truncate max-w-[10rem]" title="${esc(modelPath)}">${esc(modelName)}</td>
      <td class="py-2 pr-3 text-right">${esc(p.port)}</td>
      <td class="py-2 pr-3 text-right text-slate-400">${esc(p.pid ?? "—")}</td>
      <td class="py-2 pr-3">${statusDot(p.status)}</td>
      <td class="py-2 text-right whitespace-nowrap">${stopBtn}${logBtn}${delBtn}</td>
    </tr>`;
  }).join("");
};
|
|
1108
|
+
|
|
1109
|
+
// Ask for confirmation, then stop the process with the given registry id.
// The result (or the failure reason) is shown in the status area; the table
// and, if it is showing this process, the log panel are refreshed afterwards.
window.stopProc = async (id) => {
  if (!confirm("プロセスを停止しますか?")) return;
  try {
    const r = await fetch(`/api/launcher/stop/${encodeURIComponent(id)}`, {method: "POST"});
    // Error responses may lack a JSON body; fall back to {}.
    const d = await r.json().catch(() => ({}));
    if (r.ok) {
      statusMsg(`停止: ${d.status}`);
    } else {
      const reason = typeof d.detail === "string" ? d.detail : `HTTP ${r.status}`;
      statusMsg("停止失敗: " + reason, false);
    }
  } catch (e) {
    // Network-level failure: report it instead of leaving an unhandled rejection.
    statusMsg("停止失敗: " + e.message, false);
  }
  await fetchProcesses();
  if (selectedLogId === id) await refreshLogs();
};
|
|
1117
|
+
|
|
1118
|
+
// Ask for confirmation, then remove a process entry from the registry.
// The log panel is closed only when the entry was actually deleted; failures
// are reported in the status area. The table is refreshed either way.
window.deleteProc = async (id) => {
  if (!confirm("レジストリから削除しますか?")) return;
  try {
    const r = await fetch(`/api/launcher/processes/${encodeURIComponent(id)}`, {method: "DELETE"});
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    if (selectedLogId === id) closeLog();
  } catch (e) {
    statusMsg("削除失敗: " + e.message, false);
  }
  await fetchProcesses();
};
|
|
1124
|
+
|
|
1125
|
+
// ── Log viewer ───────────────────────────────────────────────────────────
|
|
1126
|
+
|
|
1127
|
+
// Open the log panel for process `id`, titled with `name`, and load its log.
window.openLog = async (id, name) => {
  selectedLogId = id;
  const title = document.getElementById("log-title");
  const panel = document.getElementById("log-panel");
  title.textContent = name;
  panel.classList.remove("hidden");
  await refreshLogs();
};
|
|
1133
|
+
|
|
1134
|
+
// Reload the last 200 log lines of the process shown in the log panel.
// Does nothing while no process is selected. Errors are shown in the log box.
window.refreshLogs = async () => {
  // Remember which process this request is for: the selection can change
  // (or the panel can be closed) while the request is in flight.
  const id = selectedLogId;
  if (!id) return;
  const box = document.getElementById("log-box");
  try {
    const r = await fetch(`/api/launcher/logs/${encodeURIComponent(id)}?n=200`);
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    const d = await r.json();
    // Drop stale responses so one process's log never lands in another's panel.
    if (selectedLogId !== id) return;
    box.textContent = (d.logs || []).join("\n") || "(ログなし)";
    if (logAutoScroll) box.scrollTop = box.scrollHeight;
  } catch (e) {
    if (selectedLogId === id) box.textContent = "ログ取得失敗: " + e.message;
  }
};
|
|
1146
|
+
|
|
1147
|
+
// Hide the log panel and forget the selected process, which also stops the
// poll loop from refreshing logs.
window.closeLog = function () {
  selectedLogId = null;
  const panel = document.getElementById("log-panel");
  panel.classList.add("hidden");
};
|
|
1151
|
+
|
|
1152
|
+
// ── Init + polling ───────────────────────────────────────────────────────
|
|
1153
|
+
|
|
1154
|
+
// Initial page load: start all four fetches together and wait for them.
const init = async () => {
  const startupLoads = [
    fetchModels(),
    fetchProfiles(),
    fetchBackends(),
    fetchProcesses(),
  ];
  await Promise.all(startupLoads);
};
|
|
1157
|
+
|
|
1158
|
+
// Periodic refresh of the process table and, when a log panel is open, its log.
// setInterval does not wait for an async callback to finish, so a slow server
// would otherwise stack up overlapping requests; a tick is skipped while the
// previous one is still running.
let pollInFlight = false;
const poll = async () => {
  if (pollInFlight) return;
  pollInFlight = true;
  try {
    await fetchProcesses();
    if (selectedLogId) await refreshLogs();
  } finally {
    pollInFlight = false;
  }
};

init();
setInterval(poll, POLL_MS);
|
|
1165
|
+
})();
|
|
1166
|
+
</script>
|
|
1167
|
+
|
|
1168
|
+
</body>
|
|
1169
|
+
</html>
|
|
1170
|
+
"""
|
|
1171
|
+
|
|
1172
|
+
|
|
1173
|
+
@router.get("/launcher", response_class=HTMLResponse)
async def launcher_page() -> HTMLResponse:
    """Return the launcher UI as a single self-contained HTML page."""
    page = HTMLResponse(content=_LAUNCHER_HTML)
    return page
|