ltcai 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ """Model / engine API router.
2
+
3
+ Extracted from ``server_app.py`` in v1.3.0. Paths and schemas unchanged:
4
+ ``/models*``, ``/engines*`` (install/verify-cloud/pull-model/prepare-model[/stream]),
5
+ ``/setup/set-api-key``.
6
+
7
+ Mirrors the established router-factory convention: the heavy provider/runtime
8
+ helpers (engine_status, prepare_and_load_model, download_hf_model,
9
+ verify_cloud_models, …) remain owned by server_app for now and are injected here
10
+ as callables, so this module has no import cycle and adds no import-time
11
+ side effects.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import logging
18
+ import subprocess
19
+ from typing import Any, Callable, Dict, List, Optional
20
+
21
+ from fastapi import APIRouter, HTTPException, Request
22
+ from fastapi.responses import StreamingResponse
23
+ from pydantic import BaseModel
24
+
25
+
26
class LoadModelRequest(BaseModel):
    # Request body for POST /models/load. All fields are forwarded to the
    # injected ``prepare_and_load_model`` helper.

    # Model identifier. When ``engine`` is unset and the id contains ":",
    # the text before the first ":" is used as the engine for the
    # public-mode guard; with no ":" the engine defaults to "local_mlx".
    model_id: str
    # Optional explicit engine; takes precedence over any prefix in model_id.
    engine: Optional[str] = None
    # Optional user e-mail, passed through as ``user_email``.
    user_email: Optional[str] = None
    # Passed through as ``adapter_path`` — presumably a fine-tuned adapter
    # location; confirm against prepare_and_load_model.
    adapter_path: Optional[str] = None
    # Passed through as ``draft_model_id`` — presumably a draft model for
    # speculative decoding; confirm against prepare_and_load_model.
    draft_model_id: Optional[str] = None
32
+
33
+
34
class InstallEngineRequest(BaseModel):
    # Request body for POST /engines/install.

    # Engine name handed unchanged to the injected ``install_engine`` helper.
    engine: str
36
+
37
+
38
class SetApiKeyRequest(BaseModel):
    # Request body for POST /setup/set-api-key.

    # Provider name; must be a key of llm_router.OPENAI_COMPATIBLE_PROVIDERS,
    # otherwise the endpoint answers 400.
    provider: str
    # API key; surrounding whitespace is stripped before storing and a blank
    # key is rejected with 400.
    key: str
    # Optional target user. When it differs from the authenticated user the
    # caller must have the "admin" role; when omitted the authenticated user
    # is the target.
    user_email: Optional[str] = None
42
+
43
+
44
class PullModelRequest(BaseModel):
    # Request body for POST /engines/pull-model.

    # Model reference, first passed through ``normalize_local_model_request``.
    # A recognised "<provider>:" prefix (ollama, vllm, lmstudio, llamacpp,
    # local_mlx, mlx) selects the provider; otherwise "local_mlx" is assumed.
    model: str
46
+
47
+
48
class PrepareModelRequest(BaseModel):
    # Request body for POST /engines/prepare-model and its /stream variant.
    # Fields are forwarded to ``prepare_and_load_model`` /
    # ``prepare_and_load_model_stream``.

    # Model identifier, passed as the first positional argument.
    model: str
    # Optional engine, passed as ``engine``.
    engine: Optional[str] = None
    # Optional user e-mail, passed as ``user_email``.
    user_email: Optional[str] = None
52
+
53
+
54
class VerifyCloudRequest(BaseModel):
    # Request body for POST /engines/verify-cloud.

    # Forwarded to ``verify_cloud_models`` as ``force`` — presumably bypasses
    # a cached verification result; confirm against the helper.
    force: bool = False
    # Forwarded to ``verify_cloud_models`` as ``provider_filter``.
    provider: Optional[str] = None
57
+
58
+
59
def create_models_router(
    *,
    model_router: Any,
    require_user: Callable[[Request], str],
    get_current_user: Callable[[Request], Optional[str]],
    load_users: Callable[[], Dict],
    get_user_role: Callable[..., str],
    install_engine: Callable[[str], Dict],
    verify_cloud_models: Callable[..., Any],
    normalize_local_model_request: Callable[..., str],
    download_hf_model: Callable[..., Dict],
    prepare_and_load_model: Callable[..., Any],
    prepare_and_load_model_stream: Callable[..., Any],
    sse_event: Callable[[str, Dict], str],
    ensure_ollama_server: Callable[[], None],
    local_binary: Callable[[str], Optional[str]],
    engine_status: Callable[[], List[Dict]],
    filter_lower_family_versions: Callable[[List[Dict]], List[Dict]],
    list_compat_profiles: Callable[[], Any],
    set_user_api_key: Callable[..., None],
    engine_model_catalog: Dict,
    model_engine_aliases: Dict,
    cloud_verify_ttl_seconds: int,
    is_public_mode: bool,
    allow_local_models: bool,
    require_auth: bool,
) -> APIRouter:
    """Build the ``/models*``, ``/engines*`` and ``/setup/set-api-key`` router.

    Every collaborator is injected as a keyword-only argument so this module
    does not import the application module that owns them.

    Args:
        model_router: Object exposing ``detected_cloud_models()``,
            ``loaded_model_ids``, ``current_model_id``, ``switch_model()``,
            ``unload_model()`` and ``unload_all()``.
        require_user: Called with the request at the start of protected
            handlers; expected to raise when the caller is not allowed.
        get_current_user: Returns the authenticated user for a request, or
            a falsy value when there is none.
        load_users / get_user_role: Used together to decide whether the
            caller is an ``"admin"`` when setting another user's API key.
        install_engine: Backs ``POST /engines/install``.
        verify_cloud_models: Awaited by ``POST /engines/verify-cloud``.
        normalize_local_model_request: Normalises the model reference for
            ``POST /engines/pull-model``.
        download_hf_model: Called as ``(model_name, provider)`` for the
            vllm / llamacpp / local_mlx providers; its dict result is merged
            into the response.
        prepare_and_load_model: Awaited by the prepare and load endpoints.
        prepare_and_load_model_stream: Async-iterated by the streaming
            prepare endpoint.
        sse_event: Formats an ``(event, payload)`` pair for the SSE stream.
        ensure_ollama_server / local_binary: Used before ``ollama pull``.
        engine_status: Run in a worker thread by ``GET /models``.
        filter_lower_family_versions: Applied to the ``"local_mlx"`` catalog
            entries before they are listed as recommendations.
        list_compat_profiles: Supplies the compatibility profile listing.
        set_user_api_key: Called as ``(email, provider, key)``.
        engine_model_catalog: Mapping read with key ``"local_mlx"``.
        model_engine_aliases: Mapping of lower-cased model id to a mapping
            of engine name to the engine-specific model id.
        cloud_verify_ttl_seconds: Echoed back as ``ttl_seconds``.
        is_public_mode / allow_local_models: Together gate local MLX loads.
        require_auth: When true, ``/setup/set-api-key`` requires a user.

    Returns:
        The configured ``APIRouter``.
    """
    router = APIRouter()
    # Bind injected deps to the names the moved handler bodies expect.
    _router = model_router
    ENGINE_MODEL_CATALOG = engine_model_catalog
    MODEL_ENGINE_ALIASES = model_engine_aliases
    CLOUD_VERIFY_TTL_SECONDS = cloud_verify_ttl_seconds
    IS_PUBLIC_MODE = is_public_mode
    ALLOW_LOCAL_MODELS = allow_local_models
    REQUIRE_AUTH = require_auth
    _list_compat_profiles = list_compat_profiles

    # Order matters: the first engine with an alias becomes the recommendation.
    engine_preference = ("local_mlx", "ollama", "lmstudio", "llamacpp", "vllm")
    # Prefixes that /engines/pull-model recognises as "<provider>:<model>".
    pull_providers = frozenset(
        {"ollama", "vllm", "lmstudio", "llamacpp", "local_mlx", "mlx"}
    )
    # Engines blocked by the public-mode guard in /models/load.
    local_mlx_engines = frozenset({"local_mlx", "mlx"})

    def _recommended_with_engine_options(items: List[Dict[str, object]]) -> List[Dict[str, object]]:
        """Decorate catalog entries with the engines that can serve them.

        Each input item must carry ``id``, ``name``, ``tag`` and ``size``.
        The output adds ``display_name``, ``engine_options`` and
        ``recommended_engine`` (the first option's engine).
        """
        out: List[Dict[str, object]] = []
        for item in items:
            base: Dict[str, object] = {
                "id": item["id"],
                "name": item["name"],
                "tag": item["tag"],
                "size": item["size"],
                "display_name": item.get("name") or item.get("id"),
            }
            aliases = MODEL_ENGINE_ALIASES.get(str(item["id"]).lower()) or {}
            options: List[Dict[str, str]] = []
            for engine_name in engine_preference:
                real = aliases.get(engine_name)
                if not real:
                    continue
                options.append({
                    "engine": engine_name,
                    "model_id": real,
                    # local_mlx ids are loaded bare; other engines are prefixed.
                    "load_id": real if engine_name == "local_mlx" else f"{engine_name}:{real}",
                })
            if not options:
                # No alias known: fall back to loading the catalog id via MLX.
                options.append({"engine": "local_mlx", "model_id": item["id"], "load_id": item["id"]})
            base["engine_options"] = options
            base["recommended_engine"] = options[0]["engine"]
            out.append(base)
        return out

    # ── Engines ───────────────────────────────────────────────────────────

    @router.post("/engines/install")
    async def engines_install(req: InstallEngineRequest, request: Request):
        """Install the requested engine via the injected ``install_engine``."""
        require_user(request)
        return install_engine(req.engine)

    @router.post("/engines/verify-cloud")
    async def engines_verify_cloud(req: VerifyCloudRequest, request: Request):
        """Verify cloud models and report the verification TTL."""
        require_user(request)
        results = await verify_cloud_models(force=req.force, provider_filter=req.provider)
        return {"verified": results, "ttl_seconds": CLOUD_VERIFY_TTL_SECONDS}

    @router.post("/engines/pull-model")
    async def pull_ollama_model(req: PullModelRequest, request: Request):
        """Download a model for the provider named by the request prefix.

        Raises:
            HTTPException: 400 for an empty reference/name, a missing Ollama
                binary or an LM Studio request; 408 when ``ollama pull``
                exceeds 900 seconds; 500 when it exits non-zero.
        """
        require_user(request)
        model_ref = normalize_local_model_request(req.model, None)
        if not model_ref:
            raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")

        prefix, sep, remainder = model_ref.partition(":")
        if sep and prefix.strip().lower() in pull_providers:
            provider = prefix.strip().lower()
            model_name = remainder.strip()
        else:
            # Unprefixed (or unknown-prefix) references are treated as MLX ids.
            provider, model_name = "local_mlx", model_ref

        if not model_name:
            raise HTTPException(status_code=400, detail="모델 이름이 비어 있습니다.")

        if provider == "ollama":
            ensure_ollama_server()
            ollama = local_binary("ollama")
            if not ollama:
                raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
            try:
                # Run the (up to 15 minute) pull in a worker thread so the
                # event loop keeps serving other requests meanwhile.
                completed = await asyncio.to_thread(
                    subprocess.run,
                    [ollama, "pull", model_name],
                    capture_output=True, text=True, timeout=900, check=False,
                )
            except subprocess.TimeoutExpired as exc:
                raise HTTPException(
                    status_code=408, detail="모델 다운로드 시간이 초과되었습니다."
                ) from exc
            if completed.returncode != 0:
                raise HTTPException(status_code=500, detail=completed.stderr[-2000:] or "pull 실패")
            return {"provider": provider, "model": model_name, "returncode": completed.returncode}

        if provider == "lmstudio":
            raise HTTPException(
                status_code=400,
                detail=(
                    "LM Studio 모델은 Lattice에서 Hugging Face로 pull하지 않습니다. "
                    "LM Studio 앱에서 모델을 다운로드하고 Local Server를 켠 뒤 모델을 로드하세요. "
                    "그러면 모델 선택창에 실제 /v1/models 항목이 표시됩니다."
                ),
            )

        if provider in {"vllm", "llamacpp", "local_mlx", "mlx"}:
            download_provider = "local_mlx" if provider == "mlx" else provider
            # NOTE(review): download_hf_model is called synchronously here; if
            # it blocks on network I/O it should probably be offloaded as well
            # — confirm it is safe to call from a worker thread first.
            result = download_hf_model(model_name, download_provider)
            return {"provider": provider, "model": model_name, "returncode": 0, **result}

        # Defensive fallback; every member of pull_providers is handled above.
        raise HTTPException(status_code=400, detail=f"{provider} 엔진 모델 다운로드는 아직 자동화되지 않았습니다.")

    @router.post("/engines/prepare-model")
    async def engines_prepare_model(req: PrepareModelRequest, request: Request):
        """Prepare and load a model, returning the helper's result."""
        require_user(request)
        return await prepare_and_load_model(
            req.model, request, engine=req.engine, user_email=req.user_email,
        )

    @router.post("/engines/prepare-model/stream")
    async def engines_prepare_model_stream(req: PrepareModelRequest, request: Request):
        """Prepare and load a model, streaming progress as server-sent events.

        Failures inside the stream are reported as an ``error`` event rather
        than raised, because the response has already started.
        """
        require_user(request)

        async def event_stream():
            try:
                async for chunk in prepare_and_load_model_stream(
                    req.model, request, engine=req.engine, user_email=req.user_email,
                ):
                    yield chunk
            except HTTPException as exc:
                yield sse_event("error", {
                    "status_code": exc.status_code,
                    "detail": exc.detail or "모델 준비에 실패했습니다.",
                })
            except Exception as exc:
                logging.exception("model prepare stream failed")
                yield sse_event("error", {
                    "status_code": 500,
                    "detail": str(exc)[-1000:] or "모델 준비에 실패했습니다.",
                })

        return StreamingResponse(
            event_stream(),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )

    @router.post("/setup/set-api-key")
    async def set_api_key(req: SetApiKeyRequest, request: Request):
        """Store a provider API key for the caller, or for another user if admin.

        Raises:
            HTTPException: 400 for an unknown provider, blank key or missing
                target user; 401 when auth is required and absent; 403 when a
                non-admin targets a different user.
        """
        # Imported lazily so building the router has no import-time side effects.
        from llm_router import OPENAI_COMPATIBLE_PROVIDERS
        config = OPENAI_COMPATIBLE_PROVIDERS.get(req.provider)
        if not config:
            raise HTTPException(status_code=400, detail="알 수 없는 프로바이더입니다.")
        api_key = req.key.strip()
        if not api_key:
            raise HTTPException(status_code=400, detail="API 키가 비어있습니다.")
        current_user = get_current_user(request)
        if REQUIRE_AUTH and not current_user:
            raise HTTPException(status_code=401, detail="인증이 필요합니다.")
        if req.user_email and req.user_email != current_user:
            users = load_users()
            if get_user_role(current_user or "", users) != "admin":
                raise HTTPException(status_code=403, detail="다른 사용자의 API 키를 설정할 권한이 없습니다.")
        target_email = (req.user_email or current_user or "").strip()
        if not target_email:
            raise HTTPException(status_code=400, detail="사용자 식별이 필요합니다. 로그인 후 다시 시도하세요.")
        set_user_api_key(target_email, req.provider, api_key)
        return {"ok": True, "provider": req.provider, "user_email": target_email, "scope": "user"}

    # ── Models ────────────────────────────────────────────────────────────

    @router.get("/models")
    async def list_models():
        """List recommended, cloud, loaded and current models plus engine status."""
        recommended = _recommended_with_engine_options(
            list(filter_lower_family_versions(ENGINE_MODEL_CATALOG.get("local_mlx", [])))
        )
        return {
            "recommended": recommended,
            "cloud": _router.detected_cloud_models(),
            # engine_status is synchronous; keep it off the event loop.
            "engines": await asyncio.to_thread(engine_status),
            "loaded": _router.loaded_model_ids,
            "current": _router.current_model_id,
            "compat_profiles": _list_compat_profiles(),
        }

    @router.get("/models/compat-profiles")
    async def list_model_compat_profiles(request: Request):
        """Return the compatibility profiles under the ``profiles`` key."""
        require_user(request)
        return {"profiles": _list_compat_profiles()}

    @router.post("/models/load")
    async def load_model(req: LoadModelRequest, request: Request):
        """Prepare and load a model, enforcing the public-mode MLX restriction.

        Raises:
            HTTPException: 400 when public mode blocks a local MLX load;
                500 wrapping any unexpected error from the loader.
        """
        try:
            model_id = req.model_id
            raw_engine = req.engine or (model_id.split(":", 1)[0] if ":" in model_id else "local_mlx")
            # Normalise like /engines/pull-model does, so " MLX" or "Local_MLX"
            # cannot slip past the public-mode guard.
            requested_engine = raw_engine.strip().lower()
            if IS_PUBLIC_MODE and not ALLOW_LOCAL_MODELS and requested_engine in local_mlx_engines:
                raise HTTPException(
                    status_code=400,
                    detail="Public mode blocks local MLX model loading. Use openai:, openrouter:, groq:, together:, or set LATTICEAI_ALLOW_LOCAL_MODELS=true.",
                )
            return await prepare_and_load_model(
                model_id, request, engine=req.engine, user_email=req.user_email,
                adapter_path=req.adapter_path, draft_model_id=req.draft_model_id,
            )
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    @router.post("/models/switch/{model_id:path}")
    async def switch_model(model_id: str, request: Request):
        """Make an already-loaded model current; 404 if it is not loaded."""
        require_user(request)
        try:
            _router.switch_model(model_id)
        except KeyError as exc:
            raise HTTPException(
                status_code=404,
                detail=f"Model '{model_id}' not loaded. Call /models/load first.",
            ) from exc
        return {"status": "ok", "current": _router.current_model_id}

    @router.delete("/models/unload/{model_id:path}")
    async def unload_model(model_id: str, request: Request):
        """Unload a single model by id."""
        require_user(request)
        _router.unload_model(model_id)
        return {"status": "ok", "unloaded": model_id}

    @router.delete("/models/unload-all")
    async def unload_all_models(request: Request):
        """Unload every model and report which ids were loaded beforehand."""
        require_user(request)
        # Snapshot first: if loaded_model_ids is a live view it would be empty
        # after unload_all().
        unloaded = list(_router.loaded_model_ids)
        _router.unload_all()
        return {"status": "ok", "unloaded": unloaded}

    return router
@@ -18,7 +18,7 @@ from pathlib import Path
18
18
  from typing import Any, Callable, Dict, Iterable, List, Optional
19
19
 
20
20
 
21
- WORKSPACE_OS_VERSION = "1.2.0"
21
+ WORKSPACE_OS_VERSION = "1.3.0"
22
22
 
23
23
  # Workspace types separate single-user Personal workspaces from shared
24
24
  # Organization workspaces. Both keep the same local-first JSON store; the type