ltcai 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/docs/CHANGELOG.md +41 -0
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/mcp.py +386 -0
- package/latticeai/api/models.py +307 -0
- package/latticeai/core/workspace_os.py +1 -1
- package/latticeai/server_app.py +49 -616
- package/package.json +1 -1
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""Model / engine API router.
|
|
2
|
+
|
|
3
|
+
Extracted from ``server_app.py`` in v1.3.0. Paths and schemas unchanged:
|
|
4
|
+
``/models*``, ``/engines*`` (install/verify-cloud/pull-model/prepare-model[/stream]),
|
|
5
|
+
``/setup/set-api-key``.
|
|
6
|
+
|
|
7
|
+
Mirrors the established router-factory convention: the heavy provider/runtime
|
|
8
|
+
helpers (engine_status, prepare_and_load_model, download_hf_model,
|
|
9
|
+
verify_cloud_models, …) remain owned by server_app for now and are injected here
|
|
10
|
+
as callables, so this module has no import cycle and adds no import-time
|
|
11
|
+
side effects.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
import subprocess
|
|
19
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
20
|
+
|
|
21
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
22
|
+
from fastapi.responses import StreamingResponse
|
|
23
|
+
from pydantic import BaseModel
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LoadModelRequest(BaseModel):
    """Request body for ``POST /models/load``."""

    # Model reference; a leading "<engine>:" prefix is used to infer the
    # engine when ``engine`` is not supplied.
    model_id: str
    # Explicit engine name; forwarded unchanged to the model-preparation helper.
    engine: Optional[str] = None
    # Forwarded unchanged to the model-preparation helper.
    user_email: Optional[str] = None
    # Forwarded unchanged to the model-preparation helper.
    adapter_path: Optional[str] = None
    # Forwarded unchanged to the model-preparation helper.
    draft_model_id: Optional[str] = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class InstallEngineRequest(BaseModel):
    """Request body for ``POST /engines/install``."""

    # Engine name handed to the injected ``install_engine`` callable.
    engine: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SetApiKeyRequest(BaseModel):
    """Request body for ``POST /setup/set-api-key``."""

    # Provider name; the handler rejects names it cannot find in
    # ``OPENAI_COMPATIBLE_PROVIDERS``.
    provider: str
    # API key; the handler strips it and rejects it if nothing is left.
    key: str
    # Account to store the key for. When omitted, the handler falls back to
    # the caller's own identity.
    user_email: Optional[str] = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class PullModelRequest(BaseModel):
    """Request body for ``POST /engines/pull-model``."""

    # Model reference. May carry an "<engine>:" prefix (ollama, vllm,
    # lmstudio, llamacpp, local_mlx, mlx); unprefixed references are handled
    # as local_mlx by the handler.
    model: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class PrepareModelRequest(BaseModel):
    """Request body for ``POST /engines/prepare-model`` and its ``/stream`` variant."""

    # Model reference, passed positionally to the preparation helper.
    model: str
    # Forwarded unchanged as the ``engine`` keyword argument.
    engine: Optional[str] = None
    # Forwarded unchanged as the ``user_email`` keyword argument.
    user_email: Optional[str] = None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class VerifyCloudRequest(BaseModel):
    """Request body for ``POST /engines/verify-cloud``."""

    # Passed to ``verify_cloud_models`` as ``force``.
    # NOTE(review): presumably bypasses a cached result — confirm in the helper.
    force: bool = False
    # Passed to ``verify_cloud_models`` as ``provider_filter``.
    provider: Optional[str] = None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def create_models_router(
|
|
60
|
+
*,
|
|
61
|
+
model_router: Any,
|
|
62
|
+
require_user: Callable[[Request], str],
|
|
63
|
+
get_current_user: Callable[[Request], Optional[str]],
|
|
64
|
+
load_users: Callable[[], Dict],
|
|
65
|
+
get_user_role: Callable[..., str],
|
|
66
|
+
install_engine: Callable[[str], Dict],
|
|
67
|
+
verify_cloud_models: Callable[..., Any],
|
|
68
|
+
normalize_local_model_request: Callable[..., str],
|
|
69
|
+
download_hf_model: Callable[..., Dict],
|
|
70
|
+
prepare_and_load_model: Callable[..., Any],
|
|
71
|
+
prepare_and_load_model_stream: Callable[..., Any],
|
|
72
|
+
sse_event: Callable[[str, Dict], str],
|
|
73
|
+
ensure_ollama_server: Callable[[], None],
|
|
74
|
+
local_binary: Callable[[str], Optional[str]],
|
|
75
|
+
engine_status: Callable[[], List[Dict]],
|
|
76
|
+
filter_lower_family_versions: Callable[[List[Dict]], List[Dict]],
|
|
77
|
+
list_compat_profiles: Callable[[], Any],
|
|
78
|
+
set_user_api_key: Callable[..., None],
|
|
79
|
+
engine_model_catalog: Dict,
|
|
80
|
+
model_engine_aliases: Dict,
|
|
81
|
+
cloud_verify_ttl_seconds: int,
|
|
82
|
+
is_public_mode: bool,
|
|
83
|
+
allow_local_models: bool,
|
|
84
|
+
require_auth: bool,
|
|
85
|
+
) -> APIRouter:
|
|
86
|
+
router = APIRouter()
|
|
87
|
+
# Bind injected deps to the names the moved handler bodies expect.
|
|
88
|
+
_router = model_router
|
|
89
|
+
ENGINE_MODEL_CATALOG = engine_model_catalog
|
|
90
|
+
MODEL_ENGINE_ALIASES = model_engine_aliases
|
|
91
|
+
CLOUD_VERIFY_TTL_SECONDS = cloud_verify_ttl_seconds
|
|
92
|
+
IS_PUBLIC_MODE = is_public_mode
|
|
93
|
+
ALLOW_LOCAL_MODELS = allow_local_models
|
|
94
|
+
REQUIRE_AUTH = require_auth
|
|
95
|
+
_list_compat_profiles = list_compat_profiles
|
|
96
|
+
|
|
97
|
+
def _recommended_with_engine_options(items: List[Dict[str, object]]) -> List[Dict[str, object]]:
    """Attach per-engine load options to each recommended catalog entry.

    Every entry must provide ``id``, ``name``, ``tag`` and ``size`` (a missing
    key raises ``KeyError``). The lower-cased ``id`` is looked up in
    ``MODEL_ENGINE_ALIASES``; each engine that has a non-empty alias yields one
    option. Entries with no alias at all get a single ``local_mlx`` option that
    points at the entry's own ``id``. The first option's engine is reported as
    ``recommended_engine``.
    """
    # Probe order is significant: it fixes both the option order and which
    # engine ends up as the recommendation.
    engine_order = ("local_mlx", "ollama", "lmstudio", "llamacpp", "vllm")
    decorated: List[Dict[str, object]] = []

    for entry in items:
        record = {
            "id": entry["id"],
            "name": entry["name"],
            "tag": entry["tag"],
            "size": entry["size"],
            "display_name": entry.get("name") or entry.get("id"),
        }

        alias_map = MODEL_ENGINE_ALIASES.get(str(entry["id"]).lower()) or {}
        choices: List[Dict[str, str]] = []
        for engine_name in engine_order:
            target = alias_map.get(engine_name)
            if target:
                # local_mlx ids are loaded bare; every other engine is
                # addressed as "<engine>:<model>".
                if engine_name == "local_mlx":
                    load_id = target
                else:
                    load_id = f"{engine_name}:{target}"
                choices.append({
                    "engine": engine_name,
                    "model_id": target,
                    "load_id": load_id,
                })

        if not choices:
            choices.append({"engine": "local_mlx", "model_id": entry["id"], "load_id": entry["id"]})

        record["engine_options"] = choices
        record["recommended_engine"] = choices[0]["engine"]
        decorated.append(record)

    return decorated
|
|
125
|
+
|
|
126
|
+
# ── Engines ───────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
@router.post("/engines/install")
async def engines_install(req: InstallEngineRequest, request: Request):
    """Install the engine named in the request body.

    Runs the injected ``require_user`` check first, then returns whatever the
    injected ``install_engine`` callable produces for ``req.engine``.
    """
    require_user(request)
    engine_name = req.engine
    outcome = install_engine(engine_name)
    return outcome
|
|
132
|
+
|
|
133
|
+
@router.post("/engines/verify-cloud")
async def engines_verify_cloud(req: VerifyCloudRequest, request: Request):
    """Run cloud-model verification and report the configured TTL.

    Awaits the injected ``verify_cloud_models`` with the request's ``force``
    flag and ``provider`` filter, and returns its result under ``verified``
    alongside ``CLOUD_VERIFY_TTL_SECONDS`` as ``ttl_seconds``.
    """
    require_user(request)
    verified = await verify_cloud_models(
        force=req.force,
        provider_filter=req.provider,
    )
    payload = {"verified": verified, "ttl_seconds": CLOUD_VERIFY_TTL_SECONDS}
    return payload
|
|
138
|
+
|
|
139
|
+
@router.post("/engines/pull-model")
async def pull_ollama_model(req: PullModelRequest, request: Request):
    """Download a model for a local engine (``POST /engines/pull-model``).

    The model reference is normalized with ``normalize_local_model_request``
    and may carry an ``<engine>:`` prefix; references without a recognized
    prefix are handled as ``local_mlx``.

    * ``ollama``: runs ``ollama pull <model>`` with a 900 second timeout.
    * ``lmstudio``: always rejected with 400; models are managed in the
      LM Studio app.
    * ``vllm`` / ``llamacpp`` / ``local_mlx`` / ``mlx``: delegated to
      ``download_hf_model`` (``mlx`` is downloaded as ``local_mlx``).

    Blocking work (the Ollama subprocess and the Hugging Face download) runs
    in a worker thread so the event loop keeps serving other requests.

    Raises:
        HTTPException: 400 for an empty or invalid model reference, a missing
            Ollama binary, or an LM Studio request; 408 when ``ollama pull``
            times out; 500 when ``ollama pull`` exits non-zero.
    """
    require_user(request)
    model_ref = normalize_local_model_request(req.model, None)
    if not model_ref:
        raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")

    known_engines = {"ollama", "vllm", "lmstudio", "llamacpp", "local_mlx", "mlx"}
    prefix, separator, remainder = model_ref.partition(":")
    normalized_prefix = prefix.strip().lower()
    if separator and normalized_prefix in known_engines:
        provider, model_name = normalized_prefix, remainder.strip()
    else:
        # No recognized engine prefix: the whole reference is the model name.
        provider, model_name = "local_mlx", model_ref

    if not model_name:
        raise HTTPException(status_code=400, detail="모델 이름이 비어 있습니다.")

    if provider == "ollama":
        # The name becomes an argv entry of the ollama CLI; refuse anything
        # that would be parsed as a command-line option.
        if model_name.startswith("-"):
            raise HTTPException(status_code=400, detail="모델 이름이 올바르지 않습니다.")
        ensure_ollama_server()
        ollama = local_binary("ollama")
        if not ollama:
            raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
        try:
            # subprocess.run blocks for up to 15 minutes; keep it off the
            # event loop thread.
            completed = await asyncio.to_thread(
                subprocess.run,
                [ollama, "pull", model_name],
                capture_output=True, text=True, timeout=900, check=False,
            )
        except subprocess.TimeoutExpired:
            raise HTTPException(status_code=408, detail="모델 다운로드 시간이 초과되었습니다.") from None
        if completed.returncode != 0:
            # Only the tail of stderr is returned to keep the response bounded.
            raise HTTPException(status_code=500, detail=completed.stderr[-2000:] or "pull 실패")
        return {"provider": provider, "model": model_name, "returncode": completed.returncode}

    if provider == "lmstudio":
        raise HTTPException(
            status_code=400,
            detail=(
                "LM Studio 모델은 Lattice에서 Hugging Face로 pull하지 않습니다. "
                "LM Studio 앱에서 모델을 다운로드하고 Local Server를 켠 뒤 모델을 로드하세요. "
                "그러면 모델 선택창에 실제 /v1/models 항목이 표시됩니다."
            ),
        )

    if provider in {"vllm", "llamacpp", "local_mlx", "mlx"}:
        download_provider = "local_mlx" if provider == "mlx" else provider
        # download_hf_model is a synchronous callable; run it in a worker
        # thread for the same reason as the ollama pull above.
        result = await asyncio.to_thread(download_hf_model, model_name, download_provider)
        return {"provider": provider, "model": model_name, "returncode": 0, **result}

    # Defensive fallback: every provider value produced above is handled by
    # one of the branches, so this is only reached if the set of recognized
    # engines grows without a matching branch.
    raise HTTPException(status_code=400, detail=f"{provider} 엔진 모델 다운로드는 아직 자동화되지 않았습니다.")
|
|
188
|
+
|
|
189
|
+
@router.post("/engines/prepare-model")
async def engines_prepare_model(req: PrepareModelRequest, request: Request):
    """Prepare and load a model, returning the helper's result directly.

    After the ``require_user`` check, awaits the injected
    ``prepare_and_load_model`` with the requested model, the incoming request,
    and the optional ``engine`` / ``user_email`` from the body.
    """
    require_user(request)
    forwarded = {"engine": req.engine, "user_email": req.user_email}
    outcome = await prepare_and_load_model(req.model, request, **forwarded)
    return outcome
|
|
195
|
+
|
|
196
|
+
@router.post("/engines/prepare-model/stream")
async def engines_prepare_model_stream(req: PrepareModelRequest, request: Request):
    """Stream model-preparation progress as ``text/event-stream``.

    Chunks yielded by the injected ``prepare_and_load_model_stream`` are passed
    through untouched. Failures raised while streaming are turned into a final
    ``error`` event built with ``sse_event`` instead of propagating, carrying
    the exception's status code (or 500) and a detail message.
    """
    require_user(request)

    fallback_detail = "모델 준비에 실패했습니다."

    def _error_frame(status_code, detail):
        # Single place that shapes the terminal error event.
        return sse_event("error", {"status_code": status_code, "detail": detail})

    async def event_stream():
        try:
            progress = prepare_and_load_model_stream(
                req.model,
                request,
                engine=req.engine,
                user_email=req.user_email,
            )
            async for chunk in progress:
                yield chunk
        except HTTPException as exc:
            yield _error_frame(exc.status_code, exc.detail or fallback_detail)
        except Exception as exc:
            logging.exception("model prepare stream failed")
            # Only the tail of the message is sent to keep the event small.
            yield _error_frame(500, str(exc)[-1000:] or fallback_detail)

    response_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers=response_headers,
    )
|
|
223
|
+
|
|
224
|
+
@router.post("/setup/set-api-key")
async def set_api_key(req: SetApiKeyRequest, request: Request):
    """Store a provider API key for a user (``POST /setup/set-api-key``).

    Validation order: the provider must exist in
    ``OPENAI_COMPATIBLE_PROVIDERS``, the key must be non-blank, and (when
    ``REQUIRE_AUTH`` is set) the caller must be identified. Setting a key for
    an account other than the caller's requires the ``admin`` role. The key is
    stored stripped, via the injected ``set_user_api_key``.

    Raises:
        HTTPException: 400 for an unknown provider, a blank key, or when no
            target account can be determined; 401 when authentication is
            required but missing; 403 when a non-admin targets another user.
    """
    # Imported lazily inside the handler, as in the original code.
    from llm_router import OPENAI_COMPATIBLE_PROVIDERS

    provider_name = req.provider
    if not OPENAI_COMPATIBLE_PROVIDERS.get(provider_name):
        raise HTTPException(status_code=400, detail="알 수 없는 프로바이더입니다.")

    api_key = req.key.strip()
    if not api_key:
        raise HTTPException(status_code=400, detail="API 키가 비어있습니다.")

    caller = get_current_user(request)
    if REQUIRE_AUTH and not caller:
        raise HTTPException(status_code=401, detail="인증이 필요합니다.")

    requested_email = req.user_email
    if requested_email and requested_email != caller:
        # Acting on someone else's account is an admin-only operation.
        users = load_users()
        caller_role = get_user_role(caller or "", users)
        if caller_role != "admin":
            raise HTTPException(status_code=403, detail="다른 사용자의 API 키를 설정할 권한이 없습니다.")

    target_email = (requested_email or caller or "").strip()
    if not target_email:
        raise HTTPException(status_code=400, detail="사용자 식별이 필요합니다. 로그인 후 다시 시도하세요.")

    set_user_api_key(target_email, provider_name, api_key)
    return {"ok": True, "provider": provider_name, "user_email": target_email, "scope": "user"}
|
|
244
|
+
|
|
245
|
+
# ── Models ────────────────────────────────────────────────────────────
|
|
246
|
+
|
|
247
|
+
@router.get("/models")
async def list_models():
    """Return the model overview used by ``GET /models``.

    The response combines the recommended ``local_mlx`` catalog entries
    (filtered by ``filter_lower_family_versions`` and decorated with engine
    options), detected cloud models, engine status, the router's loaded and
    current model ids, and the compatibility profiles. This route performs no
    ``require_user`` check.
    """
    catalog_entries = ENGINE_MODEL_CATALOG.get("local_mlx", [])
    recommended = _recommended_with_engine_options(
        list(filter_lower_family_versions(catalog_entries))
    )

    cloud_models = _router.detected_cloud_models()
    # engine_status is synchronous, so it runs in a worker thread.
    engines = await asyncio.to_thread(engine_status)
    # Router state is read after the await, matching the original ordering.
    loaded_ids = _router.loaded_model_ids
    current_id = _router.current_model_id
    profiles = _list_compat_profiles()

    return {
        "recommended": recommended,
        "cloud": cloud_models,
        "engines": engines,
        "loaded": loaded_ids,
        "current": current_id,
        "compat_profiles": profiles,
    }
|
|
260
|
+
|
|
261
|
+
@router.get("/models/compat-profiles")
async def list_model_compat_profiles(request: Request):
    """Return the compatibility profiles under the ``profiles`` key.

    Requires an authenticated user via the injected ``require_user``.
    """
    require_user(request)
    profiles = _list_compat_profiles()
    return {"profiles": profiles}
|
|
265
|
+
|
|
266
|
+
@router.post("/models/load")
async def load_model(req: LoadModelRequest, request: Request):
    """Load a model through the injected ``prepare_and_load_model``.

    The engine is taken from ``req.engine`` or, failing that, from the
    ``<engine>:`` prefix of ``req.model_id`` (defaulting to ``local_mlx``).
    In public mode without ``ALLOW_LOCAL_MODELS``, local MLX engines are
    refused. The engine name is stripped and lower-cased before that check so
    that spellings such as ``MLX:...`` or `` local_mlx`` cannot slip past it.

    Raises:
        HTTPException: 400 when public mode blocks a local MLX load; 500
            wrapping any non-HTTP error raised while loading. HTTP errors
            raised by the helper propagate unchanged.
    """
    # NOTE(review): unlike the other mutating routes in this router, this
    # handler does not call require_user(request). Confirm that
    # prepare_and_load_model enforces authentication itself.
    model_id = req.model_id
    prefix, separator, _ = model_id.partition(":")
    inferred_engine = prefix if separator else "local_mlx"
    requested_engine = (req.engine or inferred_engine).strip().lower()

    if IS_PUBLIC_MODE and not ALLOW_LOCAL_MODELS and requested_engine in {"local_mlx", "mlx"}:
        raise HTTPException(
            status_code=400,
            detail="Public mode blocks local MLX model loading. Use openai:, openrouter:, groq:, together:, or set LATTICEAI_ALLOW_LOCAL_MODELS=true.",
        )

    try:
        # The caller's original engine value is forwarded untouched; only the
        # guard above works on the normalized form.
        return await prepare_and_load_model(
            model_id, request, engine=req.engine, user_email=req.user_email,
            adapter_path=req.adapter_path, draft_model_id=req.draft_model_id,
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
284
|
+
|
|
285
|
+
@router.post("/models/switch/{model_id:path}")
async def switch_model(model_id: str, request: Request):
    """Make an already-loaded model the current one.

    Raises:
        HTTPException: 404 when the router signals, via ``KeyError``, that the
            model is not loaded.
    """
    require_user(request)
    try:
        _router.switch_model(model_id)
        active_id = _router.current_model_id
        return {"status": "ok", "current": active_id}
    except KeyError:
        not_loaded = f"Model '{model_id}' not loaded. Call /models/load first."
        raise HTTPException(status_code=404, detail=not_loaded)
|
|
293
|
+
|
|
294
|
+
@router.delete("/models/unload/{model_id:path}")
async def unload_model(model_id: str, request: Request):
    """Unload one model from the router and echo its id back.

    Errors raised by the router's ``unload_model`` are not handled here and
    propagate to the framework.
    """
    require_user(request)
    _router.unload_model(model_id)
    response = {"status": "ok", "unloaded": model_id}
    return response
|
|
299
|
+
|
|
300
|
+
@router.delete("/models/unload-all")
async def unload_all_models(request: Request):
    """Unload every model and report which ids were loaded beforehand.

    The loaded ids are copied into a new list before ``unload_all()`` runs.
    If ``loaded_model_ids`` exposes a live container that the router empties
    in place, returning it directly would always report an empty list.
    """
    require_user(request)
    # Snapshot first: the response must describe the state before unloading.
    unloaded = list(_router.loaded_model_ids)
    _router.unload_all()
    return {"status": "ok", "unloaded": unloaded}
|
|
306
|
+
|
|
307
|
+
return router
|
|
@@ -18,7 +18,7 @@ from pathlib import Path
|
|
|
18
18
|
from typing import Any, Callable, Dict, Iterable, List, Optional
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
WORKSPACE_OS_VERSION = "1.2.0"
|
|
21
|
+
WORKSPACE_OS_VERSION = "1.3.0"
|
|
22
22
|
|
|
23
23
|
# Workspace types separate single-user Personal workspaces from shared
|
|
24
24
|
# Organization workspaces. Both keep the same local-first JSON store; the type
|