ltcai 0.1.28 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -3
- package/auto_setup.py +605 -0
- package/docs/CHANGELOG.md +30 -0
- package/kg_schema.py +723 -0
- package/package.json +4 -1
- package/server.py +727 -42
- package/static/account.html +5 -616
- package/static/admin.html +236 -1371
- package/static/chat.html +204 -7146
- package/static/graph.html +15 -1436
- package/static/lattice-reference.css +6557 -71
- package/static/scripts/account.js +230 -0
- package/static/scripts/admin.js +1198 -0
- package/static/scripts/chat.js +4634 -0
- package/static/scripts/graph.js +1059 -0
- package/static/sw.js +11 -1
package/server.py
CHANGED
|
@@ -12,6 +12,7 @@ import json
|
|
|
12
12
|
import logging
|
|
13
13
|
import os
|
|
14
14
|
import platform
|
|
15
|
+
import queue
|
|
15
16
|
import re
|
|
16
17
|
import secrets
|
|
17
18
|
import threading
|
|
@@ -211,19 +212,24 @@ SSO_CLIENT_SECRET = env_value("OIDC_CLIENT_SECRET", "")
|
|
|
211
212
|
SSO_REDIRECT_URI = env_value("OIDC_REDIRECT_URI", "http://localhost:4825/auth/sso/callback")
|
|
212
213
|
SSO_PROVIDER_NAME = env_value("OIDC_PROVIDER_NAME", "SSO")
|
|
213
214
|
_sso_discovery_cache: Optional[Dict] = None
|
|
215
|
+
_sso_discovery_cache_url: str = ""
|
|
214
216
|
_sso_states: Dict[str, float] = {} # state → timestamp (CSRF protection)
|
|
215
217
|
|
|
216
218
|
async def _get_sso_discovery() -> Optional[Dict]:
|
|
217
|
-
global _sso_discovery_cache
|
|
218
|
-
|
|
219
|
+
global _sso_discovery_cache, _sso_discovery_cache_url
|
|
220
|
+
settings = get_sso_settings()
|
|
221
|
+
discovery_url = settings.get("discovery_url", "")
|
|
222
|
+
if _sso_discovery_cache and _sso_discovery_cache_url == discovery_url:
|
|
219
223
|
return _sso_discovery_cache
|
|
220
|
-
if not
|
|
224
|
+
if not discovery_url:
|
|
221
225
|
return None
|
|
222
226
|
try:
|
|
223
227
|
import httpx as _httpx
|
|
224
228
|
async with _httpx.AsyncClient() as c:
|
|
225
|
-
r = await c.get(
|
|
229
|
+
r = await c.get(discovery_url, timeout=10)
|
|
230
|
+
r.raise_for_status()
|
|
226
231
|
_sso_discovery_cache = r.json()
|
|
232
|
+
_sso_discovery_cache_url = discovery_url
|
|
227
233
|
except Exception as e:
|
|
228
234
|
logging.warning("SSO discovery failed: %s", e)
|
|
229
235
|
return None
|
|
@@ -357,11 +363,12 @@ HISTORY_FILE = DATA_DIR / "chat_history.json"
|
|
|
357
363
|
VPC_FILE = DATA_DIR / "vpc_config.json"
|
|
358
364
|
MCP_FILE = DATA_DIR / "mcp_installs.json"
|
|
359
365
|
AUDIT_FILE = DATA_DIR / "audit_log.json"
|
|
366
|
+
SSO_FILE = DATA_DIR / "sso_config.json"
|
|
360
367
|
KNOWLEDGE_GRAPH = KnowledgeGraphStore(DATA_DIR / "knowledge_graph.sqlite", DATA_DIR / "knowledge_graph_blobs") if ENABLE_GRAPH else None
|
|
361
368
|
|
|
362
369
|
def _require_graph():
    """Abort the request with HTTP 404 when the knowledge graph is unavailable.

    Returns normally only when ``ENABLE_GRAPH`` is truthy and the
    ``KNOWLEDGE_GRAPH`` store object exists.

    Raises:
        HTTPException: status 404 when the graph feature is switched off or
            the store was never created.
    """
    graph_available = bool(ENABLE_GRAPH) and KNOWLEDGE_GRAPH is not None
    if graph_available:
        return
    raise HTTPException(
        status_code=404,
        detail="지식 그래프가 비활성화되어 있습니다. LATTICEAI_ENABLE_GRAPH=true 설정 후 다시 시도해 주세요.",
    )
|
|
365
372
|
|
|
366
373
|
class UserRegister(BaseModel):
|
|
367
374
|
email: str
|
|
@@ -387,6 +394,75 @@ class VpcConfigUpdate(BaseModel):
|
|
|
387
394
|
peering_status: Optional[str] = None
|
|
388
395
|
notes: Optional[str] = None
|
|
389
396
|
|
|
397
|
+
class SsoConfigUpdate(BaseModel):
    # Request body for PATCH /admin/sso. Every field is optional and defaults
    # to None so a client can send only the settings it wants to change.

    # Whether SSO login should be offered.
    enabled: Optional[bool] = None
    # Display name of the identity provider.
    provider_name: Optional[str] = None
    # URL of the provider's OIDC discovery document.
    discovery_url: Optional[str] = None
    # OAuth client credentials registered with the provider.
    client_id: Optional[str] = None
    client_secret: Optional[str] = None
    # Callback URL sent to the provider during login and token exchange.
    redirect_uri: Optional[str] = None
    # Space-separated scope string requested at login.
    scopes: Optional[str] = None
|
|
405
|
+
|
|
406
|
+
def _sso_env_defaults() -> Dict[str, object]:
    """Build the baseline SSO configuration from the module-level SSO constants.

    The ``enabled`` default is true only when the discovery URL, client ID and
    client secret constants are all non-empty.

    Returns:
        A fresh dict with the keys ``enabled``, ``provider_name``,
        ``discovery_url``, ``client_id``, ``client_secret``, ``redirect_uri``
        and ``scopes``.
    """
    credentials_present = all((SSO_DISCOVERY_URL, SSO_CLIENT_ID, SSO_CLIENT_SECRET))
    defaults: Dict[str, object] = {}
    defaults["enabled"] = credentials_present
    defaults["provider_name"] = SSO_PROVIDER_NAME
    defaults["discovery_url"] = SSO_DISCOVERY_URL
    defaults["client_id"] = SSO_CLIENT_ID
    defaults["client_secret"] = SSO_CLIENT_SECRET
    defaults["redirect_uri"] = SSO_REDIRECT_URI
    defaults["scopes"] = "openid email profile"
    return defaults
|
|
416
|
+
|
|
417
|
+
def load_sso_config() -> Dict[str, object]:
    """Return the effective SSO configuration.

    Starts from ``_sso_env_defaults()`` and overlays every non-``None`` value
    found in ``SSO_FILE`` when that file exists and holds a JSON object. A
    read or parse failure is logged and the defaults are used instead.

    The string settings are then coerced to ``str`` with fallbacks, and
    ``enabled`` is forced to ``False`` unless the discovery URL, client ID and
    client secret are all set.
    """
    merged = _sso_env_defaults()
    if SSO_FILE.exists():
        try:
            stored = json.loads(SSO_FILE.read_text(encoding="utf-8"))
            if isinstance(stored, dict):
                overrides = {}
                for key, value in stored.items():
                    if value is not None:
                        overrides[key] = value
                merged.update(overrides)
        except Exception as e:
            logging.warning("load_sso_config failed (using env/defaults): %s", e)

    # (key, fallback used when the merged value is missing or falsy)
    string_fields = (
        ("provider_name", "SSO"),
        ("discovery_url", ""),
        ("client_id", ""),
        ("client_secret", ""),
        ("redirect_uri", SSO_REDIRECT_URI),
        ("scopes", "openid email profile"),
    )
    for key, fallback in string_fields:
        merged[key] = str(merged.get(key) or fallback)

    requested = bool(merged.get("enabled"))
    merged["enabled"] = requested and bool(
        merged["discovery_url"] and merged["client_id"] and merged["client_secret"]
    )
    return merged
|
|
436
|
+
|
|
437
|
+
def get_sso_settings() -> Dict[str, object]:
    """Return the current SSO settings by delegating to ``load_sso_config``."""
    settings = load_sso_config()
    return settings
|
|
439
|
+
|
|
440
|
+
def public_sso_config(config: Optional[Dict[str, object]] = None) -> Dict[str, object]:
    """Return a view of the SSO settings that omits the client secret.

    Args:
        config: Settings to present. When omitted or empty, the current
            settings are loaded through ``get_sso_settings()``.

    Returns:
        A dict with ``enabled``, ``provider_name``, ``discovery_url``,
        ``client_id``, ``redirect_uri``, ``scopes`` and ``secret_configured``.
        The last key reports only whether a client secret is set.
    """
    source = config if config else get_sso_settings()

    view: Dict[str, object] = {"enabled": bool(source.get("enabled"))}
    for key in ("provider_name", "discovery_url", "client_id"):
        view[key] = source.get(key) or ""
    view["redirect_uri"] = source.get("redirect_uri") or SSO_REDIRECT_URI
    view["scopes"] = source.get("scopes") or "openid email profile"
    view["secret_configured"] = bool(source.get("client_secret"))
    return view
|
|
451
|
+
|
|
452
|
+
def save_sso_config(update: Dict[str, object]) -> Dict[str, object]:
    """Merge ``update`` into the stored SSO configuration and persist it.

    Args:
        update: Partial settings. ``None`` values are ignored, and an empty
            string for ``client_secret`` means "keep the existing secret".
            The dict is read only; it is never modified.

    Returns:
        The full configuration that was written to ``SSO_FILE``, including
        the client secret.

    Raises:
        OSError: if the configuration file cannot be written.
    """
    global _sso_discovery_cache, _sso_discovery_cache_url
    current = load_sso_config()

    # Filter into a new dict instead of popping from the caller's dict.
    changes = {
        key: value
        for key, value in update.items()
        if value is not None and not (key == "client_secret" and value == "")
    }
    current.update(changes)
    current["enabled"] = bool(current.get("enabled")) and bool(
        current.get("discovery_url") and current.get("client_id") and current.get("client_secret")
    )

    # Write to a sibling temp file and swap it in, so a crash mid-write
    # cannot leave a truncated config file behind.
    serialized = json.dumps(current, ensure_ascii=False, indent=2)
    tmp_file = SSO_FILE.with_name(SSO_FILE.name + ".tmp")
    try:
        tmp_file.write_text(serialized, encoding="utf-8")
        tmp_file.replace(SSO_FILE)
    except OSError:
        # Do not leave a stray copy of the secret-bearing config around.
        try:
            tmp_file.unlink()
        except OSError:
            pass
        raise

    # The discovery document may belong to the previous provider; drop it.
    _sso_discovery_cache = None
    _sso_discovery_cache_url = ""
    return current
|
|
465
|
+
|
|
390
466
|
class McpRecommendRequest(BaseModel):
|
|
391
467
|
query: str
|
|
392
468
|
limit: int = 5
|
|
@@ -479,13 +555,13 @@ MCP_REGISTRY = [
|
|
|
479
555
|
},
|
|
480
556
|
{
|
|
481
557
|
"id": "computer-use",
|
|
482
|
-
"name": "
|
|
558
|
+
"name": "내 컴퓨터 MCP",
|
|
483
559
|
"category": "Desktop / Mac UI",
|
|
484
560
|
"install_mode": "connector",
|
|
485
561
|
"connector_url": "/mcp/connectors/computer-use",
|
|
486
562
|
"external_url": "codex://plugins/computer-use",
|
|
487
|
-
"description": "
|
|
488
|
-
"keywords": ["computer use", "desktop", "mac", "click", "type", "scroll", "컴퓨터", "맥", "앱", "클릭", "타이핑"],
|
|
563
|
+
"description": "사용자의 허용을 받아 이 컴퓨터의 파일, 화면, 앱 작업을 돕는 브리지입니다.",
|
|
564
|
+
"keywords": ["computer use", "desktop", "mac", "click", "type", "scroll", "내 컴퓨터", "컴퓨터", "맥", "앱", "클릭", "타이핑"],
|
|
489
565
|
"capabilities": ["Mac 앱 UI 조작", "스크린샷 기반 상태 확인", "클릭/입력/스크롤"],
|
|
490
566
|
},
|
|
491
567
|
{
|
|
@@ -2216,23 +2292,23 @@ async def login(req: UserLogin, request: Request):
|
|
|
2216
2292
|
|
|
2217
2293
|
@app.get("/auth/sso/config")
async def sso_config():
    """Return the secret-free SSO settings produced by ``public_sso_config``."""
    public_view = public_sso_config()
    return public_view
|
|
2221
2296
|
|
|
2222
2297
|
@app.get("/auth/sso/login")
async def sso_login():
    """Redirect the browser to the identity provider's authorization endpoint.

    A fresh random ``state`` value is recorded in ``_sso_states`` with its
    creation time so the callback can validate it.

    Raises:
        HTTPException: status 503 when SSO is disabled, the discovery document
            cannot be obtained, or it has no ``authorization_endpoint``.
    """
    from urllib.parse import urlencode
    from fastapi.responses import RedirectResponse as _Redirect

    settings = get_sso_settings()
    # Check the cheap local flag first so a disabled SSO setup never triggers
    # an outbound discovery request.
    if not settings.get("enabled"):
        raise HTTPException(status_code=503, detail="SSO가 설정되지 않았습니다.")
    discovery = await _get_sso_discovery()
    authorization_endpoint = (
        discovery.get("authorization_endpoint") if isinstance(discovery, dict) else None
    )
    if not authorization_endpoint:
        raise HTTPException(status_code=503, detail="SSO가 설정되지 않았습니다.")

    now = time.time()
    # Drop states older than the 300-second window the callback accepts;
    # otherwise abandoned logins accumulate in memory forever.
    for stale in [key for key, created in _sso_states.items() if now - created > 300]:
        _sso_states.pop(stale, None)

    state = secrets.token_urlsafe(16)
    _sso_states[state] = now
    params = urlencode({
        "client_id": settings["client_id"],
        "response_type": "code",
        "redirect_uri": settings["redirect_uri"],
        "scope": settings.get("scopes") or "openid email profile",
        "state": state,
    })
    # The endpoint URI may already carry a query component, which must be kept.
    separator = "&" if "?" in authorization_endpoint else "?"
    return _Redirect(f"{authorization_endpoint}{separator}{params}")
|
|
@@ -2246,17 +2322,18 @@ async def sso_callback(code: str = "", state: str = "", error: str = ""):
|
|
|
2246
2322
|
ts = _sso_states.pop(state, None)
|
|
2247
2323
|
if ts is None or time.time() - ts > 300:
|
|
2248
2324
|
raise HTTPException(status_code=400, detail="유효하지 않은 SSO 상태입니다.")
|
|
2325
|
+
settings = get_sso_settings()
|
|
2249
2326
|
discovery = await _get_sso_discovery()
|
|
2250
|
-
if not discovery:
|
|
2327
|
+
if not settings.get("enabled") or not discovery:
|
|
2251
2328
|
raise HTTPException(status_code=503, detail="SSO 설정 오류입니다.")
|
|
2252
2329
|
import httpx as _httpx
|
|
2253
2330
|
async with _httpx.AsyncClient() as c:
|
|
2254
2331
|
r = await c.post(discovery["token_endpoint"], data={
|
|
2255
2332
|
"grant_type": "authorization_code",
|
|
2256
2333
|
"code": code,
|
|
2257
|
-
"redirect_uri":
|
|
2258
|
-
"client_id":
|
|
2259
|
-
"client_secret":
|
|
2334
|
+
"redirect_uri": settings["redirect_uri"],
|
|
2335
|
+
"client_id": settings["client_id"],
|
|
2336
|
+
"client_secret": settings["client_secret"],
|
|
2260
2337
|
}, headers={"Accept": "application/json"}, timeout=15)
|
|
2261
2338
|
tokens = r.json()
|
|
2262
2339
|
id_token = tokens.get("id_token")
|
|
@@ -2468,6 +2545,25 @@ async def admin_invite_link(request: Request):
|
|
|
2468
2545
|
url = f"{scheme}://{host}/"
|
|
2469
2546
|
return {"invite_url": url, "invite_code": INVITE_CODE, "gate_enabled": INVITE_GATE_ENABLED}
|
|
2470
2547
|
|
|
2548
|
+
@app.get("/admin/sso")
async def admin_sso(request: Request):
    """Return the secret-free SSO settings after ``require_admin`` accepts the request."""
    require_admin(request)
    current_view = public_sso_config()
    return current_view
|
|
2552
|
+
|
|
2553
|
+
@app.patch("/admin/sso")
async def admin_update_sso(req: SsoConfigUpdate, request: Request):
    """Apply a partial SSO settings update, audit it, and return the public view.

    Only fields the client actually sent are forwarded to ``save_sso_config``.
    The audit event records the acting admin, the provider name, the discovery
    URL and the resulting enabled flag.
    """
    admin_email, _ = require_admin(request)
    saved = save_sso_config(req.dict(exclude_unset=True))
    audit_fields = {
        "user_email": admin_email,
        "provider_name": saved.get("provider_name"),
        "discovery_url": saved.get("discovery_url"),
        "enabled": bool(saved.get("enabled")),
    }
    append_audit_event("sso_config_update", **audit_fields)
    return public_sso_config(saved)
|
|
2566
|
+
|
|
2471
2567
|
# ── Invitation Logic ────────────────────────────────────────────────────────
|
|
2472
2568
|
INVITE_CODE = env_value("LATTICEAI_INVITE_CODE", "gemma-lattice-ai")
|
|
2473
2569
|
INVITE_GATE_ENABLED = env_bool("LATTICEAI_INVITE_GATE_ENABLED", default=False)
|
|
@@ -2495,7 +2591,7 @@ async def root(request: Request, code: Optional[str] = None, authorized: Optiona
|
|
|
2495
2591
|
<div style="font-size:48px; margin-bottom:20px;">🔒</div>
|
|
2496
2592
|
<h1 style="color:#378ADD; margin:0; font-size:24px;">Invitation Required</h1>
|
|
2497
2593
|
<p style="color:#94a3b8; margin:20px 0; line-height:1.6;">이 서비스는 비공개로 운영되고 있습니다.<br>선생님께 받은 <b>초대용 전용 링크</b>를 통해 접속해 주세요.</p>
|
|
2498
|
-
<div style="margin-top:30px; padding-top:20px; border-top:1px solid rgba(255,255,255,0.05); font-size:11px; color:rgba(255,255,255,0.2); letter-spacing:1px;">LATTICE AI
|
|
2594
|
+
<div style="margin-top:30px; padding-top:20px; border-top:1px solid rgba(255,255,255,0.05); font-size:11px; color:rgba(255,255,255,0.2); letter-spacing:1px;">LATTICE AI</div>
|
|
2499
2595
|
</div>
|
|
2500
2596
|
</body>
|
|
2501
2597
|
""", status_code=403)
|
|
@@ -2550,6 +2646,48 @@ async def status():
|
|
|
2550
2646
|
}
|
|
2551
2647
|
|
|
2552
2648
|
|
|
2649
|
+
def _collect_local_sysinfo() -> Dict[str, object]:
    """Gather CPU, RAM and MLX GPU-memory usage with blocking system calls.

    Runs ``top``, ``vm_stat`` and ``sysctl`` with the same arguments as before.
    Any failure in the CPU/RAM section is reported under the ``error`` key
    rather than raised; a failure in the optional GPU probe is only logged.
    """
    import re as _re
    import subprocess

    result: Dict[str, object] = {"cpu_pct": 0.0, "ram_pct": 0.0, "gpu_mem_pct": 0.0, "gpu_mem_gb": 0.0}
    try:
        # CPU: user + sys percentages from the "CPU usage" summary line.
        top_out = subprocess.run(
            ["top", "-l", "1", "-n", "0"], capture_output=True, text=True, timeout=4
        ).stdout
        for line in top_out.splitlines():
            if "CPU usage" in line:
                m = _re.search(r"([\d.]+)% user.*?([\d.]+)% sys", line)
                if m:
                    result["cpu_pct"] = round(float(m.group(1)) + float(m.group(2)), 1)

        # RAM: only the ratio of page counts is needed, so the page size
        # cancels out and is not looked up.
        vm_out = subprocess.run(["vm_stat"], capture_output=True, text=True, timeout=4).stdout
        page_keys = (
            "Pages free",
            "Pages active",
            "Pages inactive",
            "Pages wired down",
            "Pages occupied by compressor",
        )
        pages: Dict[str, int] = {}
        for line in vm_out.splitlines():
            for key in page_keys:
                if line.startswith(key):
                    m = _re.search(r"(\d+)", line)
                    if m:
                        pages[key] = int(m.group(1))
        total = sum(pages.values())
        used = total - pages.get("Pages free", 0)
        result["ram_pct"] = round(used / total * 100, 1) if total else 0.0

        # GPU (MLX / Apple Silicon unified memory) - optional, best effort.
        try:
            import mlx.core as _mx

            hw_out = subprocess.run(
                ["sysctl", "-n", "hw.memsize"], capture_output=True, text=True, timeout=2
            ).stdout
            total_bytes = int(hw_out.strip())
            gpu_bytes = _mx.get_active_memory() + _mx.get_cache_memory()
            result["gpu_mem_gb"] = round(gpu_bytes / (1024 ** 3), 2)
            result["gpu_mem_pct"] = round(gpu_bytes / total_bytes * 100, 1) if total_bytes else 0.0
        except Exception as gpu_err:
            # MLX is optional; keep the zero defaults but leave a trace.
            logging.debug("local_sysinfo: GPU memory probe skipped: %s", gpu_err)
    except Exception as e:
        result["error"] = str(e)
    return result


@app.get("/local/sysinfo")
async def local_sysinfo(request: Request):
    """Return CPU / RAM / GPU (MLX) usage figures.

    The measurements shell out to system tools with multi-second timeouts, so
    they run in the default executor instead of blocking the event loop.
    """
    require_user(request)
    import asyncio

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _collect_local_sysinfo)
|
|
2689
|
+
|
|
2690
|
+
|
|
2553
2691
|
|
|
2554
2692
|
|
|
2555
2693
|
# ── Request / Response Models ──────────────────────────────────────────────────
|
|
@@ -3208,31 +3346,224 @@ def hf_model_ready(repo_id: str, provider: str = "local_mlx") -> bool:
|
|
|
3208
3346
|
)
|
|
3209
3347
|
return has_config and has_weights and has_tokenizer
|
|
3210
3348
|
|
|
3211
|
-
|
|
3349
|
+
|
|
3350
|
+
def model_download_progress_payload(
    stage: str,
    message: str,
    *,
    percent: Optional[float] = None,
    detail: Optional[str] = None,
    downloaded_bytes: Optional[int] = None,
    total_bytes: Optional[int] = None,
    eta_seconds: Optional[float] = None,
    file: Optional[str] = None,
    indeterminate: bool = False,
) -> Dict[str, object]:
    """Assemble one progress event describing a model preparation step.

    ``stage``, ``message``, ``indeterminate`` and a ``ts`` timestamp are always
    present. Optional fields are added only when supplied: ``percent`` is
    rounded to one decimal and clamped to 0-100, the byte counts and
    ``eta_seconds`` are clamped to be non-negative, and ``detail`` / ``file``
    are skipped when empty.
    """
    event: Dict[str, object] = dict(
        stage=stage,
        message=message,
        indeterminate=indeterminate,
        ts=time.time(),
    )
    if percent is not None:
        rounded = round(float(percent), 1)
        event["percent"] = max(0, min(100, rounded))
    if detail:
        event["detail"] = detail
    if downloaded_bytes is not None:
        event["downloaded_bytes"] = max(0, int(downloaded_bytes))
    if total_bytes is not None:
        event["total_bytes"] = max(0, int(total_bytes))
    if eta_seconds is not None:
        whole_seconds = round(float(eta_seconds))
        event["eta_seconds"] = max(0, whole_seconds)
    if file:
        event["file"] = file
    return event
|
|
3381
|
+
|
|
3382
|
+
|
|
3383
|
+
def estimate_eta_seconds(started_at: float, percent: Optional[float]) -> Optional[float]:
    """Extrapolate the remaining seconds from elapsed time and percent done.

    Args:
        started_at: ``time.time()`` value captured when the work began.
        percent: Completion percentage so far.

    Returns:
        The estimated seconds left, or ``None`` when ``percent`` is missing,
        not above 0, or already at or above 100.
    """
    if percent is None:
        return None
    if percent <= 0 or percent >= 100:
        return None
    # A start time in the future counts as zero elapsed time.
    elapsed = max(0.0, time.time() - started_at)
    remaining = 100.0 - percent
    return elapsed * remaining / percent
|
|
3388
|
+
|
|
3389
|
+
|
|
3390
|
+
def hf_repo_files_with_sizes(repo_id: str) -> List[Dict[str, object]]:
    """List the files of a Hugging Face repository with their sizes.

    First asks ``HfApi.model_info`` for file metadata. If that yields no
    files, raises ``TypeError`` (ignored silently), or fails in any other way
    (logged as a warning), it falls back to ``HfApi.list_repo_files`` and
    reports every size as 0.

    Returns:
        A list of ``{"name": <path>, "size": <int>}`` dicts.
    """
    from huggingface_hub import HfApi

    client = HfApi()
    try:
        repo_info = client.model_info(repo_id, files_metadata=True)
        siblings = getattr(repo_info, "siblings", []) or []
        named = (
            (str(getattr(entry, "rfilename", "") or "").strip(), entry)
            for entry in siblings
        )
        # Skip nameless entries and directory-style names ending in "/".
        with_sizes = [
            {"name": filename, "size": int(getattr(entry, "size", 0) or 0)}
            for filename, entry in named
            if filename and not filename.endswith("/")
        ]
        if with_sizes:
            return with_sizes
    except TypeError:
        pass
    except Exception as e:
        logging.warning("huggingface model_info failed for %s: %s", repo_id, e)

    fallback: List[Dict[str, object]] = []
    for name in client.list_repo_files(repo_id):
        if str(name).strip():
            fallback.append({"name": str(name), "size": 0})
    return fallback
|
|
3410
|
+
|
|
3411
|
+
|
|
3412
|
+
def download_hf_model(
|
|
3413
|
+
repo_id: str,
|
|
3414
|
+
provider: str = "local_mlx",
|
|
3415
|
+
progress_emit=None,
|
|
3416
|
+
) -> Dict[str, object]:
|
|
3212
3417
|
if importlib.util.find_spec("huggingface_hub") is None:
|
|
3213
3418
|
raise HTTPException(status_code=400, detail="huggingface_hub가 없습니다. 먼저 MLX runtime 설치를 진행해 주세요.")
|
|
3214
3419
|
|
|
3215
3420
|
target_dir = hf_model_dir(repo_id)
|
|
3216
3421
|
if hf_model_ready(repo_id, provider):
|
|
3422
|
+
if progress_emit:
|
|
3423
|
+
progress_emit(model_download_progress_payload(
|
|
3424
|
+
"download",
|
|
3425
|
+
"이미 다운로드된 모델을 확인했습니다.",
|
|
3426
|
+
percent=100,
|
|
3427
|
+
downloaded_bytes=0,
|
|
3428
|
+
total_bytes=0,
|
|
3429
|
+
eta_seconds=0,
|
|
3430
|
+
))
|
|
3217
3431
|
return {"model": repo_id, "path": str(target_dir), "cached": True}
|
|
3218
3432
|
|
|
3219
3433
|
target_dir.mkdir(parents=True, exist_ok=True)
|
|
3220
3434
|
try:
|
|
3221
|
-
from huggingface_hub import
|
|
3435
|
+
from huggingface_hub import hf_hub_download
|
|
3222
3436
|
|
|
3437
|
+
started_at = time.time()
|
|
3438
|
+
all_files = hf_repo_files_with_sizes(repo_id)
|
|
3223
3439
|
if provider == "llamacpp":
|
|
3224
|
-
|
|
3225
|
-
|
|
3440
|
+
ggufs = sorted(
|
|
3441
|
+
[item for item in all_files if str(item["name"]).lower().endswith(".gguf")],
|
|
3442
|
+
key=lambda item: str(item["name"]),
|
|
3443
|
+
)
|
|
3226
3444
|
if not ggufs:
|
|
3227
3445
|
raise RuntimeError("GGUF 파일을 찾지 못했습니다.")
|
|
3228
3446
|
preference = ("q4_k_m", "q4_0", "q4_k_s", "q3_k_m", "q2_k")
|
|
3229
|
-
|
|
3230
|
-
(
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3447
|
+
selected_files = [
|
|
3448
|
+
next(
|
|
3449
|
+
(item for pref in preference for item in ggufs if pref in str(item["name"]).lower()),
|
|
3450
|
+
ggufs[0],
|
|
3451
|
+
)
|
|
3452
|
+
]
|
|
3234
3453
|
else:
|
|
3235
|
-
|
|
3454
|
+
selected_files = all_files
|
|
3455
|
+
|
|
3456
|
+
total_bytes = sum(int(item.get("size") or 0) for item in selected_files) or None
|
|
3457
|
+
downloaded_bytes = 0
|
|
3458
|
+
total_files = max(1, len(selected_files))
|
|
3459
|
+
if progress_emit:
|
|
3460
|
+
progress_emit(model_download_progress_payload(
|
|
3461
|
+
"download",
|
|
3462
|
+
"모델 파일 정보를 확인했습니다.",
|
|
3463
|
+
percent=0,
|
|
3464
|
+
downloaded_bytes=0,
|
|
3465
|
+
total_bytes=total_bytes,
|
|
3466
|
+
indeterminate=total_bytes is None,
|
|
3467
|
+
))
|
|
3468
|
+
|
|
3469
|
+
for index, item in enumerate(selected_files, start=1):
|
|
3470
|
+
filename = str(item["name"])
|
|
3471
|
+
size = int(item.get("size") or 0)
|
|
3472
|
+
tqdm_class = None
|
|
3473
|
+
if progress_emit:
|
|
3474
|
+
current_percent = (
|
|
3475
|
+
(downloaded_bytes / total_bytes) * 100 if total_bytes else ((index - 1) / total_files) * 100
|
|
3476
|
+
)
|
|
3477
|
+
progress_emit(model_download_progress_payload(
|
|
3478
|
+
"download",
|
|
3479
|
+
"모델 다운로드 중입니다.",
|
|
3480
|
+
percent=current_percent,
|
|
3481
|
+
detail=filename,
|
|
3482
|
+
downloaded_bytes=downloaded_bytes,
|
|
3483
|
+
total_bytes=total_bytes,
|
|
3484
|
+
eta_seconds=estimate_eta_seconds(started_at, current_percent),
|
|
3485
|
+
file=filename,
|
|
3486
|
+
indeterminate=total_bytes is None and total_files <= 1,
|
|
3487
|
+
))
|
|
3488
|
+
try:
|
|
3489
|
+
from tqdm.auto import tqdm as base_tqdm
|
|
3490
|
+
|
|
3491
|
+
downloaded_before = downloaded_bytes
|
|
3492
|
+
last_emit = {"at": 0.0, "percent": -1.0}
|
|
3493
|
+
|
|
3494
|
+
def emit_byte_progress(done_bytes: float) -> None:
|
|
3495
|
+
done = max(0, int(done_bytes or 0))
|
|
3496
|
+
if total_bytes:
|
|
3497
|
+
aggregate = min(total_bytes, downloaded_before + done)
|
|
3498
|
+
percent = (aggregate / total_bytes) * 100
|
|
3499
|
+
else:
|
|
3500
|
+
file_total = size or done
|
|
3501
|
+
file_ratio = min(1.0, done / file_total) if file_total else 0.0
|
|
3502
|
+
aggregate = downloaded_before + done
|
|
3503
|
+
percent = ((index - 1) + file_ratio) / total_files * 100
|
|
3504
|
+
now = time.time()
|
|
3505
|
+
if percent < 100 and now - last_emit["at"] < 0.5 and percent - last_emit["percent"] < 0.3:
|
|
3506
|
+
return
|
|
3507
|
+
last_emit["at"] = now
|
|
3508
|
+
last_emit["percent"] = percent
|
|
3509
|
+
progress_emit(model_download_progress_payload(
|
|
3510
|
+
"download",
|
|
3511
|
+
"모델 다운로드 중입니다.",
|
|
3512
|
+
percent=percent,
|
|
3513
|
+
detail=filename,
|
|
3514
|
+
downloaded_bytes=aggregate,
|
|
3515
|
+
total_bytes=total_bytes,
|
|
3516
|
+
eta_seconds=estimate_eta_seconds(started_at, percent),
|
|
3517
|
+
file=filename,
|
|
3518
|
+
indeterminate=total_bytes is None and total_files <= 1,
|
|
3519
|
+
))
|
|
3520
|
+
|
|
3521
|
+
class ProgressTqdm(base_tqdm):
|
|
3522
|
+
def update(self, n=1):
|
|
3523
|
+
result = super().update(n)
|
|
3524
|
+
emit_byte_progress(float(getattr(self, "n", 0) or 0))
|
|
3525
|
+
return result
|
|
3526
|
+
|
|
3527
|
+
tqdm_class = ProgressTqdm
|
|
3528
|
+
except Exception:
|
|
3529
|
+
tqdm_class = None
|
|
3530
|
+
local_path = hf_hub_download(
|
|
3531
|
+
repo_id=repo_id,
|
|
3532
|
+
filename=filename,
|
|
3533
|
+
local_dir=str(target_dir),
|
|
3534
|
+
tqdm_class=tqdm_class,
|
|
3535
|
+
)
|
|
3536
|
+
if size <= 0:
|
|
3537
|
+
try:
|
|
3538
|
+
size = Path(local_path).stat().st_size
|
|
3539
|
+
except OSError:
|
|
3540
|
+
size = 0
|
|
3541
|
+
downloaded_bytes += size
|
|
3542
|
+
if progress_emit:
|
|
3543
|
+
current_percent = (
|
|
3544
|
+
(downloaded_bytes / total_bytes) * 100 if total_bytes else (index / total_files) * 100
|
|
3545
|
+
)
|
|
3546
|
+
progress_emit(model_download_progress_payload(
|
|
3547
|
+
"download",
|
|
3548
|
+
"모델 다운로드 중입니다.",
|
|
3549
|
+
percent=current_percent,
|
|
3550
|
+
detail=filename,
|
|
3551
|
+
downloaded_bytes=downloaded_bytes,
|
|
3552
|
+
total_bytes=total_bytes,
|
|
3553
|
+
eta_seconds=estimate_eta_seconds(started_at, current_percent),
|
|
3554
|
+
file=filename,
|
|
3555
|
+
indeterminate=False,
|
|
3556
|
+
))
|
|
3557
|
+
|
|
3558
|
+
if progress_emit:
|
|
3559
|
+
progress_emit(model_download_progress_payload(
|
|
3560
|
+
"download",
|
|
3561
|
+
"모델 다운로드가 완료되었습니다.",
|
|
3562
|
+
percent=100,
|
|
3563
|
+
downloaded_bytes=downloaded_bytes,
|
|
3564
|
+
total_bytes=total_bytes or downloaded_bytes,
|
|
3565
|
+
eta_seconds=0,
|
|
3566
|
+
))
|
|
3236
3567
|
except Exception as e:
|
|
3237
3568
|
raise HTTPException(status_code=500, detail=f"{repo_id} 다운로드 실패: {str(e)[-2000:]}")
|
|
3238
3569
|
|
|
@@ -3242,6 +3573,75 @@ def download_hf_model(repo_id: str, provider: str = "local_mlx") -> Dict[str, ob
|
|
|
3242
3573
|
return {"model": repo_id, "path": str(target_dir), "cached": False}
|
|
3243
3574
|
|
|
3244
3575
|
|
|
3576
|
+
def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict[str, object]:
    """Run ``ollama pull`` and report its output as progress events.

    Args:
        model_name: Model reference passed to ``ollama pull``.
        progress_emit: Optional callable that receives one progress payload
            per non-empty output line, plus a start and a completion event.

    Returns:
        ``{"provider": "ollama", "model": model_name, "returncode": 0}``.

    Raises:
        HTTPException: status 500 with the last output lines when the command
            exits with a non-zero code.
    """
    from collections import deque

    started_at = time.time()
    if progress_emit:
        progress_emit(model_download_progress_payload(
            "download",
            "Ollama 모델 다운로드를 시작합니다.",
            percent=0,
            detail=model_name,
            indeterminate=True,
        ))

    last_percent: Optional[float] = None
    # Only the last 12 lines are ever reported, so keep just those.
    tail = deque(maxlen=12)
    percent_pattern = re.compile(r"(\d{1,3}(?:\.\d+)?)\s*%")
    splitter = re.compile(r"[\r\n]+")

    # The context manager closes the stdout pipe and reaps the child on every
    # exit path, including after kill().
    with subprocess.Popen(
        ["ollama", "pull", model_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as process:
        try:
            for raw_line in process.stdout or ():
                for part in splitter.split(raw_line):
                    line = part.strip()
                    if not line:
                        continue
                    tail.append(line)
                    match = percent_pattern.search(line)
                    if match:
                        last_percent = min(100.0, float(match.group(1)))
                    if progress_emit:
                        # Until a percentage has been seen the bar stays
                        # indeterminate; afterwards the last value is reused.
                        progress_emit(model_download_progress_payload(
                            "download",
                            "Ollama 모델 다운로드 중입니다.",
                            percent=last_percent,
                            detail=line[-180:],
                            eta_seconds=estimate_eta_seconds(started_at, last_percent),
                            indeterminate=last_percent is None,
                        ))
            returncode = process.wait()
        except BaseException:
            # Stop the download if reading or a progress callback fails.
            process.kill()
            raise

    if returncode != 0:
        output_tail = "\n".join(tail)
        raise HTTPException(status_code=500, detail=output_tail[-2000:] or "Ollama 모델 다운로드 실패")

    if progress_emit:
        progress_emit(model_download_progress_payload(
            "download",
            "Ollama 모델 다운로드가 완료되었습니다.",
            percent=100,
            detail=model_name,
            eta_seconds=0,
            indeterminate=False,
        ))
    return {"provider": "ollama", "model": model_name, "returncode": returncode}
|
|
3643
|
+
|
|
3644
|
+
|
|
3245
3645
|
def get_ollama_pulled_models() -> set:
|
|
3246
3646
|
if not shutil.which("ollama"):
|
|
3247
3647
|
return set()
|
|
@@ -3806,6 +4206,227 @@ async def prepare_and_load_model(
|
|
|
3806
4206
|
"download": download_result,
|
|
3807
4207
|
}
|
|
3808
4208
|
|
|
4209
|
+
|
|
4210
|
+
def sse_event(event: str, data: Dict[str, object]) -> str:
    """Format one Server-Sent Events frame.

    The frame is an ``event:`` line, a ``data:`` line holding ``data`` as
    JSON (non-ASCII characters kept as-is), and a terminating blank line.
    """
    encoded = json.dumps(data, ensure_ascii=False)
    return "event: {}\ndata: {}\n\n".format(event, encoded)
|
|
4212
|
+
|
|
4213
|
+
|
|
4214
|
+
async def prepare_and_load_model_stream(
    model_id: str,
    request: Request,
    engine: Optional[str] = None,
    user_email: Optional[str] = None,
) -> AsyncIterator[str]:
    """Prepare and load a model, yielding SSE frames that report progress.

    The engine check, download, and server start-up steps run in a background
    thread and report through a queue. This coroutine relays each queued
    progress payload as a ``progress`` SSE frame, then loads the model through
    ``router.load_model`` and finishes with a ``done`` frame carrying the
    result summary.

    Args:
        model_id: Model reference; normalised with ``engine`` before use.
        request: Incoming request, used to resolve the current user when
            ``user_email`` is not given.
        engine: Optional engine hint passed to the normaliser.
        user_email: Optional e-mail that takes precedence over the user
            resolved from ``request``.

    Yields:
        SSE-formatted strings produced by ``sse_event``.

    Raises:
        HTTPException: 400 when the normalised model id is empty, or the
            status/detail reported by the background worker when it fails.
    """
    model_id = normalize_local_model_request(model_id, engine)
    if not model_id:
        raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")

    parsed_provider, parsed_model = parse_model_ref(model_id)
    # "mlx" is treated as an alias of the "local_mlx" provider.
    if parsed_provider == "mlx":
        parsed_provider = "local_mlx"

    # The worker thread pushes {"kind": ...} messages here; the final values
    # are handed back through work_result once it is done.
    work_queue: "queue.Queue[Dict[str, object]]" = queue.Queue()
    work_result: Dict[str, object] = {}

    def emit_progress(payload: Dict[str, object]) -> None:
        # Called from the worker thread to enqueue a progress payload.
        work_queue.put({"kind": "progress", "data": payload})

    def blocking_prepare() -> None:
        # Runs in the worker thread. Always ends by queueing exactly one
        # "done" or "error" message (for Exception subclasses).
        try:
            local_engines = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
            install_result: Dict[str, object] = {}
            download_result: Optional[Dict[str, object]] = None
            prepared_model_id = model_id
            prepared_model_name = parsed_model

            if parsed_provider in local_engines:
                emit_progress(model_download_progress_payload(
                    "engine",
                    "실행 엔진을 확인하는 중입니다.",
                    percent=2,
                    indeterminate=True,
                ))
                install_result = ensure_engine_ready(parsed_provider)
                emit_progress(model_download_progress_payload(
                    "engine",
                    "실행 엔진 준비가 완료되었습니다.",
                    percent=10,
                    indeterminate=False,
                ))

            if parsed_provider == "local_mlx":
                # A model reference that is an existing filesystem path is
                # used directly without any download.
                explicit_path = Path(parsed_model).expanduser()
                if explicit_path.exists():
                    download_result = {"model": parsed_model, "path": str(explicit_path), "cached": True}
                    emit_progress(model_download_progress_payload(
                        "download",
                        "로컬 모델 경로를 확인했습니다.",
                        percent=100,
                        detail=str(explicit_path),
                        eta_seconds=0,
                    ))
                elif not hf_model_ready(parsed_model, "local_mlx"):
                    download_result = download_hf_model(parsed_model, "local_mlx", progress_emit=emit_progress)
                else:
                    download_result = {"model": parsed_model, "path": str(hf_model_dir(parsed_model)), "cached": True}
                    emit_progress(model_download_progress_payload(
                        "download",
                        "이미 다운로드된 모델을 확인했습니다.",
                        percent=100,
                        eta_seconds=0,
                    ))
            elif parsed_provider == "ollama":
                emit_progress(model_download_progress_payload(
                    "engine",
                    "Ollama 서버를 확인하는 중입니다.",
                    percent=12,
                    indeterminate=True,
                ))
                ensure_ollama_server()
                if parsed_model not in get_ollama_pulled_models():
                    download_result = pull_ollama_model_with_progress(parsed_model, progress_emit=emit_progress)
                else:
                    download_result = {"provider": "ollama", "model": parsed_model, "cached": True}
                    emit_progress(model_download_progress_payload(
                        "download",
                        "이미 다운로드된 Ollama 모델을 확인했습니다.",
                        percent=100,
                        detail=parsed_model,
                        eta_seconds=0,
                    ))
            elif parsed_provider == "vllm":
                if not hf_model_ready(parsed_model, "vllm"):
                    download_result = download_hf_model(parsed_model, "vllm", progress_emit=emit_progress)
                else:
                    download_result = {"provider": "vllm", "model": parsed_model, "cached": True}
                    emit_progress(model_download_progress_payload(
                        "download",
                        "이미 다운로드된 모델을 확인했습니다.",
                        percent=100,
                        detail=parsed_model,
                        eta_seconds=0,
                    ))
                emit_progress(model_download_progress_payload(
                    "server",
                    "vLLM 서버를 시작하는 중입니다.",
                    percent=92,
                    indeterminate=True,
                ))
                ensure_vllm_server(parsed_model)
                # Keep whatever the download step returned and add the
                # server status on top of it.
                download_result = {**(download_result or {}), "provider": "vllm", "model": parsed_model, "server_ready": True}
            elif parsed_provider == "llamacpp":
                if not hf_model_ready(parsed_model, "llamacpp"):
                    download_result = download_hf_model(parsed_model, "llamacpp", progress_emit=emit_progress)
                else:
                    download_result = {"provider": "llamacpp", "model": parsed_model, "cached": True}
                    emit_progress(model_download_progress_payload(
                        "download",
                        "이미 다운로드된 GGUF 모델을 확인했습니다.",
                        percent=100,
                        detail=parsed_model,
                        eta_seconds=0,
                    ))
                emit_progress(model_download_progress_payload(
                    "server",
                    "llama.cpp 서버를 시작하는 중입니다.",
                    percent=92,
                    indeterminate=True,
                ))
                ensure_llamacpp_server(parsed_model)
                download_result = {**(download_result or {}), "provider": "llamacpp", "model": parsed_model, "server_ready": True}
            elif parsed_provider == "lmstudio":
                emit_progress(model_download_progress_payload(
                    "download",
                    "LM Studio 모델을 확인하는 중입니다.",
                    percent=35,
                    indeterminate=True,
                ))
                ensured = ensure_lmstudio_model(parsed_model)
                # Prefer the identifier reported back by LM Studio over the
                # name that was requested.
                resolved_model = str(
                    ensured.get("instance_id")
                    or ensured.get("resolved_model")
                    or parsed_model
                ).strip()
                prepared_model_name = resolved_model
                prepared_model_id = f"lmstudio:{resolved_model}"
                download_result = ensured
            else:
                # Any provider outside the local engines handled above:
                # nothing to install or download here.
                emit_progress(model_download_progress_payload(
                    "engine",
                    "모델 연결을 준비하는 중입니다.",
                    percent=30,
                    indeterminate=True,
                ))

            work_result.update({
                "model_id": prepared_model_id,
                "parsed_provider": parsed_provider,
                "parsed_model": prepared_model_name,
                "install_result": install_result,
                "download_result": download_result,
            })
            work_queue.put({"kind": "done"})
        except HTTPException as exc:
            # Forward the original status/detail so the consumer can re-raise it.
            work_queue.put({"kind": "error", "status_code": exc.status_code, "detail": exc.detail})
        except Exception as exc:
            logging.exception("model prepare stream worker failed")
            work_queue.put({"kind": "error", "status_code": 500, "detail": str(exc)[-2000:]})

    worker = threading.Thread(target=blocking_prepare, daemon=True)
    worker.start()

    while True:
        # Queue.get() blocks, so it is awaited via a helper thread instead of
        # being called directly on the event loop.
        item = await asyncio.to_thread(work_queue.get)
        kind = item.get("kind")
        if kind == "progress":
            yield sse_event("progress", item["data"])
        elif kind == "error":
            raise HTTPException(
                status_code=int(item.get("status_code") or 500),
                detail=item.get("detail") or "모델 준비에 실패했습니다.",
            )
        elif kind == "done":
            break

    # Fall back to the pre-worker values if the worker left a field empty.
    prepared_model_id = str(work_result.get("model_id") or model_id)
    prepared_provider = str(work_result.get("parsed_provider") or parsed_provider)
    install_result = work_result.get("install_result") or {}
    download_result = work_result.get("download_result")

    yield sse_event("progress", model_download_progress_payload(
        "load",
        "모델을 메모리에 로드하는 중입니다.",
        percent=96,
        indeterminate=True,
    ))

    # NOTE(review): a caller-supplied user_email wins over the user resolved
    # from the request — confirm callers are trusted to pass it.
    effective_email = (user_email or get_current_user(request) or "").strip()
    # No per-user API key is looked up for the local_mlx provider.
    user_api_key = get_user_api_key(effective_email, prepared_provider) if prepared_provider != "local_mlx" else None
    msg = await router.load_model(
        prepared_model_id,
        None,
        draft_model_id=None,
        api_key_override=user_api_key,
        owner=effective_email or None,
    )
    result = {
        "status": "ok",
        "message": msg,
        "model": prepared_model_id,
        "current": router.current_model_id,
        "engine": prepared_provider,
        "installed_now": bool(isinstance(install_result, dict) and install_result.get("installed_now")),
        "download": download_result,
    }
    yield sse_event("progress", model_download_progress_payload(
        "done",
        "모델 준비가 완료되었습니다.",
        percent=100,
        eta_seconds=0,
    ))
    yield sse_event("done", result)
|
|
4428
|
+
|
|
4429
|
+
|
|
3809
4430
|
CLOUD_VERIFY_CACHE: Dict[str, Dict] = {}
|
|
3810
4431
|
CLOUD_VERIFY_TTL_SECONDS = 600
|
|
3811
4432
|
|
|
@@ -3964,6 +4585,38 @@ async def engines_prepare_model(req: PrepareModelRequest, request: Request):
|
|
|
3964
4585
|
)
|
|
3965
4586
|
|
|
3966
4587
|
|
|
4588
|
+
@app.post("/engines/prepare-model/stream")
async def engines_prepare_model_stream(req: PrepareModelRequest, request: Request):
    """Stream model-preparation progress to the client as Server-Sent Events.

    Calls ``require_user`` first, then relays every frame produced by
    ``prepare_and_load_model_stream``. Because the response has already begun
    streaming when a failure occurs, errors are reported in-band as an
    ``error`` SSE frame rather than as an HTTP error response.

    NOTE(review): ``req.user_email`` is forwarded as-is from the request
    body — confirm it cannot be used to act on behalf of another user.
    """
    require_user(request)
    fallback_detail = "모델 준비에 실패했습니다."

    def failure_frame(status_code, detail) -> str:
        # Build the in-band error frame; an empty detail falls back to the
        # generic failure message.
        return sse_event("error", {
            "status_code": status_code,
            "detail": detail or fallback_detail,
        })

    async def event_stream():
        # Creating the async generator runs none of its body, so it is safe
        # to build it outside the try block.
        frames = prepare_and_load_model_stream(
            req.model,
            request,
            engine=req.engine,
            user_email=req.user_email,
        )
        try:
            async for frame in frames:
                yield frame
        except HTTPException as http_error:
            yield failure_frame(http_error.status_code, http_error.detail)
        except Exception as unexpected:
            logging.exception("model prepare stream failed")
            # Only the tail of the message is sent to keep the frame small.
            yield failure_frame(500, str(unexpected)[-1000:])

    # Disable caching and proxy buffering so frames reach the client promptly.
    stream_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers=stream_headers,
    )
|
|
4618
|
+
|
|
4619
|
+
|
|
3967
4620
|
@app.post("/setup/set-api-key")
|
|
3968
4621
|
async def set_api_key(req: SetApiKeyRequest, request: Request):
|
|
3969
4622
|
from llm_router import OPENAI_COMPATIBLE_PROVIDERS
|
|
@@ -4122,14 +4775,14 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
4122
4775
|
logging.warning("knowledge graph clear event ingest failed: %s", e)
|
|
4123
4776
|
if command == "/clear_all":
|
|
4124
4777
|
result = clear_history(0)
|
|
4125
|
-
answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와
|
|
4778
|
+
answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
|
|
4126
4779
|
else:
|
|
4127
4780
|
if req.conversation_id:
|
|
4128
4781
|
result = clear_conversation(req.conversation_id)
|
|
4129
|
-
answer = f"현재 대화방 채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와
|
|
4782
|
+
answer = f"현재 대화방 채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
|
|
4130
4783
|
else:
|
|
4131
4784
|
result = clear_history(0)
|
|
4132
|
-
answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와
|
|
4785
|
+
answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
|
|
4133
4786
|
append_audit_event(
|
|
4134
4787
|
"clear_command",
|
|
4135
4788
|
user_email=effective_email,
|
|
@@ -5155,10 +5808,7 @@ async def _phase_verify(
|
|
|
5155
5808
|
ctx.state = AgentState.ROLLBACK
|
|
5156
5809
|
elif next_s == "EXECUTING":
|
|
5157
5810
|
if ctx.retry_count >= max_retry:
|
|
5158
|
-
ctx.final_message =
|
|
5159
|
-
f"최대 재시도({max_retry}회) 초과로 작업을 종료했습니다. "
|
|
5160
|
-
f"마지막 비판: {verdict.get('reason', '(없음)')}"
|
|
5161
|
-
)
|
|
5811
|
+
ctx.final_message = "처리 중 문제가 발생했습니다. 다시 시도해 주세요."
|
|
5162
5812
|
ctx.state = AgentState.FAILED
|
|
5163
5813
|
else:
|
|
5164
5814
|
ctx.retry_count += 1
|
|
@@ -6047,9 +6697,9 @@ async def tools_computer_use_status(request: Request):
|
|
|
6047
6697
|
return _tool_response(computer_status)
|
|
6048
6698
|
|
|
6049
6699
|
|
|
6050
|
-
# ──
|
|
6700
|
+
# ── 내 컴퓨터 API ──────────────────────────────────────────────────────────
|
|
6051
6701
|
|
|
6052
|
-
CU_SYSTEM_PROMPT = """You are Lattice AI
|
|
6702
|
+
CU_SYSTEM_PROMPT = """You are Lattice AI desktop-control agent. You control the Mac desktop using tools.
|
|
6053
6703
|
Prefer non-visual direct actions when possible. Use screenshots only when you must inspect visible UI state or choose screen coordinates.
|
|
6054
6704
|
|
|
6055
6705
|
Available actions:
|
|
@@ -6185,8 +6835,8 @@ async def cu_drag(req: CuDragRequest, request: Request):
|
|
|
6185
6835
|
|
|
6186
6836
|
@app.post("/cu/agent")
|
|
6187
6837
|
async def cu_agent(req: CuAgentRequest, request: Request):
|
|
6188
|
-
"""SSE streaming
|
|
6189
|
-
|
|
6838
|
+
"""SSE streaming desktop-control agent loop."""
|
|
6839
|
+
require_user(request)
|
|
6190
6840
|
async def _stream():
|
|
6191
6841
|
task_lower = (req.task or "").lower()
|
|
6192
6842
|
url_match = re.search(r"(https?://[^\s]+|localhost:\d+[^\s]*|127\.0\.0\.1:\d+[^\s]*)", req.task or "")
|
|
@@ -6413,9 +7063,9 @@ _MCP_TOOL_DESCRIPTIONS: Dict[str, str] = {
|
|
|
6413
7063
|
"computer_scroll": "Scroll at screen coordinates.",
|
|
6414
7064
|
"computer_move": "Move the mouse to screen coordinates.",
|
|
6415
7065
|
"computer_drag": "Drag from (x1,y1) to (x2,y2).",
|
|
6416
|
-
"computer_status": "Check if Mac
|
|
7066
|
+
"computer_status": "Check if Mac desktop control (pyautogui) is available.",
|
|
6417
7067
|
"chrome_status": "Report Chrome desktop bridge availability.",
|
|
6418
|
-
"computer_use_status": "Report Mac
|
|
7068
|
+
"computer_use_status": "Report Mac desktop-control bridge availability.",
|
|
6419
7069
|
"knowledge_save": "Save a note into the local knowledge garden.",
|
|
6420
7070
|
"knowledge_search": "Search the local knowledge garden.",
|
|
6421
7071
|
"knowledge_tree": "List local knowledge garden markdown files.",
|
|
@@ -6803,6 +7453,20 @@ def setup_auto_state() -> Dict[str, object]:
|
|
|
6803
7453
|
"preset": auto_setup_preset(profile, recommendation),
|
|
6804
7454
|
}
|
|
6805
7455
|
|
|
7456
|
+
|
|
7457
|
+
def primary_setup_model(recs: Dict[str, object]) -> Optional[Dict[str, object]]:
    """Pick the model entry that setup should treat as the primary choice.

    A model entry is a candidate when it is a dict, is not flagged
    ``disabled``, and carries a model id either directly (``model_id``) or
    nested under ``action["model_id"]``.

    Args:
        recs: Recommendation mapping expected to hold a ``models`` list.
            Anything else (non-dict, missing or non-list ``models``) yields
            ``None``.

    Returns:
        The first candidate flagged ``checked``; otherwise the first
        candidate; ``None`` when there are no candidates.
    """
    models = recs.get("models") if isinstance(recs, dict) else None
    if not isinstance(models, list):
        return None

    def usable_model_id(item: object) -> object:
        # Return the entry's model id, or a falsy value when it is unusable.
        if not isinstance(item, dict) or item.get("disabled"):
            return None
        action = item.get("action")
        # Guard against a non-dict ``action`` (e.g. a plain string), which
        # would otherwise raise AttributeError on ``.get``.
        nested_id = action.get("model_id") if isinstance(action, dict) else None
        return item.get("model_id") or nested_id

    candidates = [item for item in models if usable_model_id(item)]
    if not candidates:
        return None
    return next((item for item in candidates if item.get("checked")), candidates[0])
|
|
7468
|
+
|
|
7469
|
+
|
|
6806
7470
|
@app.get("/setup/scan")
|
|
6807
7471
|
async def setup_scan(request: Request):
|
|
6808
7472
|
"""환경 감지 및 맞춤 추천 반환."""
|
|
@@ -6810,6 +7474,27 @@ async def setup_scan(request: Request):
|
|
|
6810
7474
|
env = scan_environment()
|
|
6811
7475
|
recs = get_recommendations(env)
|
|
6812
7476
|
zero_config = setup_auto_state()
|
|
7477
|
+
primary_model = primary_setup_model(recs)
|
|
7478
|
+
if primary_model:
|
|
7479
|
+
model_id = primary_model.get("model_id") or (primary_model.get("action") or {}).get("model_id")
|
|
7480
|
+
zero_config.setdefault("recommend", {})["model_id"] = model_id
|
|
7481
|
+
zero_config["recommend"]["runtime"] = "mlx"
|
|
7482
|
+
rationale = [
|
|
7483
|
+
item for item in zero_config["recommend"].get("rationale", [])
|
|
7484
|
+
if not (isinstance(item, str) and item.startswith("RAM ") and "→" in item)
|
|
7485
|
+
]
|
|
7486
|
+
rationale.append(f"실제 다운로드 및 로드 가능한 MLX 모델 → {model_id}")
|
|
7487
|
+
zero_config["recommend"]["rationale"] = rationale
|
|
7488
|
+
if isinstance(zero_config.get("plan"), dict):
|
|
7489
|
+
zero_config["plan"]["steps"] = [{
|
|
7490
|
+
"name": f"weights:{model_id}",
|
|
7491
|
+
"why": "추론에 사용할 모델 가중치",
|
|
7492
|
+
"command": ["huggingface-cli", "download", model_id, "--quiet"],
|
|
7493
|
+
"requires_admin": False,
|
|
7494
|
+
}]
|
|
7495
|
+
if isinstance(zero_config.get("preset"), dict):
|
|
7496
|
+
zero_config["preset"].setdefault("model", {})["id"] = model_id
|
|
7497
|
+
zero_config["preset"]["model"]["runtime"] = "mlx"
|
|
6813
7498
|
env["zero_config"] = zero_config
|
|
6814
7499
|
recs.setdefault("summary", {})["zero_config"] = zero_config["recommend"]
|
|
6815
7500
|
recs["install_plan"] = zero_config["plan"]
|