ltcai 0.1.28 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.py CHANGED
@@ -12,6 +12,7 @@ import json
12
12
  import logging
13
13
  import os
14
14
  import platform
15
+ import queue
15
16
  import re
16
17
  import secrets
17
18
  import threading
@@ -211,19 +212,24 @@ SSO_CLIENT_SECRET = env_value("OIDC_CLIENT_SECRET", "")
211
212
  SSO_REDIRECT_URI = env_value("OIDC_REDIRECT_URI", "http://localhost:4825/auth/sso/callback")
212
213
  SSO_PROVIDER_NAME = env_value("OIDC_PROVIDER_NAME", "SSO")
213
214
  _sso_discovery_cache: Optional[Dict] = None
215
+ _sso_discovery_cache_url: str = ""
214
216
  _sso_states: Dict[str, float] = {} # state → timestamp (CSRF protection)
215
217
 
216
218
  async def _get_sso_discovery() -> Optional[Dict]:
217
- global _sso_discovery_cache
218
- if _sso_discovery_cache:
219
+ global _sso_discovery_cache, _sso_discovery_cache_url
220
+ settings = get_sso_settings()
221
+ discovery_url = settings.get("discovery_url", "")
222
+ if _sso_discovery_cache and _sso_discovery_cache_url == discovery_url:
219
223
  return _sso_discovery_cache
220
- if not SSO_DISCOVERY_URL:
224
+ if not discovery_url:
221
225
  return None
222
226
  try:
223
227
  import httpx as _httpx
224
228
  async with _httpx.AsyncClient() as c:
225
- r = await c.get(SSO_DISCOVERY_URL, timeout=10)
229
+ r = await c.get(discovery_url, timeout=10)
230
+ r.raise_for_status()
226
231
  _sso_discovery_cache = r.json()
232
+ _sso_discovery_cache_url = discovery_url
227
233
  except Exception as e:
228
234
  logging.warning("SSO discovery failed: %s", e)
229
235
  return None
@@ -357,11 +363,12 @@ HISTORY_FILE = DATA_DIR / "chat_history.json"
357
363
  VPC_FILE = DATA_DIR / "vpc_config.json"
358
364
  MCP_FILE = DATA_DIR / "mcp_installs.json"
359
365
  AUDIT_FILE = DATA_DIR / "audit_log.json"
366
+ SSO_FILE = DATA_DIR / "sso_config.json"
360
367
  KNOWLEDGE_GRAPH = KnowledgeGraphStore(DATA_DIR / "knowledge_graph.sqlite", DATA_DIR / "knowledge_graph_blobs") if ENABLE_GRAPH else None
361
368
 
362
369
  def _require_graph():
363
370
  if not ENABLE_GRAPH or KNOWLEDGE_GRAPH is None:
364
- raise HTTPException(status_code=404, detail="Data Graph is disabled. Set LATTICEAI_ENABLE_GRAPH=true in .env to enable.")
371
+ raise HTTPException(status_code=404, detail="지식 그래프가 비활성화되어 있습니다. LATTICEAI_ENABLE_GRAPH=true 설정 다시 시도해 주세요.")
365
372
 
366
373
  class UserRegister(BaseModel):
367
374
  email: str
@@ -387,6 +394,75 @@ class VpcConfigUpdate(BaseModel):
387
394
  peering_status: Optional[str] = None
388
395
  notes: Optional[str] = None
389
396
 
397
class SsoConfigUpdate(BaseModel):
    """Partial-update payload for the stored SSO (OIDC) configuration.

    Every field is optional and defaults to ``None``.
    """
    enabled: Optional[bool] = None
    provider_name: Optional[str] = None
    discovery_url: Optional[str] = None
    client_id: Optional[str] = None
    # save_sso_config drops an empty-string secret, so the stored one is kept.
    client_secret: Optional[str] = None
    redirect_uri: Optional[str] = None
    scopes: Optional[str] = None
405
+
406
def _sso_env_defaults() -> Dict[str, object]:
    """Build the baseline SSO configuration from the module-level OIDC constants.

    ``enabled`` is true only when the discovery URL, client id and client
    secret constants are all non-empty.
    """
    fully_configured = bool(SSO_DISCOVERY_URL and SSO_CLIENT_ID and SSO_CLIENT_SECRET)
    defaults: Dict[str, object] = dict(
        enabled=fully_configured,
        provider_name=SSO_PROVIDER_NAME,
        discovery_url=SSO_DISCOVERY_URL,
        client_id=SSO_CLIENT_ID,
        client_secret=SSO_CLIENT_SECRET,
        redirect_uri=SSO_REDIRECT_URI,
        scopes="openid email profile",
    )
    return defaults
416
+
417
def load_sso_config() -> Dict[str, object]:
    """Return the effective SSO configuration.

    Starts from ``_sso_env_defaults()`` and overlays every non-``None`` value
    found in ``SSO_FILE`` (when it exists and holds a JSON object). A read or
    parse failure is logged and the defaults are used. String fields are
    coerced to ``str`` with fallbacks, and ``enabled`` is forced to ``False``
    unless discovery URL, client id and client secret are all present.
    """
    config = _sso_env_defaults()

    if SSO_FILE.exists():
        try:
            stored = json.loads(SSO_FILE.read_text(encoding="utf-8"))
            if isinstance(stored, dict):
                overrides = {key: value for key, value in stored.items() if value is not None}
                config.update(overrides)
        except Exception as e:
            logging.warning("load_sso_config failed (using env/defaults): %s", e)

    # (field, fallback used when the value is missing or falsy)
    string_fields = (
        ("provider_name", "SSO"),
        ("discovery_url", ""),
        ("client_id", ""),
        ("client_secret", ""),
        ("redirect_uri", SSO_REDIRECT_URI),
        ("scopes", "openid email profile"),
    )
    for field, fallback in string_fields:
        config[field] = str(config.get(field) or fallback)

    credentials_present = bool(
        config["discovery_url"] and config["client_id"] and config["client_secret"]
    )
    config["enabled"] = bool(config.get("enabled")) and credentials_present
    return config
436
+
437
def get_sso_settings() -> Dict[str, object]:
    """Return the current SSO settings by delegating to ``load_sso_config``."""
    settings = load_sso_config()
    return settings
439
+
440
def public_sso_config(config: Optional[Dict[str, object]] = None) -> Dict[str, object]:
    """Project an SSO configuration onto the fields that are returned to clients.

    The client secret itself is never included; only ``secret_configured``
    reports whether one is set. When ``config`` is ``None`` or empty the
    current settings are loaded via ``get_sso_settings``.
    """
    cfg = config if config else get_sso_settings()

    view: Dict[str, object] = {}
    view["enabled"] = bool(cfg.get("enabled"))
    view["provider_name"] = cfg.get("provider_name") or ""
    view["discovery_url"] = cfg.get("discovery_url") or ""
    view["client_id"] = cfg.get("client_id") or ""
    view["redirect_uri"] = cfg.get("redirect_uri") or SSO_REDIRECT_URI
    view["scopes"] = cfg.get("scopes") or "openid email profile"
    view["secret_configured"] = bool(cfg.get("client_secret"))
    return view
451
+
452
def save_sso_config(update: Dict[str, object]) -> Dict[str, object]:
    """Merge ``update`` into the stored SSO configuration and persist it.

    Args:
        update: Partial settings. ``None`` values are ignored, and an empty
            ``client_secret`` means "keep the currently stored secret".
            The mapping is not modified.

    Returns:
        The merged configuration that was written to ``SSO_FILE``.

    Side effects:
        Writes ``SSO_FILE`` and resets the cached discovery document so the
        next lookup refetches it.
    """
    global _sso_discovery_cache, _sso_discovery_cache_url

    # Work on a filtered copy so the caller's dict is never mutated.
    changes = {key: value for key, value in update.items() if value is not None}
    if changes.get("client_secret") == "":
        del changes["client_secret"]

    current = load_sso_config()
    current.update(changes)
    current["enabled"] = bool(current.get("enabled")) and bool(
        current.get("discovery_url") and current.get("client_id") and current.get("client_secret")
    )

    SSO_FILE.parent.mkdir(parents=True, exist_ok=True)
    # The file holds the OAuth client secret in plaintext: create it
    # owner-only and tighten an existing file before writing the new content.
    try:
        SSO_FILE.touch(mode=0o600, exist_ok=True)
        SSO_FILE.chmod(0o600)
    except OSError as e:
        # Best effort: some platforms/filesystems do not support POSIX modes.
        logging.warning("could not restrict permissions on %s: %s", SSO_FILE, e)
    SSO_FILE.write_text(json.dumps(current, ensure_ascii=False, indent=2), encoding="utf-8")

    _sso_discovery_cache = None
    _sso_discovery_cache_url = ""
    return current
465
+
390
466
  class McpRecommendRequest(BaseModel):
391
467
  query: str
392
468
  limit: int = 5
@@ -479,13 +555,13 @@ MCP_REGISTRY = [
479
555
  },
480
556
  {
481
557
  "id": "computer-use",
482
- "name": "Computer Use MCP",
558
+ "name": " 컴퓨터 MCP",
483
559
  "category": "Desktop / Mac UI",
484
560
  "install_mode": "connector",
485
561
  "connector_url": "/mcp/connectors/computer-use",
486
562
  "external_url": "codex://plugins/computer-use",
487
- "description": "Mac 화면을 읽고 클릭, 타이핑, 스크롤하는 데스크톱 UI 자동화 브리지입니다.",
488
- "keywords": ["computer use", "desktop", "mac", "click", "type", "scroll", "컴퓨터", "맥", "앱", "클릭", "타이핑"],
563
+ "description": "사용자의 허용을 받아 컴퓨터의 파일, 화면, 작업을 돕는 브리지입니다.",
564
+ "keywords": ["computer use", "desktop", "mac", "click", "type", "scroll", "컴퓨터", "컴퓨터", "맥", "앱", "클릭", "타이핑"],
489
565
  "capabilities": ["Mac 앱 UI 조작", "스크린샷 기반 상태 확인", "클릭/입력/스크롤"],
490
566
  },
491
567
  {
@@ -2216,23 +2292,23 @@ async def login(req: UserLogin, request: Request):
2216
2292
 
2217
2293
  @app.get("/auth/sso/config")
2218
2294
  async def sso_config():
2219
- enabled = bool(SSO_DISCOVERY_URL and SSO_CLIENT_ID and SSO_CLIENT_SECRET)
2220
- return {"enabled": enabled, "provider_name": SSO_PROVIDER_NAME if enabled else ""}
2295
+ return public_sso_config()
2221
2296
 
2222
2297
  @app.get("/auth/sso/login")
2223
2298
  async def sso_login():
2224
2299
  from urllib.parse import urlencode
2225
2300
  from fastapi.responses import RedirectResponse as _Redirect
2301
+ settings = get_sso_settings()
2226
2302
  discovery = await _get_sso_discovery()
2227
- if not discovery:
2303
+ if not settings.get("enabled") or not discovery:
2228
2304
  raise HTTPException(status_code=503, detail="SSO가 설정되지 않았습니다.")
2229
2305
  state = secrets.token_urlsafe(16)
2230
2306
  _sso_states[state] = time.time()
2231
2307
  params = urlencode({
2232
- "client_id": SSO_CLIENT_ID,
2308
+ "client_id": settings["client_id"],
2233
2309
  "response_type": "code",
2234
- "redirect_uri": SSO_REDIRECT_URI,
2235
- "scope": "openid email profile",
2310
+ "redirect_uri": settings["redirect_uri"],
2311
+ "scope": settings.get("scopes") or "openid email profile",
2236
2312
  "state": state,
2237
2313
  })
2238
2314
  return _Redirect(f"{discovery['authorization_endpoint']}?{params}")
@@ -2246,17 +2322,18 @@ async def sso_callback(code: str = "", state: str = "", error: str = ""):
2246
2322
  ts = _sso_states.pop(state, None)
2247
2323
  if ts is None or time.time() - ts > 300:
2248
2324
  raise HTTPException(status_code=400, detail="유효하지 않은 SSO 상태입니다.")
2325
+ settings = get_sso_settings()
2249
2326
  discovery = await _get_sso_discovery()
2250
- if not discovery:
2327
+ if not settings.get("enabled") or not discovery:
2251
2328
  raise HTTPException(status_code=503, detail="SSO 설정 오류입니다.")
2252
2329
  import httpx as _httpx
2253
2330
  async with _httpx.AsyncClient() as c:
2254
2331
  r = await c.post(discovery["token_endpoint"], data={
2255
2332
  "grant_type": "authorization_code",
2256
2333
  "code": code,
2257
- "redirect_uri": SSO_REDIRECT_URI,
2258
- "client_id": SSO_CLIENT_ID,
2259
- "client_secret": SSO_CLIENT_SECRET,
2334
+ "redirect_uri": settings["redirect_uri"],
2335
+ "client_id": settings["client_id"],
2336
+ "client_secret": settings["client_secret"],
2260
2337
  }, headers={"Accept": "application/json"}, timeout=15)
2261
2338
  tokens = r.json()
2262
2339
  id_token = tokens.get("id_token")
@@ -2468,6 +2545,25 @@ async def admin_invite_link(request: Request):
2468
2545
  url = f"{scheme}://{host}/"
2469
2546
  return {"invite_url": url, "invite_code": INVITE_CODE, "gate_enabled": INVITE_GATE_ENABLED}
2470
2547
 
2548
@app.get("/admin/sso")
async def admin_sso(request: Request):
    """Return the client-visible SSO configuration after the admin check."""
    require_admin(request)
    visible_config = public_sso_config()
    return visible_config
2552
+
2553
@app.patch("/admin/sso")
async def admin_update_sso(req: SsoConfigUpdate, request: Request):
    """Apply a partial SSO configuration update and record an audit event.

    Only fields explicitly present in the request body are forwarded to
    ``save_sso_config``. The response omits the client secret.
    """
    admin_email, _ = require_admin(request)

    requested_changes = req.dict(exclude_unset=True)
    saved = save_sso_config(requested_changes)

    audit_fields = {
        "user_email": admin_email,
        "provider_name": saved.get("provider_name"),
        "discovery_url": saved.get("discovery_url"),
        "enabled": bool(saved.get("enabled")),
    }
    append_audit_event("sso_config_update", **audit_fields)

    return public_sso_config(saved)
2566
+
2471
2567
  # ── Invitation Logic ────────────────────────────────────────────────────────
2472
2568
  INVITE_CODE = env_value("LATTICEAI_INVITE_CODE", "gemma-lattice-ai")
2473
2569
  INVITE_GATE_ENABLED = env_bool("LATTICEAI_INVITE_GATE_ENABLED", default=False)
@@ -2495,7 +2591,7 @@ async def root(request: Request, code: Optional[str] = None, authorized: Optiona
2495
2591
  <div style="font-size:48px; margin-bottom:20px;">🔒</div>
2496
2592
  <h1 style="color:#378ADD; margin:0; font-size:24px;">Invitation Required</h1>
2497
2593
  <p style="color:#94a3b8; margin:20px 0; line-height:1.6;">이 서비스는 비공개로 운영되고 있습니다.<br>선생님께 받은 <b>초대용 전용 링크</b>를 통해 접속해 주세요.</p>
2498
- <div style="margin-top:30px; padding-top:20px; border-top:1px solid rgba(255,255,255,0.05); font-size:11px; color:rgba(255,255,255,0.2); letter-spacing:1px;">LATTICE AI SECURITY AGENT</div>
2594
+ <div style="margin-top:30px; padding-top:20px; border-top:1px solid rgba(255,255,255,0.05); font-size:11px; color:rgba(255,255,255,0.2); letter-spacing:1px;">LATTICE AI</div>
2499
2595
  </div>
2500
2596
  </body>
2501
2597
  """, status_code=403)
@@ -2550,6 +2646,48 @@ async def status():
2550
2646
  }
2551
2647
 
2552
2648
 
2649
def _collect_local_sysinfo() -> Dict[str, object]:
    """Collect CPU, RAM and MLX GPU-memory usage synchronously (blocking).

    Shells out to ``top``, ``vm_stat`` and ``sysctl`` with the flags used
    below (presumably macOS-only; on other systems the failure is reported
    via the ``error`` key). Values that cannot be measured stay at ``0.0``.
    """
    import re as _re
    import subprocess

    result: Dict[str, object] = {"cpu_pct": 0.0, "ram_pct": 0.0, "gpu_mem_pct": 0.0, "gpu_mem_gb": 0.0}
    try:
        # CPU: user% + sys% from the "CPU usage" line of a single top sample.
        top_out = subprocess.run(
            ["top", "-l", "1", "-n", "0"], capture_output=True, text=True, timeout=4
        ).stdout
        for line in top_out.splitlines():
            if "CPU usage" in line:
                m = _re.search(r"([\d.]+)% user.*?([\d.]+)% sys", line)
                if m:
                    result["cpu_pct"] = round(float(m.group(1)) + float(m.group(2)), 1)

        # RAM: a ratio of page counts, so the page size itself is not needed.
        vm_out = subprocess.run(["vm_stat"], capture_output=True, text=True, timeout=4).stdout
        page_keys = (
            "Pages free",
            "Pages active",
            "Pages inactive",
            "Pages wired down",
            "Pages occupied by compressor",
        )
        pages: Dict[str, int] = {}
        for line in vm_out.splitlines():
            for key in page_keys:
                if line.startswith(key):
                    m = _re.search(r"(\d+)", line)
                    if m:
                        pages[key] = int(m.group(1))
        total = sum(pages.values())
        used = total - pages.get("Pages free", 0)
        result["ram_pct"] = round(used / total * 100, 1) if total else 0.0

        # GPU (MLX / Apple Silicon unified memory): optional, so any failure
        # here (MLX not installed, sysctl missing) leaves the zero defaults.
        try:
            import mlx.core as _mx

            hw_out = subprocess.run(
                ["sysctl", "-n", "hw.memsize"], capture_output=True, text=True, timeout=2
            ).stdout
            total_bytes = int(hw_out.strip())
            gpu_bytes = _mx.get_active_memory() + _mx.get_cache_memory()
            result["gpu_mem_gb"] = round(gpu_bytes / (1024 ** 3), 2)
            result["gpu_mem_pct"] = round(gpu_bytes / total_bytes * 100, 1) if total_bytes else 0.0
        except Exception:
            pass
    except Exception as e:
        result["error"] = str(e)
    return result


@app.get("/local/sysinfo")
async def local_sysinfo(request: Request):
    """Return CPU / RAM / GPU (MLX) usage for the local machine.

    The measurements run blocking subprocesses with multi-second timeouts,
    so they execute in a worker thread to keep the event loop responsive.
    """
    require_user(request)
    return await asyncio.to_thread(_collect_local_sysinfo)
2689
+
2690
+
2553
2691
 
2554
2692
 
2555
2693
  # ── Request / Response Models ──────────────────────────────────────────────────
@@ -3208,31 +3346,224 @@ def hf_model_ready(repo_id: str, provider: str = "local_mlx") -> bool:
3208
3346
  )
3209
3347
  return has_config and has_weights and has_tokenizer
3210
3348
 
3211
- def download_hf_model(repo_id: str, provider: str = "local_mlx") -> Dict[str, object]:
3349
+
3350
def model_download_progress_payload(
    stage: str,
    message: str,
    *,
    percent: Optional[float] = None,
    detail: Optional[str] = None,
    downloaded_bytes: Optional[int] = None,
    total_bytes: Optional[int] = None,
    eta_seconds: Optional[float] = None,
    file: Optional[str] = None,
    indeterminate: bool = False,
) -> Dict[str, object]:
    """Build one progress-event dictionary for model preparation.

    ``stage``, ``message``, ``indeterminate`` and a ``ts`` timestamp are
    always present. Optional fields are added only when supplied:
    ``percent`` is rounded to one decimal and clamped to 0..100, byte
    counts and ``eta_seconds`` are clamped to be non-negative, and empty
    ``detail`` / ``file`` strings are omitted.
    """
    payload: Dict[str, object] = dict(
        stage=stage,
        message=message,
        indeterminate=indeterminate,
        ts=time.time(),
    )

    if percent is not None:
        rounded_percent = round(float(percent), 1)
        payload["percent"] = max(0, min(100, rounded_percent))

    if detail:
        payload["detail"] = detail

    byte_counters = (("downloaded_bytes", downloaded_bytes), ("total_bytes", total_bytes))
    for field, raw_count in byte_counters:
        if raw_count is not None:
            payload[field] = max(0, int(raw_count))

    if eta_seconds is not None:
        whole_seconds = round(float(eta_seconds))
        payload["eta_seconds"] = max(0, whole_seconds)

    if file:
        payload["file"] = file

    return payload
3381
+
3382
+
3383
def estimate_eta_seconds(started_at: float, percent: Optional[float]) -> Optional[float]:
    """Extrapolate the remaining seconds from elapsed time and percent done.

    Returns ``None`` when ``percent`` is unknown, not yet positive, or
    already at/over 100, since no meaningful estimate exists there.
    """
    if percent is None:
        return None
    if percent <= 0:
        return None
    if percent >= 100:
        return None

    elapsed = time.time() - started_at
    if elapsed < 0.0:
        # A start time in the future counts as zero elapsed time.
        elapsed = 0.0
    return elapsed * (100.0 - percent) / percent
3388
+
3389
+
3390
def hf_repo_files_with_sizes(repo_id: str) -> List[Dict[str, object]]:
    """List the files of a Hugging Face repo as ``{"name", "size"}`` dicts.

    Tries ``model_info(..., files_metadata=True)`` first to obtain sizes.
    If that call raises, or yields no usable entries, falls back to
    ``list_repo_files`` with every size reported as ``0``.
    """
    from huggingface_hub import HfApi

    api = HfApi()
    try:
        info = api.model_info(repo_id, files_metadata=True)
        siblings = getattr(info, "siblings", []) or []
        sized_files = []
        for sibling in siblings:
            filename = str(getattr(sibling, "rfilename", "") or "").strip()
            if filename and not filename.endswith("/"):
                byte_size = int(getattr(sibling, "size", 0) or 0)
                sized_files.append({"name": filename, "size": byte_size})
        if sized_files:
            return sized_files
    except TypeError:
        # Silently fall through to the plain listing below.
        pass
    except Exception as e:
        logging.warning("huggingface model_info failed for %s: %s", repo_id, e)

    plain_listing = []
    for name in api.list_repo_files(repo_id):
        if str(name).strip():
            plain_listing.append({"name": str(name), "size": 0})
    return plain_listing
3410
+
3411
+
3412
+ def download_hf_model(
3413
+ repo_id: str,
3414
+ provider: str = "local_mlx",
3415
+ progress_emit=None,
3416
+ ) -> Dict[str, object]:
3212
3417
  if importlib.util.find_spec("huggingface_hub") is None:
3213
3418
  raise HTTPException(status_code=400, detail="huggingface_hub가 없습니다. 먼저 MLX runtime 설치를 진행해 주세요.")
3214
3419
 
3215
3420
  target_dir = hf_model_dir(repo_id)
3216
3421
  if hf_model_ready(repo_id, provider):
3422
+ if progress_emit:
3423
+ progress_emit(model_download_progress_payload(
3424
+ "download",
3425
+ "이미 다운로드된 모델을 확인했습니다.",
3426
+ percent=100,
3427
+ downloaded_bytes=0,
3428
+ total_bytes=0,
3429
+ eta_seconds=0,
3430
+ ))
3217
3431
  return {"model": repo_id, "path": str(target_dir), "cached": True}
3218
3432
 
3219
3433
  target_dir.mkdir(parents=True, exist_ok=True)
3220
3434
  try:
3221
- from huggingface_hub import HfApi, hf_hub_download, snapshot_download
3435
+ from huggingface_hub import hf_hub_download
3222
3436
 
3437
+ started_at = time.time()
3438
+ all_files = hf_repo_files_with_sizes(repo_id)
3223
3439
  if provider == "llamacpp":
3224
- files = HfApi().list_repo_files(repo_id)
3225
- ggufs = sorted([name for name in files if name.lower().endswith(".gguf")])
3440
+ ggufs = sorted(
3441
+ [item for item in all_files if str(item["name"]).lower().endswith(".gguf")],
3442
+ key=lambda item: str(item["name"]),
3443
+ )
3226
3444
  if not ggufs:
3227
3445
  raise RuntimeError("GGUF 파일을 찾지 못했습니다.")
3228
3446
  preference = ("q4_k_m", "q4_0", "q4_k_s", "q3_k_m", "q2_k")
3229
- filename = next(
3230
- (name for pref in preference for name in ggufs if pref in name.lower()),
3231
- ggufs[0],
3232
- )
3233
- hf_hub_download(repo_id=repo_id, filename=filename, local_dir=str(target_dir))
3447
+ selected_files = [
3448
+ next(
3449
+ (item for pref in preference for item in ggufs if pref in str(item["name"]).lower()),
3450
+ ggufs[0],
3451
+ )
3452
+ ]
3234
3453
  else:
3235
- snapshot_download(repo_id=repo_id, local_dir=str(target_dir), resume_download=True)
3454
+ selected_files = all_files
3455
+
3456
+ total_bytes = sum(int(item.get("size") or 0) for item in selected_files) or None
3457
+ downloaded_bytes = 0
3458
+ total_files = max(1, len(selected_files))
3459
+ if progress_emit:
3460
+ progress_emit(model_download_progress_payload(
3461
+ "download",
3462
+ "모델 파일 정보를 확인했습니다.",
3463
+ percent=0,
3464
+ downloaded_bytes=0,
3465
+ total_bytes=total_bytes,
3466
+ indeterminate=total_bytes is None,
3467
+ ))
3468
+
3469
+ for index, item in enumerate(selected_files, start=1):
3470
+ filename = str(item["name"])
3471
+ size = int(item.get("size") or 0)
3472
+ tqdm_class = None
3473
+ if progress_emit:
3474
+ current_percent = (
3475
+ (downloaded_bytes / total_bytes) * 100 if total_bytes else ((index - 1) / total_files) * 100
3476
+ )
3477
+ progress_emit(model_download_progress_payload(
3478
+ "download",
3479
+ "모델 다운로드 중입니다.",
3480
+ percent=current_percent,
3481
+ detail=filename,
3482
+ downloaded_bytes=downloaded_bytes,
3483
+ total_bytes=total_bytes,
3484
+ eta_seconds=estimate_eta_seconds(started_at, current_percent),
3485
+ file=filename,
3486
+ indeterminate=total_bytes is None and total_files <= 1,
3487
+ ))
3488
+ try:
3489
+ from tqdm.auto import tqdm as base_tqdm
3490
+
3491
+ downloaded_before = downloaded_bytes
3492
+ last_emit = {"at": 0.0, "percent": -1.0}
3493
+
3494
+ def emit_byte_progress(done_bytes: float) -> None:
3495
+ done = max(0, int(done_bytes or 0))
3496
+ if total_bytes:
3497
+ aggregate = min(total_bytes, downloaded_before + done)
3498
+ percent = (aggregate / total_bytes) * 100
3499
+ else:
3500
+ file_total = size or done
3501
+ file_ratio = min(1.0, done / file_total) if file_total else 0.0
3502
+ aggregate = downloaded_before + done
3503
+ percent = ((index - 1) + file_ratio) / total_files * 100
3504
+ now = time.time()
3505
+ if percent < 100 and now - last_emit["at"] < 0.5 and percent - last_emit["percent"] < 0.3:
3506
+ return
3507
+ last_emit["at"] = now
3508
+ last_emit["percent"] = percent
3509
+ progress_emit(model_download_progress_payload(
3510
+ "download",
3511
+ "모델 다운로드 중입니다.",
3512
+ percent=percent,
3513
+ detail=filename,
3514
+ downloaded_bytes=aggregate,
3515
+ total_bytes=total_bytes,
3516
+ eta_seconds=estimate_eta_seconds(started_at, percent),
3517
+ file=filename,
3518
+ indeterminate=total_bytes is None and total_files <= 1,
3519
+ ))
3520
+
3521
+ class ProgressTqdm(base_tqdm):
3522
+ def update(self, n=1):
3523
+ result = super().update(n)
3524
+ emit_byte_progress(float(getattr(self, "n", 0) or 0))
3525
+ return result
3526
+
3527
+ tqdm_class = ProgressTqdm
3528
+ except Exception:
3529
+ tqdm_class = None
3530
+ local_path = hf_hub_download(
3531
+ repo_id=repo_id,
3532
+ filename=filename,
3533
+ local_dir=str(target_dir),
3534
+ tqdm_class=tqdm_class,
3535
+ )
3536
+ if size <= 0:
3537
+ try:
3538
+ size = Path(local_path).stat().st_size
3539
+ except OSError:
3540
+ size = 0
3541
+ downloaded_bytes += size
3542
+ if progress_emit:
3543
+ current_percent = (
3544
+ (downloaded_bytes / total_bytes) * 100 if total_bytes else (index / total_files) * 100
3545
+ )
3546
+ progress_emit(model_download_progress_payload(
3547
+ "download",
3548
+ "모델 다운로드 중입니다.",
3549
+ percent=current_percent,
3550
+ detail=filename,
3551
+ downloaded_bytes=downloaded_bytes,
3552
+ total_bytes=total_bytes,
3553
+ eta_seconds=estimate_eta_seconds(started_at, current_percent),
3554
+ file=filename,
3555
+ indeterminate=False,
3556
+ ))
3557
+
3558
+ if progress_emit:
3559
+ progress_emit(model_download_progress_payload(
3560
+ "download",
3561
+ "모델 다운로드가 완료되었습니다.",
3562
+ percent=100,
3563
+ downloaded_bytes=downloaded_bytes,
3564
+ total_bytes=total_bytes or downloaded_bytes,
3565
+ eta_seconds=0,
3566
+ ))
3236
3567
  except Exception as e:
3237
3568
  raise HTTPException(status_code=500, detail=f"{repo_id} 다운로드 실패: {str(e)[-2000:]}")
3238
3569
 
@@ -3242,6 +3573,75 @@ def download_hf_model(repo_id: str, provider: str = "local_mlx") -> Dict[str, ob
3242
3573
  return {"model": repo_id, "path": str(target_dir), "cached": False}
3243
3574
 
3244
3575
 
3576
def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict[str, object]:
    """Run ``ollama pull`` and forward its output as progress payloads.

    Args:
        model_name: Model reference passed to ``ollama pull``.
        progress_emit: Optional callable receiving each progress payload
            built by ``model_download_progress_payload``.

    Returns:
        ``{"provider": "ollama", "model": model_name, "returncode": 0}``.

    Raises:
        HTTPException: 500 with the tail of the command output when the
            process exits with a non-zero status.
    """
    from collections import deque

    started_at = time.time()
    if progress_emit:
        progress_emit(model_download_progress_payload(
            "download",
            "Ollama 모델 다운로드를 시작합니다.",
            percent=0,
            detail=model_name,
            indeterminate=True,
        ))

    last_percent: Optional[float] = None
    # Only the last 12 lines are ever reported, so keep just those instead of
    # accumulating every progress line of a long download.
    tail_lines = deque(maxlen=12)

    # The context manager closes the stdout pipe and reaps the child on every
    # exit path, so a failure cannot leak a file descriptor or a zombie.
    with subprocess.Popen(
        ["ollama", "pull", model_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as process:
        try:
            for raw_line in process.stdout or ():
                for part in re.split(r"[\r\n]+", raw_line):
                    line = part.strip()
                    if not line:
                        continue
                    tail_lines.append(line)
                    match = re.search(r"(\d{1,3}(?:\.\d+)?)\s*%", line)
                    if match:
                        last_percent = min(100.0, float(match.group(1)))
                    if progress_emit:
                        # Lines without a percentage repeat the last known
                        # value; until one is seen the bar is indeterminate.
                        progress_emit(model_download_progress_payload(
                            "download",
                            "Ollama 모델 다운로드 중입니다.",
                            percent=last_percent,
                            detail=line[-180:],
                            eta_seconds=estimate_eta_seconds(started_at, last_percent),
                            indeterminate=last_percent is None,
                        ))
            returncode = process.wait()
        except BaseException:
            # Kill first so leaving the ``with`` block does not wait for a
            # download nobody is consuming any more.
            process.kill()
            raise

    if returncode != 0:
        tail = "\n".join(tail_lines)
        raise HTTPException(status_code=500, detail=tail[-2000:] or "Ollama 모델 다운로드 실패")

    if progress_emit:
        progress_emit(model_download_progress_payload(
            "download",
            "Ollama 모델 다운로드가 완료되었습니다.",
            percent=100,
            detail=model_name,
            eta_seconds=0,
            indeterminate=False,
        ))
    return {"provider": "ollama", "model": model_name, "returncode": returncode}
3643
+
3644
+
3245
3645
  def get_ollama_pulled_models() -> set:
3246
3646
  if not shutil.which("ollama"):
3247
3647
  return set()
@@ -3806,6 +4206,227 @@ async def prepare_and_load_model(
3806
4206
  "download": download_result,
3807
4207
  }
3808
4208
 
4209
+
4210
def sse_event(event: str, data: Dict[str, object]) -> str:
    """Format one Server-Sent Events frame with a JSON-encoded data line."""
    encoded = json.dumps(data, ensure_ascii=False)
    header_line = f"event: {event}\n"
    data_line = f"data: {encoded}\n"
    # The trailing blank line terminates the frame.
    return header_line + data_line + "\n"
4212
+
4213
+
4214
async def prepare_and_load_model_stream(
    model_id: str,
    request: Request,
    engine: Optional[str] = None,
    user_email: Optional[str] = None,
) -> AsyncIterator[str]:
    """Prepare and load a model, yielding Server-Sent Events frames.

    The blocking preparation (engine check, download, server start) runs in
    a daemon thread that posts progress to a queue. This coroutine drains
    the queue into ``progress`` frames, then loads the model through
    ``router.load_model`` and ends with a ``done`` frame carrying the result.

    Raises:
        HTTPException: 400 when the normalized model id is empty, or the
            status/detail reported by the preparation thread on failure.
    """
    model_id = normalize_local_model_request(model_id, engine)
    if not model_id:
        raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")

    parsed_provider, parsed_model = parse_model_ref(model_id)
    if parsed_provider == "mlx":
        parsed_provider = "local_mlx"

    events: "queue.Queue[Dict[str, object]]" = queue.Queue()
    outcome: Dict[str, object] = {}

    def emit_progress(payload: Dict[str, object]) -> None:
        events.put({"kind": "progress", "data": payload})

    def report(stage: str, message: str, **fields) -> None:
        # Shorthand: build a payload and enqueue it as a progress event.
        emit_progress(model_download_progress_payload(stage, message, **fields))

    def report_cached(message: str, **fields) -> None:
        # Shorthand for "nothing to download" notifications.
        report("download", message, percent=100, eta_seconds=0, **fields)

    def run_preparation() -> None:
        try:
            install_info: Dict[str, object] = {}
            fetched: Optional[Dict[str, object]] = None
            final_model_id = model_id
            final_model_name = parsed_model

            if parsed_provider in {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}:
                report("engine", "실행 엔진을 확인하는 중입니다.", percent=2, indeterminate=True)
                install_info = ensure_engine_ready(parsed_provider)
                report("engine", "실행 엔진 준비가 완료되었습니다.", percent=10, indeterminate=False)

            if parsed_provider == "local_mlx":
                explicit_path = Path(parsed_model).expanduser()
                if explicit_path.exists():
                    fetched = {"model": parsed_model, "path": str(explicit_path), "cached": True}
                    report_cached("로컬 모델 경로를 확인했습니다.", detail=str(explicit_path))
                elif not hf_model_ready(parsed_model, "local_mlx"):
                    fetched = download_hf_model(parsed_model, "local_mlx", progress_emit=emit_progress)
                else:
                    fetched = {"model": parsed_model, "path": str(hf_model_dir(parsed_model)), "cached": True}
                    report_cached("이미 다운로드된 모델을 확인했습니다.")
            elif parsed_provider == "ollama":
                report("engine", "Ollama 서버를 확인하는 중입니다.", percent=12, indeterminate=True)
                ensure_ollama_server()
                if parsed_model not in get_ollama_pulled_models():
                    fetched = pull_ollama_model_with_progress(parsed_model, progress_emit=emit_progress)
                else:
                    fetched = {"provider": "ollama", "model": parsed_model, "cached": True}
                    report_cached("이미 다운로드된 Ollama 모델을 확인했습니다.", detail=parsed_model)
            elif parsed_provider == "vllm":
                if not hf_model_ready(parsed_model, "vllm"):
                    fetched = download_hf_model(parsed_model, "vllm", progress_emit=emit_progress)
                else:
                    fetched = {"provider": "vllm", "model": parsed_model, "cached": True}
                    report_cached("이미 다운로드된 모델을 확인했습니다.", detail=parsed_model)
                report("server", "vLLM 서버를 시작하는 중입니다.", percent=92, indeterminate=True)
                ensure_vllm_server(parsed_model)
                fetched = {**(fetched or {}), "provider": "vllm", "model": parsed_model, "server_ready": True}
            elif parsed_provider == "llamacpp":
                if not hf_model_ready(parsed_model, "llamacpp"):
                    fetched = download_hf_model(parsed_model, "llamacpp", progress_emit=emit_progress)
                else:
                    fetched = {"provider": "llamacpp", "model": parsed_model, "cached": True}
                    report_cached("이미 다운로드된 GGUF 모델을 확인했습니다.", detail=parsed_model)
                report("server", "llama.cpp 서버를 시작하는 중입니다.", percent=92, indeterminate=True)
                ensure_llamacpp_server(parsed_model)
                fetched = {**(fetched or {}), "provider": "llamacpp", "model": parsed_model, "server_ready": True}
            elif parsed_provider == "lmstudio":
                report("download", "LM Studio 모델을 확인하는 중입니다.", percent=35, indeterminate=True)
                ensured = ensure_lmstudio_model(parsed_model)
                resolved_model = str(
                    ensured.get("instance_id")
                    or ensured.get("resolved_model")
                    or parsed_model
                ).strip()
                final_model_name = resolved_model
                final_model_id = f"lmstudio:{resolved_model}"
                fetched = ensured
            else:
                report("engine", "모델 연결을 준비하는 중입니다.", percent=30, indeterminate=True)

            outcome.update({
                "model_id": final_model_id,
                "parsed_provider": parsed_provider,
                "parsed_model": final_model_name,
                "install_result": install_info,
                "download_result": fetched,
            })
            events.put({"kind": "done"})
        except HTTPException as exc:
            events.put({"kind": "error", "status_code": exc.status_code, "detail": exc.detail})
        except Exception as exc:
            logging.exception("model prepare stream worker failed")
            events.put({"kind": "error", "status_code": 500, "detail": str(exc)[-2000:]})

    threading.Thread(target=run_preparation, daemon=True).start()

    # Drain the queue; the blocking ``get`` runs in a thread so the event
    # loop stays free while waiting for the next event.
    while True:
        event = await asyncio.to_thread(events.get)
        kind = event.get("kind")
        if kind == "done":
            break
        if kind == "error":
            raise HTTPException(
                status_code=int(event.get("status_code") or 500),
                detail=event.get("detail") or "모델 준비에 실패했습니다.",
            )
        if kind == "progress":
            yield sse_event("progress", event["data"])

    prepared_model_id = str(outcome.get("model_id") or model_id)
    prepared_provider = str(outcome.get("parsed_provider") or parsed_provider)
    install_result = outcome.get("install_result") or {}
    download_result = outcome.get("download_result")

    yield sse_event("progress", model_download_progress_payload(
        "load",
        "모델을 메모리에 로드하는 중입니다.",
        percent=96,
        indeterminate=True,
    ))

    effective_email = (user_email or get_current_user(request) or "").strip()
    if prepared_provider != "local_mlx":
        user_api_key = get_user_api_key(effective_email, prepared_provider)
    else:
        user_api_key = None
    load_message = await router.load_model(
        prepared_model_id,
        None,
        draft_model_id=None,
        api_key_override=user_api_key,
        owner=effective_email or None,
    )
    installed_now = bool(isinstance(install_result, dict) and install_result.get("installed_now"))
    result = {
        "status": "ok",
        "message": load_message,
        "model": prepared_model_id,
        "current": router.current_model_id,
        "engine": prepared_provider,
        "installed_now": installed_now,
        "download": download_result,
    }
    yield sse_event("progress", model_download_progress_payload(
        "done",
        "모델 준비가 완료되었습니다.",
        percent=100,
        eta_seconds=0,
    ))
    yield sse_event("done", result)
4428
+
4429
+
3809
4430
  CLOUD_VERIFY_CACHE: Dict[str, Dict] = {}
3810
4431
  CLOUD_VERIFY_TTL_SECONDS = 600
3811
4432
 
@@ -3964,6 +4585,38 @@ async def engines_prepare_model(req: PrepareModelRequest, request: Request):
3964
4585
  )
3965
4586
 
3966
4587
 
4588
# Streaming variant of model preparation: relays every chunk produced by
# prepare_and_load_model_stream() as a text/event-stream response. Failures
# raised while streaming are reported in-band as an "error" event instead of
# propagating out of the response generator.
@app.post("/engines/prepare-model/stream")
async def engines_prepare_model_stream(req: PrepareModelRequest, request: Request):
    require_user(request)

    # Detail text used whenever the raised error carries no message of its own.
    fallback_detail = "모델 준비에 실패했습니다."

    async def _relay_events():
        try:
            upstream = prepare_and_load_model_stream(
                req.model,
                request,
                engine=req.engine,
                user_email=req.user_email,
            )
            async for piece in upstream:
                yield piece
        except HTTPException as http_err:
            # Keep the status code chosen by the code that raised.
            payload = {
                "status_code": http_err.status_code,
                "detail": http_err.detail or fallback_detail,
            }
            yield sse_event("error", payload)
        except Exception as err:
            logging.exception("model prepare stream failed")
            # Only the last 1000 characters of the message are forwarded.
            payload = {
                "status_code": 500,
                "detail": str(err)[-1000:] or fallback_detail,
            }
            yield sse_event("error", payload)

    response_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    return StreamingResponse(
        _relay_events(),
        media_type="text/event-stream",
        headers=response_headers,
    )
4618
+
4619
+
3967
4620
  @app.post("/setup/set-api-key")
3968
4621
  async def set_api_key(req: SetApiKeyRequest, request: Request):
3969
4622
  from llm_router import OPENAI_COMPATIBLE_PROVIDERS
@@ -4122,14 +4775,14 @@ async def chat(req: ChatRequest, request: Request):
4122
4775
  logging.warning("knowledge graph clear event ingest failed: %s", e)
4123
4776
  if command == "/clear_all":
4124
4777
  result = clear_history(0)
4125
- answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 Data Graph/RAG 데이터는 유지됩니다."
4778
+ answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
4126
4779
  else:
4127
4780
  if req.conversation_id:
4128
4781
  result = clear_conversation(req.conversation_id)
4129
- answer = f"현재 대화방 채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 Data Graph/RAG 데이터는 유지됩니다."
4782
+ answer = f"현재 대화방 채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
4130
4783
  else:
4131
4784
  result = clear_history(0)
4132
- answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 Data Graph/RAG 데이터는 유지됩니다."
4785
+ answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
4133
4786
  append_audit_event(
4134
4787
  "clear_command",
4135
4788
  user_email=effective_email,
@@ -5155,10 +5808,7 @@ async def _phase_verify(
5155
5808
  ctx.state = AgentState.ROLLBACK
5156
5809
  elif next_s == "EXECUTING":
5157
5810
  if ctx.retry_count >= max_retry:
5158
- ctx.final_message = (
5159
- f"최대 재시도({max_retry}회) 초과로 작업을 종료했습니다. "
5160
- f"마지막 비판: {verdict.get('reason', '(없음)')}"
5161
- )
5811
+ ctx.final_message = "처리 중 문제가 발생했습니다. 다시 시도해 주세요."
5162
5812
  ctx.state = AgentState.FAILED
5163
5813
  else:
5164
5814
  ctx.retry_count += 1
@@ -6047,9 +6697,9 @@ async def tools_computer_use_status(request: Request):
6047
6697
  return _tool_response(computer_status)
6048
6698
 
6049
6699
 
6050
- # ── Computer Use API ──────────────────────────────────────────────────────────
6700
+ # ── 컴퓨터 API ──────────────────────────────────────────────────────────
6051
6701
 
6052
- CU_SYSTEM_PROMPT = """You are Lattice AI Computer Use Agent. You control the Mac desktop using tools.
6702
+ CU_SYSTEM_PROMPT = """You are Lattice AI desktop-control agent. You control the Mac desktop using tools.
6053
6703
  Prefer non-visual direct actions when possible. Use screenshots only when you must inspect visible UI state or choose screen coordinates.
6054
6704
 
6055
6705
  Available actions:
@@ -6185,8 +6835,8 @@ async def cu_drag(req: CuDragRequest, request: Request):
6185
6835
 
6186
6836
  @app.post("/cu/agent")
6187
6837
  async def cu_agent(req: CuAgentRequest, request: Request):
6188
- """SSE streaming Computer Use agent loop."""
6189
- require_admin(request)
6838
+ """SSE streaming desktop-control agent loop."""
6839
+ require_user(request)
6190
6840
  async def _stream():
6191
6841
  task_lower = (req.task or "").lower()
6192
6842
  url_match = re.search(r"(https?://[^\s]+|localhost:\d+[^\s]*|127\.0\.0\.1:\d+[^\s]*)", req.task or "")
@@ -6413,9 +7063,9 @@ _MCP_TOOL_DESCRIPTIONS: Dict[str, str] = {
6413
7063
  "computer_scroll": "Scroll at screen coordinates.",
6414
7064
  "computer_move": "Move the mouse to screen coordinates.",
6415
7065
  "computer_drag": "Drag from (x1,y1) to (x2,y2).",
6416
- "computer_status": "Check if Mac Computer Use (pyautogui) is available.",
7066
+ "computer_status": "Check if Mac desktop control (pyautogui) is available.",
6417
7067
  "chrome_status": "Report Chrome desktop bridge availability.",
6418
- "computer_use_status": "Report Mac Computer Use bridge availability.",
7068
+ "computer_use_status": "Report Mac desktop-control bridge availability.",
6419
7069
  "knowledge_save": "Save a note into the local knowledge garden.",
6420
7070
  "knowledge_search": "Search the local knowledge garden.",
6421
7071
  "knowledge_tree": "List local knowledge garden markdown files.",
@@ -6803,6 +7453,20 @@ def setup_auto_state() -> Dict[str, object]:
6803
7453
  "preset": auto_setup_preset(profile, recommendation),
6804
7454
  }
6805
7455
 
7456
+
7457
def primary_setup_model(recs: Dict[str, object]) -> Optional[Dict[str, object]]:
    """Return the recommendation entry that setup should treat as primary.

    An entry in ``recs["models"]`` is a candidate when it is a dict, is not
    flagged ``disabled``, and carries a model id either at the top level
    (``model_id``) or nested under ``action["model_id"]``.

    Args:
        recs: Recommendation payload; anything that is not a dict with a
            list under ``"models"`` yields ``None``.

    Returns:
        The first candidate flagged ``checked``; otherwise the first
        candidate; ``None`` when there are no candidates.
    """
    models = recs.get("models") if isinstance(recs, dict) else None
    if not isinstance(models, list):
        return None

    def _model_id(item: Dict[str, object]) -> object:
        # Only look inside ``action`` when it really is a dict: a malformed
        # entry (e.g. ``"action": "install"``) must be skipped, not crash
        # the whole scan with AttributeError.
        action = item.get("action")
        nested = action.get("model_id") if isinstance(action, dict) else None
        return item.get("model_id") or nested

    candidates = [
        item for item in models
        if isinstance(item, dict) and not item.get("disabled") and _model_id(item)
    ]
    if not candidates:
        return None
    return next((item for item in candidates if item.get("checked")), candidates[0])
7468
+
7469
+
6806
7470
  @app.get("/setup/scan")
6807
7471
  async def setup_scan(request: Request):
6808
7472
  """환경 감지 및 맞춤 추천 반환."""
@@ -6810,6 +7474,27 @@ async def setup_scan(request: Request):
6810
7474
  env = scan_environment()
6811
7475
  recs = get_recommendations(env)
6812
7476
  zero_config = setup_auto_state()
7477
+ primary_model = primary_setup_model(recs)
7478
+ if primary_model:
7479
+ model_id = primary_model.get("model_id") or (primary_model.get("action") or {}).get("model_id")
7480
+ zero_config.setdefault("recommend", {})["model_id"] = model_id
7481
+ zero_config["recommend"]["runtime"] = "mlx"
7482
+ rationale = [
7483
+ item for item in zero_config["recommend"].get("rationale", [])
7484
+ if not (isinstance(item, str) and item.startswith("RAM ") and "→" in item)
7485
+ ]
7486
+ rationale.append(f"실제 다운로드 및 로드 가능한 MLX 모델 → {model_id}")
7487
+ zero_config["recommend"]["rationale"] = rationale
7488
+ if isinstance(zero_config.get("plan"), dict):
7489
+ zero_config["plan"]["steps"] = [{
7490
+ "name": f"weights:{model_id}",
7491
+ "why": "추론에 사용할 모델 가중치",
7492
+ "command": ["huggingface-cli", "download", model_id, "--quiet"],
7493
+ "requires_admin": False,
7494
+ }]
7495
+ if isinstance(zero_config.get("preset"), dict):
7496
+ zero_config["preset"].setdefault("model", {})["id"] = model_id
7497
+ zero_config["preset"]["model"]["runtime"] = "mlx"
6813
7498
  env["zero_config"] = zero_config
6814
7499
  recs.setdefault("summary", {})["zero_config"] = zero_config["recommend"]
6815
7500
  recs["install_plan"] = zero_config["plan"]