ltcai 0.1.9 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +174 -305
  2. package/docs/CHANGELOG.md +307 -0
  3. package/docs/architecture.md +121 -0
  4. package/docs/mcp-tools.md +116 -0
  5. package/docs/privacy.md +74 -0
  6. package/docs/public-deploy.md +137 -0
  7. package/docs/security-model.md +121 -0
  8. package/knowledge_graph.py +123 -15
  9. package/llm_router.py +100 -28
  10. package/ltcai_cli.py +138 -5
  11. package/package.json +14 -2
  12. package/server.py +1756 -329
  13. package/skills/SKILL_TEMPLATE.md +61 -29
  14. package/skills/code_review/SKILL.md +28 -0
  15. package/skills/code_review/examples.md +59 -0
  16. package/skills/code_review/risk.json +9 -0
  17. package/skills/code_review/schema.json +65 -0
  18. package/skills/data_analysis/SKILL.md +28 -0
  19. package/skills/data_analysis/examples.md +62 -0
  20. package/skills/data_analysis/risk.json +9 -0
  21. package/skills/data_analysis/schema.json +61 -0
  22. package/skills/file_edit/SKILL.md +33 -0
  23. package/skills/file_edit/examples.md +45 -0
  24. package/skills/file_edit/risk.json +9 -0
  25. package/skills/file_edit/schema.json +60 -0
  26. package/skills/summarize_document/SKILL.md +68 -0
  27. package/skills/summarize_document/examples.md +65 -0
  28. package/skills/summarize_document/risk.json +9 -0
  29. package/skills/summarize_document/schema.json +71 -0
  30. package/skills/web_search/SKILL.md +28 -0
  31. package/skills/web_search/examples.md +61 -0
  32. package/skills/web_search/risk.json +9 -0
  33. package/skills/web_search/schema.json +62 -0
  34. package/static/account.html +53 -51
  35. package/static/admin.html +50 -46
  36. package/static/chat.html +124 -96
  37. package/static/graph.html +1231 -337
  38. package/static/manifest.json +2 -2
  39. package/tests/integration/__pycache__/__init__.cpython-314.pyc +0 -0
  40. package/tests/integration/__pycache__/test_api.cpython-314-pytest-9.0.3.pyc +0 -0
  41. package/tests/unit/__pycache__/test_tools.cpython-314-pytest-9.0.3.pyc +0 -0
  42. package/tests/unit/test_tools.py +194 -1
  43. package/tools.py +264 -4
@@ -0,0 +1,137 @@
1
+ # 퍼블릭 배포 가이드
2
+
3
+ Render, Fly.io, Railway, VPS 등 외부 서버에 Lattice AI를 배포할 때 사용하는 가이드입니다.
4
+
5
+ ## 환경변수
6
+
7
+ ```bash
8
+ # 필수
9
+ LATTICEAI_MODE=public
10
+ LATTICEAI_INVITE_CODE=my-secret-invite-code # 회원가입 시 필요한 초대 코드
11
+
12
+ # 클라우드 모델 (최소 하나 이상)
13
+ OPENAI_API_KEY=sk-...
14
+ # GROQ_API_KEY=gsk_...
15
+ # OPENROUTER_API_KEY=sk-or-...
16
+
17
+ LATTICEAI_PUBLIC_MODEL=openai:gpt-4o-mini # 기본 공개 모델
18
+
19
+ # 보안
20
+ LATTICEAI_ALLOW_LOCAL_MODELS=false # MLX 비활성화 (서버에 불필요)
21
+ LATTICEAI_ENABLE_TELEGRAM=false # Telegram 봇 비활성화
22
+
23
+ # 선택적
24
+ LATTICEAI_ENABLE_GRAPH=false # Data Graph 비활성화
25
+ LATTICEAI_DATA_DIR=/data # 데이터 디렉토리
26
+ LATTICEAI_ADMIN_EMAILS=you@example.com # 어드민 이메일 고정
27
+ ```
28
+
29
+ ## Docker
30
+
31
+ ```bash
32
+ # Dockerfile이 이미 포함되어 있습니다
33
+ docker build -t lattice-ai .
34
+ ```
35
+
36
+ ```bash
37
+ docker run --rm \
38
+ -p 4825:4825 \
39
+ -e LATTICEAI_MODE=public \
40
+ -e OPENAI_API_KEY="$OPENAI_API_KEY" \
41
+ -e LATTICEAI_INVITE_CODE="my-secret-code" \
42
+ -v "$PWD/.data:/data" \
43
+ lattice-ai
44
+ ```
45
+
46
+ ## Render 배포
47
+
48
+ 1. New Web Service → GitHub 레포 연결
49
+ 2. Environment: `Python 3`
50
+ 3. Build Command: `pip install ltcai`
51
+ 4. Start Command: `LTCAI`
52
+ 5. Environment Variables 탭에서 위 환경변수 입력
53
+ 6. Disk 추가: `/data` (영구 저장용)
54
+
55
+ ## Fly.io 배포
56
+
57
+ ```bash
58
+ fly launch
59
+ fly secrets set LATTICEAI_MODE=public OPENAI_API_KEY=sk-... LATTICEAI_INVITE_CODE=secret
60
+ fly volumes create ltcai_data --size 1
61
+ fly deploy
62
+ ```
63
+
64
+ `fly.toml`:
65
+ ```toml
66
+ [build]
67
+ dockerfile = "Dockerfile"
68
+
69
+ [[mounts]]
70
+ source = "ltcai_data"
71
+ destination = "/data"
72
+
73
+ [env]
74
+ LATTICEAI_DATA_DIR = "/data"
75
+ ```
76
+
77
+ ## nginx 리버스 프록시
78
+
79
+ ```nginx
80
+ server {
81
+ listen 80;
82
+ server_name yourdomain.com;
83
+ return 301 https://$host$request_uri;
84
+ }
85
+
86
+ server {
87
+ listen 443 ssl http2;
88
+ server_name yourdomain.com;
89
+
90
+ ssl_certificate /etc/letsencrypt/live/yourdomain.com/fullchain.pem;
91
+ ssl_certificate_key /etc/letsencrypt/live/yourdomain.com/privkey.pem;
92
+
93
+ location / {
94
+ proxy_pass http://127.0.0.1:4825;
95
+ proxy_set_header Host $host;
96
+ proxy_set_header X-Real-IP $remote_addr;
97
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
98
+ proxy_set_header X-Forwarded-Proto $scheme;
99
+
100
+ # SSE 스트리밍 지원
101
+ proxy_buffering off;
102
+ proxy_cache off;
103
+ proxy_read_timeout 300s;
104
+ chunked_transfer_encoding on;
105
+ }
106
+ }
107
+ ```
108
+
109
+ ## Caddy 리버스 프록시
110
+
111
+ ```caddyfile
112
+ yourdomain.com {
113
+ reverse_proxy localhost:4825
114
+ }
115
+ ```
116
+
117
+ ## 퍼블릭 배포 체크리스트
118
+
119
+ - [ ] `LATTICEAI_MODE=public` 설정
120
+ - [ ] `LATTICEAI_INVITE_CODE` 비공개 랜덤 값으로 설정
121
+ - [ ] HTTPS 리버스 프록시 구성 (nginx / Caddy)
122
+ - [ ] 영구 볼륨 마운트 (`/data` 또는 `LATTICEAI_DATA_DIR`)
123
+ - [ ] 방화벽에서 4825 포트 직접 노출 차단
124
+ - [ ] `LATTICEAI_ALLOW_LOCAL_MODELS=false`
125
+ - [ ] 최소 하나의 클라우드 API 키 설정
126
+ - [ ] 첫 가입 후 어드민 계정 확인 (`https://yourdomain.com/admin`)
127
+
128
+ ## 지원 클라우드 모델 프리픽스
129
+
130
+ ```
131
+ openai:gpt-4o-mini
132
+ openai:gpt-4o
133
+ openrouter:openai/gpt-4o-mini
134
+ groq:llama-3.1-8b-instant
135
+ groq:llama-3.3-70b-versatile
136
+ together:meta-llama/Llama-3.3-70B-Instruct-Turbo
137
+ ```
@@ -0,0 +1,121 @@
1
+ # Lattice AI — 보안 모델
2
+
3
+ ## 설계 원칙
4
+
5
+ Lattice AI는 **개인 AI 워크스페이스**로 설계되었습니다. 기본값은 최대한 안전하게, 네트워크 노출은 명시적 opt-in으로만 허용합니다.
6
+
7
+ ## 네트워크 바인딩
8
+
9
+ | 설정 | 바인딩 | 용도 |
10
+ |------|--------|------|
11
+ | 기본 | `127.0.0.1:4825` | 로컬 전용, 외부 접근 불가 |
12
+ | `LATTICEAI_HOST=0.0.0.0` | `0.0.0.0:4825` | 모든 네트워크 인터페이스에 바인딩 — 같은 네트워크(Wi-Fi/LAN)의 기기 접근 허용 |
13
+ | 퍼블릭 배포 | nginx/Caddy 뒤에 두기 | HTTPS 종단 + 리버스 프록시 |
14
+
15
+ ## 인증
16
+
17
+ ### 비밀번호
18
+
19
+ - scrypt 해싱 (`hashlib.scrypt`, N=2^14, r=8, p=1)
20
+ - `users.json`에 `{"hash": "<scrypt hex>"}` 형식 저장
21
+ - 평문 비밀번호는 메모리에도 저장되지 않음
22
+
23
+ ### 세션
24
+
25
+ - UUID 토큰, `~/.ltcai/sessions.json` 파일 저장
26
+ - TTL: 24시간 + sliding refresh (활동 시 자동 연장, 15분 단위 디스크 쓰기)
27
+ - 쿠키: `HttpOnly; SameSite=Lax; Path=/`
28
+ - 서버 재시작 후에도 유지 (파일 기반)
29
+
30
+ ### SSO (선택적)
31
+
32
+ - Entra ID / Okta OIDC (`OIDC_DISCOVERY_URL`, `OIDC_CLIENT_ID`, `OIDC_CLIENT_SECRET`)
33
+ - 콜백 후 내부 세션 토큰으로 변환
34
+ - 어드민 핸드오프: `sessionStorage` 1회 읽기 (URL 파라미터 노출 방지)
35
+
36
+ ## API 키 보안
37
+
38
+ - OS keyring (macOS Keychain, Windows Credential Manager, Linux Secret Service) 저장
39
+ - 평문 디스크 저장은 `LATTICEAI_ALLOW_PLAINTEXT_API_KEYS=true` 명시 시에만
40
+ - 채팅 히스토리 저장 전 API key/token/password 패턴 자동 마스킹
41
+
42
+ ## CORS
43
+
44
+ ```python
45
+ CORS_ALLOWED_ORIGINS = ["http://localhost:4825", "http://127.0.0.1:4825"]
46
+ ```
47
+
48
+ - 기본: localhost만 허용
49
+ - `LATTICEAI_CORS_ALLOW_NETWORK=true`: 같은 Wi-Fi 기기 허용
50
+ - 퍼블릭 배포: 리버스 프록시 도메인만 허용 권장
51
+
52
+ ## Rate Limiting
53
+
54
+ 토큰 버킷 알고리즘, per-user:
55
+
56
+ | 엔드포인트 | burst | 지속 |
57
+ |-----------|-------|------|
58
+ | `/chat` | 30 | 30/분 |
59
+ | `/agent` | 10 | 6/분 |
60
+ | `/upload` | 20 | 12/분 |
61
+
62
+ `LATTICEAI_RATE_LIMIT=0`으로 비활성화 (개발 환경용).
63
+
64
+ ## 파일 업로드
65
+
66
+ ```python
67
+ MAGIC_NUMBERS = {
68
+ ".pdf": b"%PDF",
69
+ ".docx": b"PK\x03\x04",
70
+ ".xlsx": b"PK\x03\x04",
71
+ ".pptx": b"PK\x03\x04",
72
+ ".png": b"\x89PNG",
73
+ ".jpg": b"\xff\xd8\xff",
74
+ ".zip": b"PK\x03\x04",
75
+ }
76
+ ```
77
+
78
+ - 업로드 시 파일 첫 바이트와 확장자 매핑 검증
79
+ - 불일치 시 400 에러
80
+
81
+ ## 에이전트 도구 샌드박스
82
+
83
+ ### `run_command()` 위험 플래그 차단
84
+
85
+ 다음 패턴이 포함된 명령 실행 거부:
86
+ - `rm -rf`, `sudo`, `chmod 777`, `curl | bash`, `wget | sh`
87
+ - `> /dev/sda`, `dd if=`, `mkfs`
88
+
89
+ ### `edit_file()` 유일성 검증
90
+
91
+ - `old_string`이 파일에 정확히 한 번만 존재해야 성공
92
+ - `replace_all=true`로 전체 치환 허용
93
+ - 워크스페이스 외부 경로 접근 차단 (`../../../etc/passwd` 등)
94
+
95
+ ### `grep()` 검색 제외 디렉토리
96
+
97
+ `node_modules`, `.git`, `venv`, `dist`, `__pycache__` 자동 제외
98
+
99
+ ## 감사 로그
100
+
101
+ - 어드민 세션 핸드오프 이벤트 로깅
102
+ - 평문 비밀번호 마이그레이션 이벤트: `password_migrated_from_plaintext`
103
+ - `server.log` 파일에 모든 요청 기록
104
+
105
+ ## 텔레메트리
106
+
107
+ **없음.** 모든 데이터는 로컬에만 저장됩니다. 외부 서버로 어떠한 사용 데이터도 전송되지 않습니다.
108
+
109
+ 예외: 사용자가 직접 설정한 클라우드 API(OpenAI, Groq 등)로의 프롬프트 전송은 해당 제공업체의 정책을 따릅니다.
110
+
111
+ ## 퍼블릭 배포 체크리스트
112
+
113
+ - [ ] `LATTICEAI_MODE=public`
114
+ - [ ] `LATTICEAI_INVITE_CODE` 비공개 값 설정
115
+ - [ ] HTTPS 리버스 프록시 (nginx/Caddy)
116
+ - [ ] `LATTICEAI_ENABLE_GRAPH=false` (필요 시)
117
+ - [ ] `/data` 영구 볼륨 마운트
118
+ - [ ] `LATTICEAI_ALLOW_LOCAL_MODELS=false`
119
+ - [ ] 방화벽에서 4825 포트 직접 노출 차단 (리버스 프록시 통해서만)
120
+
121
+ 자세한 내용: [public-deploy.md](public-deploy.md)
@@ -9,6 +9,7 @@ the ingestion contract.
9
9
  import hashlib
10
10
  import json
11
11
  import logging
12
+ import math
12
13
  import re
13
14
  import shutil
14
15
  import sqlite3
@@ -25,6 +26,25 @@ def _now() -> str:
25
26
  return datetime.now().isoformat()
26
27
 
27
28
 
29
def _parse_iso(raw: Optional[str]) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp string.

    Returns ``None`` when *raw* is empty or cannot be parsed by
    ``datetime.fromisoformat``. The result is timezone-aware only if the
    string carries a UTC offset.
    """
    if not raw:
        return None
    try:
        return datetime.fromisoformat(str(raw))
    except (TypeError, ValueError):
        return None


def _recency_score(updated_at: Optional[str], *, now: Optional[datetime] = None, half_life_days: float = 14.0) -> float:
    """Return an exponential-decay freshness score in ``[0.0, 1.0]``.

    Args:
        updated_at: ISO-8601 timestamp of the last update; missing or
            malformed values score ``0.0``.
        now: Reference time. Defaults to the current time.
        half_life_days: Age in days at which the score drops to 0.5;
            clamped to at least 0.1 to avoid division by zero.

    Timestamps in the future are treated as age zero (score ``1.0``).
    """
    stamp = _parse_iso(updated_at)
    if stamp is None:
        return 0.0

    stamp_is_aware = stamp.utcoffset() is not None
    if now is None:
        # Match the stamp's awareness so the subtraction below is valid.
        now = datetime.now(stamp.tzinfo) if stamp_is_aware else datetime.now()
    elif (now.utcoffset() is not None) != stamp_is_aware:
        # Subtracting naive and aware datetimes raises TypeError. Interpret
        # the naive side as system-local time, which is what datetime.now()
        # yields, and make it aware.
        if stamp_is_aware:
            now = now.astimezone()
        else:
            stamp = stamp.astimezone()

    age_days = max(0.0, (now - stamp).total_seconds() / 86400.0)
    decay = math.log(2) / max(0.1, half_life_days)
    return math.exp(-decay * age_days)
46
+
47
+
28
48
  def _json(data: Optional[Dict[str, Any]]) -> str:
29
49
  return json.dumps(data or {}, ensure_ascii=False, sort_keys=True)
30
50
 
@@ -587,28 +607,115 @@ class KnowledgeGraphStore:
587
607
  "title": row["title"],
588
608
  "summary": row["summary"],
589
609
  "metadata": _safe_loads(row["metadata_json"]),
610
+ "updated_at": row["updated_at"],
590
611
  }
591
612
  for row in conn.execute(
592
- "SELECT id, type, title, summary, metadata_json FROM nodes WHERE type != 'Chunk' ORDER BY updated_at DESC LIMIT ?",
613
+ "SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE type != 'Chunk' ORDER BY updated_at DESC LIMIT ?",
593
614
  (limit,),
594
615
  )
595
616
  ]
596
617
  node_ids = {node["id"] for node in nodes}
597
- edges = [
598
- {
599
- "id": row["id"],
600
- "from": row["from_node"],
601
- "to": row["to_node"],
602
- "type": row["type"],
603
- "weight": row["weight"],
604
- "metadata": _safe_loads(row["metadata_json"]),
605
- }
606
- for row in conn.execute(
607
- "SELECT id, from_node, to_node, type, weight, metadata_json FROM edges ORDER BY created_at DESC LIMIT ?",
608
- (limit * 3,),
618
+ edges: List[Dict[str, Any]] = []
619
+ if node_ids:
620
+ edge_rows = conn.execute(
621
+ """
622
+ SELECT id, from_node, to_node, type, weight, metadata_json
623
+ FROM edges
624
+ WHERE from_node IN (
625
+ SELECT id
626
+ FROM nodes
627
+ WHERE type != 'Chunk'
628
+ ORDER BY updated_at DESC
629
+ LIMIT ?
630
+ )
631
+ AND to_node IN (
632
+ SELECT id
633
+ FROM nodes
634
+ WHERE type != 'Chunk'
635
+ ORDER BY updated_at DESC
636
+ LIMIT ?
637
+ )
638
+ ORDER BY created_at DESC
639
+ """,
640
+ (limit, limit),
641
+ ).fetchall()
642
+ edges = [
643
+ {
644
+ "id": row["id"],
645
+ "from": row["from_node"],
646
+ "to": row["to_node"],
647
+ "type": row["type"],
648
+ "weight": row["weight"],
649
+ "metadata": _safe_loads(row["metadata_json"]),
650
+ }
651
+ for row in edge_rows
652
+ ]
653
+
654
+ degree_map: Dict[str, int] = {}
655
+ now = datetime.now()
656
+ node_by_id = {node["id"]: node for node in nodes}
657
+ topic_metrics: Dict[str, Dict[str, Any]] = {}
658
+
659
+ for edge in edges:
660
+ degree_map[edge["from"]] = degree_map.get(edge["from"], 0) + 1
661
+ degree_map[edge["to"]] = degree_map.get(edge["to"], 0) + 1
662
+ from_node = node_by_id.get(edge["from"])
663
+ to_node = node_by_id.get(edge["to"])
664
+ if not from_node or not to_node:
665
+ continue
666
+ for topic_node, other_node in ((from_node, to_node), (to_node, from_node)):
667
+ if topic_node["type"] != "Topic":
668
+ continue
669
+ metrics = topic_metrics.setdefault(topic_node["id"], {
670
+ "mention_count": 0.0,
671
+ "conversation_ids": set(),
672
+ })
673
+ if edge["type"] in {"mentions", "discusses"}:
674
+ metrics["mention_count"] += max(0.5, float(edge.get("weight") or 1.0))
675
+ other_meta = other_node.get("metadata") or {}
676
+ conversation_id = other_meta.get("conversation_id")
677
+ if other_node["type"] == "Conversation":
678
+ conversation_id = other_node["id"]
679
+ if conversation_id:
680
+ metrics["conversation_ids"].add(str(conversation_id))
681
+
682
+ type_max_raw: Dict[str, float] = {}
683
+ for node in nodes:
684
+ degree = degree_map.get(node["id"], 0)
685
+ recency = _recency_score(node.get("updated_at"), now=now)
686
+ metrics = {
687
+ "degree": degree,
688
+ "recency_score": round(recency, 4),
689
+ }
690
+ if node["type"] == "Topic":
691
+ topic_stat = topic_metrics.get(node["id"], {})
692
+ mention_count = float(topic_stat.get("mention_count") or 0.0)
693
+ conversation_count = len(topic_stat.get("conversation_ids") or ())
694
+ raw_importance = (
695
+ math.log1p(mention_count) * 2.8
696
+ + math.log1p(conversation_count) * 2.2
697
+ + recency * 1.4
698
+ + math.sqrt(max(0, degree)) * 0.45
609
699
  )
610
- if row["from_node"] in node_ids and row["to_node"] in node_ids
611
- ]
700
+ metrics.update({
701
+ "mention_count": round(mention_count, 2),
702
+ "conversation_count": conversation_count,
703
+ })
704
+ else:
705
+ raw_importance = math.log1p(max(0, degree)) * 1.4 + recency * 0.9
706
+
707
+ metrics["importance_raw"] = round(raw_importance, 4)
708
+ node["importance"] = round(raw_importance, 4)
709
+ node["_raw_importance"] = raw_importance
710
+ node["metadata"] = {**(node.get("metadata") or {}), "graph_metrics": metrics}
711
+ type_max_raw[node["type"]] = max(type_max_raw.get(node["type"], 0.0), raw_importance)
712
+
713
+ for node in nodes:
714
+ max_raw = max(type_max_raw.get(node["type"], 0.0), 0.0001)
715
+ importance_norm = min(1.0, (node.get("_raw_importance") or 0.0) / max_raw)
716
+ node["importance_norm"] = round(importance_norm, 4)
717
+ node["metadata"]["graph_metrics"]["importance_norm"] = node["importance_norm"]
718
+ node.pop("_raw_importance", None)
612
719
  return {"nodes": nodes, "edges": edges}
613
720
 
614
721
  def search(self, query: str, limit: int = 30) -> Dict[str, Any]:
@@ -669,6 +776,7 @@ class KnowledgeGraphStore:
669
776
  "title": row["title"],
670
777
  "summary": row["summary"],
671
778
  "metadata": _safe_loads(row["metadata_json"]),
779
+ "updated_at": row["updated_at"],
672
780
  }
673
781
  for row in rows
674
782
  ],
package/llm_router.py CHANGED
@@ -10,6 +10,7 @@ import os
10
10
  import re
11
11
  import time
12
12
  from dataclasses import dataclass
13
+ from pathlib import Path
13
14
 
14
15
  # Set MLX_VLM_DRAFT_KIND to 'mtp' to enable the Gemma 4 assistant MTP drafter.
15
16
  os.environ["MLX_VLM_DRAFT_KIND"] = "mtp"
@@ -167,10 +168,59 @@ def parse_model_ref(model_id: str) -> tuple[str, str]:
167
168
  provider, model = model_id.split(":", 1)
168
169
  if provider in OPENAI_COMPATIBLE_PROVIDERS:
169
170
  return provider, model
171
+ if provider in {"local_mlx", "mlx"}:
172
+ return "local_mlx", model
170
173
  if model_id.startswith("local_mlx:"):
171
174
  return "local_mlx", model_id.split(":", 1)[1]
172
175
  return "local_mlx", model_id
173
176
 
177
# Root directory where downloaded Hugging Face model snapshots are looked up.
HF_MODELS_ROOT = Path.home() / ".latticeai" / "hf-models"


def hf_model_dir(repo_id: str) -> Path:
    """Return the local directory for *repo_id* under ``HF_MODELS_ROOT``.

    Slashes in the repo id are replaced with ``__`` so that ``org/name``
    maps to a single directory component.
    """
    return HF_MODELS_ROOT / repo_id.replace("/", "__")


def _looks_like_hf_model_dir(path: Path) -> bool:
    """Return True if *path* is a directory holding config, weights and tokenizer files."""
    # is_dir() is already False for a missing path, so no separate exists() check.
    if not path.is_dir():
        return False
    has_config = (path / "config.json").exists()
    has_weights = any(path.glob("*.safetensors")) or any(path.glob("*.bin"))
    has_tokenizer = any(
        (path / name).exists()
        for name in ("tokenizer.json", "tokenizer.model", "tokenizer_config.json")
    )
    return has_config and has_weights and has_tokenizer


def _resolve_local_hf_model(model_id: str) -> str:
    """Map a model reference to a local path when one is available.

    Resolution order:
      1. *model_id* itself, if it names an existing filesystem path
         (``~`` is expanded);
      2. the matching directory under ``HF_MODELS_ROOT``, if it looks like
         a complete model directory;
      3. otherwise *model_id* unchanged.
    """
    # Path("") is Path("."), which always exists; without this guard an empty
    # id would resolve to the current working directory.
    if not model_id or not model_id.strip():
        return model_id
    explicit_path = Path(model_id).expanduser()
    if explicit_path.exists():
        return str(explicit_path)
    local_dir = hf_model_dir(model_id)
    if _looks_like_hf_model_dir(local_dir):
        return str(local_dir)
    return model_id
202
+
203
def ensure_mlx_runtime() -> None:
    """Lazily import the MLX runtime and publish it through module globals.

    Does nothing when ``mx`` and ``lm_load`` are already set. Otherwise
    imports ``mlx.core`` and ``mlx_lm.load``, optionally ``mlx_vlm.load``,
    and selects the GPU as the default device.

    The globals are assigned only after every required step has succeeded,
    so a failed attempt leaves them unset and the next call retries instead
    of returning early.

    Raises:
        RuntimeError: if the MLX packages cannot be imported or the default
            device cannot be set.
    """
    global mx, lm_load, vlm_load, VLM_AVAILABLE
    if mx is not None and lm_load is not None:
        return
    try:
        import mlx.core as mlx_core
        from mlx_lm import load as mlx_lm_load

        # mlx_vlm is optional; without it only the LM loader is offered.
        try:
            from mlx_vlm import load as mlx_vlm_load
        except Exception:
            mlx_vlm_load = None

        mlx_core.set_default_device(mlx_core.gpu)
    except Exception as e:
        raise RuntimeError(f"MLX runtime is not available after install: {e}") from e

    mx = mlx_core
    lm_load = mlx_lm_load
    vlm_load = mlx_vlm_load
    VLM_AVAILABLE = mlx_vlm_load is not None
223
+
174
224
  class LLMRouter:
175
225
  def __init__(self):
176
226
  self._cache: Dict[str, Tuple] = {}
@@ -262,6 +312,7 @@ class LLMRouter:
262
312
  if provider != "local_mlx":
263
313
  return self._load_cloud_model(provider, provider_model, api_key_override=api_key_override, owner=owner)
264
314
 
315
+ ensure_mlx_runtime()
265
316
  if mx is None or lm_load is None:
266
317
  raise RuntimeError("MLX is not available in this process. Run on Apple Silicon with Metal access.")
267
318
 
@@ -274,6 +325,8 @@ class LLMRouter:
274
325
  self._enforce_local_model_limit(cache_key)
275
326
  print(f"⏳ Loading Gemma 4 Stack: {cache_key}...")
276
327
  loop = asyncio.get_event_loop()
328
+ target_model_id = _resolve_local_hf_model(model_id)
329
+ target_draft_model_id = _resolve_local_hf_model(draft_model_id) if draft_model_id else None
277
330
 
278
331
  def _load():
279
332
  mx.set_default_device(mx.gpu)
@@ -281,20 +334,20 @@ class LLMRouter:
281
334
 
282
335
  # 1. Target 로드 (Gemma 4는 항상 vlm_load 사용)
283
336
  if is_gemma4 and VLM_AVAILABLE:
284
- print(f"🔄 Loading Target (VLM Mode): {model_id}...")
285
- model, tokenizer = vlm_load(model_id)
337
+ print(f"🔄 Loading Target (VLM Mode): {target_model_id}...")
338
+ model, tokenizer = vlm_load(target_model_id)
286
339
  else:
287
- print(f"🔄 Loading Target (LM Mode): {model_id}...")
288
- model, tokenizer = lm_load(model_id)
340
+ print(f"🔄 Loading Target (LM Mode): {target_model_id}...")
341
+ model, tokenizer = lm_load(target_model_id)
289
342
 
290
343
  # 2. Draft 로드 (Gemma 4는 항상 vlm_load 사용)
291
344
  draft_model = None
292
- if draft_model_id:
293
- print(f"🔄 Loading Assistant (VLM Mode): {draft_model_id}...")
345
+ if target_draft_model_id:
346
+ print(f"🔄 Loading Assistant (VLM Mode): {target_draft_model_id}...")
294
347
  if is_gemma4 and VLM_AVAILABLE:
295
- draft_model, _ = vlm_load(draft_model_id)
348
+ draft_model, _ = vlm_load(target_draft_model_id)
296
349
  else:
297
- draft_model, _ = lm_load(draft_model_id)
350
+ draft_model, _ = lm_load(target_draft_model_id)
298
351
  print(f"✅ Assistant Ready.")
299
352
 
300
353
  return model, tokenizer, draft_model
@@ -374,6 +427,18 @@ class LLMRouter:
374
427
  def _is_cloud_current(self) -> bool:
375
428
  return bool(self._current and isinstance(self._cache.get(self._current), CloudModel))
376
429
 
430
+ def _local_server_error_hint(self, cloud: CloudModel, error: Exception) -> str:
431
+ raw = str(error)
432
+ if cloud.provider == "lmstudio":
433
+ base_url = os.getenv("LMSTUDIO_BASE_URL") or OPENAI_COMPATIBLE_PROVIDERS["lmstudio"]["base_url"]
434
+ return (
435
+ f"LM Studio 연결 실패: {raw}\n\n"
436
+ f"- LM Studio의 Developer/Local Server를 켜고 모델을 로드했는지 확인하세요.\n"
437
+ f"- Lattice가 보는 주소는 {base_url} 입니다. 포트가 다르면 LMSTUDIO_BASE_URL을 맞춰주세요.\n"
438
+ f"- 모델 선택창에는 LM Studio /v1/models에서 감지된 모델만 표시됩니다."
439
+ )
440
+ return raw
441
+
377
442
  def _build_prompt(self, message: str, context: Optional[str], tokenizer) -> str:
378
443
  system = SYSTEM_PROMPT
379
444
  context = normalize_branding(context)
@@ -382,7 +447,7 @@ class LLMRouter:
382
447
  try:
383
448
  msgs = [{"role": "system", "content": system}, {"role": "user", "content": message}]
384
449
  return tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
385
- except: pass
450
+ except Exception: pass
386
451
  return f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
387
452
 
388
453
  def _build_vlm_prompt(self, model, processor, message: str, context: Optional[str], num_images: int) -> str:
@@ -445,15 +510,18 @@ class LLMRouter:
445
510
  context = normalize_branding(context)
446
511
  if context:
447
512
  system += f"\n\nContext:\n{context}"
448
- response = await cloud.client.chat.completions.create(
449
- model=cloud.model,
450
- messages=[
451
- {"role": "system", "content": system},
452
- {"role": "user", "content": message},
453
- ],
454
- max_tokens=max_tokens,
455
- temperature=temperature,
456
- )
513
+ try:
514
+ response = await cloud.client.chat.completions.create(
515
+ model=cloud.model,
516
+ messages=[
517
+ {"role": "system", "content": system},
518
+ {"role": "user", "content": message},
519
+ ],
520
+ max_tokens=max_tokens,
521
+ temperature=temperature,
522
+ )
523
+ except Exception as e:
524
+ raise RuntimeError(self._local_server_error_hint(cloud, e)) from e
457
525
  return normalize_branding(response.choices[0].message.content or "")
458
526
 
459
527
  async def stream_generate(self, message: str, context: Optional[str] = None, max_tokens: int = 4096, temperature: float = 0.2, image_data: Optional[str] = None) -> AsyncIterator[str]:
@@ -508,16 +576,20 @@ class LLMRouter:
508
576
  context = normalize_branding(context)
509
577
  if context:
510
578
  system += f"\n\nContext:\n{context}"
511
- stream = await cloud.client.chat.completions.create(
512
- model=cloud.model,
513
- messages=[
514
- {"role": "system", "content": system},
515
- {"role": "user", "content": message},
516
- ],
517
- max_tokens=max_tokens,
518
- temperature=temperature,
519
- stream=True,
520
- )
579
+ try:
580
+ stream = await cloud.client.chat.completions.create(
581
+ model=cloud.model,
582
+ messages=[
583
+ {"role": "system", "content": system},
584
+ {"role": "user", "content": message},
585
+ ],
586
+ max_tokens=max_tokens,
587
+ temperature=temperature,
588
+ stream=True,
589
+ )
590
+ except Exception as e:
591
+ yield f"⚠️ {self._local_server_error_hint(cloud, e)}"
592
+ return
521
593
  async for event in stream:
522
594
  if not event.choices:
523
595
  continue