ltcai 0.1.30 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.py CHANGED
@@ -47,6 +47,29 @@ from PIL import Image
47
47
 
48
48
  from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
49
49
  from knowledge_graph import KnowledgeGraphStore
50
+ from knowledge_graph_api import create_knowledge_graph_router
51
+ from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
52
+ from latticeai.core.security import (
53
+ hash_password as _hash_password,
54
+ verify_password as _verify_password,
55
+ host_is_loopback as _host_is_loopback_impl,
56
+ client_ip as _client_ip_impl,
57
+ bytes_match_extension as _bytes_match_extension_impl,
58
+ redact_secret_text as _redact_secret_text,
59
+ check_ip_rate_limit as _check_ip_rate_limit,
60
+ enforce_rate_limit as _enforce_rate_limit,
61
+ )
62
+ from latticeai.core.sessions import SessionStore as _SessionStore
63
+ from latticeai.core.audit import (
64
+ get_audit_log as _get_audit_log,
65
+ append_audit_event as _append_audit_event,
66
+ classify_sensitive_message as _classify_sensitive_message,
67
+ mask_sensitive_text as _mask_sensitive_text,
68
+ build_sensitivity_report as _build_sensitivity_report,
69
+ build_admin_audit_report as _build_admin_audit_report,
70
+ )
71
+ from latticeai.api.auth import create_auth_router
72
+ from latticeai.api.admin import create_admin_router
50
73
  import mcp_registry
51
74
  from mcp_registry import (
52
75
  MCP_REGISTRY, _THIRD_PARTY_SKILL_SOURCES, _KNOWN_REPO_LICENSES,
@@ -189,12 +212,7 @@ IS_PUBLIC_MODE = APP_MODE == "public"
189
212
  DEFAULT_HOST = env_value("LATTICEAI_HOST", "127.0.0.1")
190
213
  DEFAULT_PORT = int(env_value("LATTICEAI_PORT", "4825"))
191
214
  def _host_is_loopback(host: str) -> bool:
192
- if host in {"localhost", "127.0.0.1", "::1"}:
193
- return True
194
- try:
195
- return ipaddress.ip_address(host).is_loopback
196
- except ValueError:
197
- return False
215
+ return _host_is_loopback_impl(host)
198
216
 
199
217
  NETWORK_EXPOSED = not _host_is_loopback(DEFAULT_HOST)
200
218
  ENABLE_TELEGRAM = env_bool("LATTICEAI_ENABLE_TELEGRAM", default=not IS_PUBLIC_MODE)
@@ -244,19 +262,12 @@ async def _get_sso_discovery() -> Optional[Dict]:
244
262
  return None
245
263
  return _sso_discovery_cache
246
264
 
247
- # ── Password hashing (stdlib scrypt, no extra deps) ────────────────────────────
265
+ # ── Password hashing delegated to latticeai.core.security ────────────────────
248
266
  def hash_password(password: str) -> str:
249
- salt = secrets.token_hex(16)
250
- key = hashlib.scrypt(password.encode(), salt=salt.encode(), n=16384, r=8, p=1)
251
- return f"{salt}:{key.hex()}"
267
+ return _hash_password(password)
252
268
 
253
269
  def verify_password(password: str, hashed: str) -> bool:
254
- try:
255
- salt, key_hex = hashed.split(":", 1)
256
- key = hashlib.scrypt(password.encode(), salt=salt.encode(), n=16384, r=8, p=1)
257
- return secrets.compare_digest(key.hex(), key_hex)
258
- except Exception:
259
- return False
270
+ return _verify_password(password, hashed)
260
271
 
261
272
  def verify_and_migrate_password(email: str, plain: str, stored: str, users: Dict) -> bool:
262
273
  """평문 비밀번호를 투명하게 해시로 마이그레이션. 마이그레이션 발생 시 audit log 남김."""
@@ -273,89 +284,24 @@ def verify_and_migrate_password(email: str, plain: str, stored: str, users: Dict
273
284
  return True
274
285
  return False
275
286
 
276
- # ── Session store (file-backed, survives restarts) ────────────────────────────
277
- # 24-hour TTL with sliding-window refresh — every authenticated request bumps
278
- # created_at, so an active user stays logged in while idle sessions auto-expire.
279
- _SESSION_TTL = 60 * 60 * 24 # 24 hours
280
- _SESSION_REFRESH_THRESHOLD = 60 * 15 # only persist if >15 min since last bump (write amplification guard)
281
- _sessions_lock = threading.Lock()
282
-
283
- def _sessions_file() -> Path:
284
- data_dir = Path(os.getenv("LATTICEAI_DATA_DIR") or (Path.home() / ".ltcai"))
285
- data_dir.mkdir(parents=True, exist_ok=True)
286
- return data_dir / "sessions.json"
287
-
288
- def _load_sessions() -> Dict[str, tuple]:
289
- try:
290
- f = _sessions_file()
291
- if f.exists():
292
- raw = json.loads(f.read_text())
293
- return {k: tuple(v) for k, v in raw.items()}
294
- except Exception as e:
295
- logging.warning("_load_sessions failed (starting empty): %s", e)
296
- return {}
297
-
298
- def _persist_sessions(sessions: Dict[str, tuple]) -> None:
299
- try:
300
- _sessions_file().write_text(json.dumps({k: list(v) for k, v in sessions.items()}, ensure_ascii=False))
301
- except Exception as e:
302
- logging.warning("_persist_sessions failed: %s", e)
303
-
304
- _sessions: Dict[str, tuple] = _load_sessions()
305
-
306
- # ── Rate limiting ─────────────────────────────────────────────────────────────
307
- _rate_windows: dict[tuple[str, str], list[float]] = {}
308
- _rate_lock = threading.Lock()
287
+ # ── Session store delegated to latticeai.core.sessions ──────────────────────
288
+ _SESSION_TTL = 60 * 60 * 24
289
+ _session_store = _SessionStore()
309
290
 
310
291
  def _check_rate_limit(ip: str, action: str, max_calls: int, window_secs: float) -> None:
311
- key = (ip, action)
312
- now = time.time()
313
- cutoff = now - window_secs
314
- with _rate_lock:
315
- calls = [t for t in _rate_windows.get(key, []) if t > cutoff]
316
- if len(calls) >= max_calls:
317
- raise HTTPException(status_code=429, detail="요청이 너무 많습니다. 잠시 후 다시 시도하세요.")
318
- calls.append(now)
319
- _rate_windows[key] = calls
292
+ _check_ip_rate_limit(ip, action, max_calls=max_calls, window_secs=window_secs)
320
293
 
321
294
  def _client_ip(request: Request) -> str:
322
- for header in ("CF-Connecting-IP", "X-Forwarded-For"):
323
- val = request.headers.get(header)
324
- if val:
325
- return val.split(",")[0].strip()
326
- return request.client.host if request.client else "unknown"
327
-
328
- # ─────────────────────────────────────────────────────────────────────────────
295
+ return _client_ip_impl(request)
329
296
 
330
297
  def create_session(email: str) -> str:
331
- token = secrets.token_urlsafe(32)
332
- with _sessions_lock:
333
- _sessions[token] = (email, time.time())
334
- _persist_sessions(_sessions)
335
- return token
298
+ return _session_store.create(email)
336
299
 
337
300
  def get_session_email(token: str) -> Optional[str]:
338
- """Return email for a valid session, sliding the expiry forward on activity."""
339
- now = time.time()
340
- with _sessions_lock:
341
- entry = _sessions.get(token)
342
- if entry is None:
343
- return None
344
- email, created_at = entry
345
- if now - created_at > _SESSION_TTL:
346
- _sessions.pop(token, None)
347
- _persist_sessions(_sessions)
348
- return None
349
- # Sliding refresh: only update if the timestamp drifted enough to be worth a disk write
350
- if now - created_at > _SESSION_REFRESH_THRESHOLD:
351
- _sessions[token] = (email, now)
352
- _persist_sessions(_sessions)
353
- return email
301
+ return _session_store.get_email(token)
354
302
 
355
303
  def invalidate_session(token: str) -> None:
356
- with _sessions_lock:
357
- _sessions.pop(token, None)
358
- _persist_sessions(_sessions)
304
+ _session_store.invalidate(token)
359
305
 
360
306
  # ── User Management Logic ──────────────────────────────────────────────────
361
307
  BASE_DIR = Path(__file__).resolve().parent
@@ -374,6 +320,7 @@ MCP_FILE = DATA_DIR / "mcp_installs.json"
374
320
  AUDIT_FILE = DATA_DIR / "audit_log.json"
375
321
  SSO_FILE = DATA_DIR / "sso_config.json"
376
322
  KNOWLEDGE_GRAPH = KnowledgeGraphStore(DATA_DIR / "knowledge_graph.sqlite", DATA_DIR / "knowledge_graph_blobs") if ENABLE_GRAPH else None
323
+ LOCAL_KG_WATCHER = LocalKnowledgeWatcher(lambda: KNOWLEDGE_GRAPH) if ENABLE_GRAPH else None
377
324
 
378
325
  def _require_graph():
379
326
  if not ENABLE_GRAPH or KNOWLEDGE_GRAPH is None:
@@ -491,17 +438,6 @@ class SkillInstallRequest(BaseModel):
491
438
  plugin: str
492
439
  skill: str
493
440
 
494
- class KnowledgeGraphIngestRequest(BaseModel):
495
- type: str
496
- content: str = ""
497
- role: Optional[str] = None
498
- title: Optional[str] = None
499
- source: Optional[str] = None
500
- conversation_id: Optional[str] = None
501
- user_email: Optional[str] = None
502
- user_nickname: Optional[str] = None
503
- metadata: Optional[Dict] = None
504
-
505
441
  DEFAULT_VPC_CONFIG = {
506
442
  "provider": "AWS",
507
443
  "region": "ap-northeast-2",
@@ -675,34 +611,10 @@ async def install_mcp(mcp_id: str) -> Dict:
675
611
  _history_lock = threading.Lock()
676
612
 
677
613
  def get_audit_log() -> List[Dict]:
678
- if not os.path.exists(AUDIT_FILE):
679
- return []
680
- try:
681
- with open(AUDIT_FILE, "r", encoding="utf-8") as f:
682
- data = json.load(f)
683
- return data if isinstance(data, list) else []
684
- except Exception as e:
685
- logging.warning("get_audit_log failed: %s", e)
686
- return []
614
+ return _get_audit_log(AUDIT_FILE)
687
615
 
688
616
  def append_audit_event(event_type: str, **payload) -> None:
689
- try:
690
- event = {
691
- "event_type": event_type,
692
- "timestamp": datetime.now().isoformat(),
693
- **payload,
694
- }
695
- with _history_lock:
696
- events = get_audit_log()
697
- events.append(event)
698
- if len(events) > 5000:
699
- events = events[-5000:]
700
- tmp_path = str(AUDIT_FILE) + ".tmp"
701
- with open(tmp_path, "w", encoding="utf-8") as f:
702
- json.dump(events, f, ensure_ascii=False, indent=2)
703
- os.replace(tmp_path, AUDIT_FILE)
704
- except Exception as e:
705
- logging.warning("append_audit_event failed: %s", e)
617
+ _append_audit_event(AUDIT_FILE, event_type, **payload)
706
618
 
707
619
  def save_to_history(
708
620
  role: str,
@@ -767,18 +679,7 @@ def save_to_history(
767
679
  logging.warning("save_to_history failed: %s", e)
768
680
 
769
681
  def redact_secret_text(text: str) -> str:
770
- if not text:
771
- return ""
772
- patterns = [
773
- r"(?i)(api[_ -]?key|secret|token|password|passwd)\s*[:=]\s*['\"]?([A-Za-z0-9_\-\.]{12,})['\"]?",
774
- r"\b(sk-[A-Za-z0-9_\-]{16,})\b",
775
- r"\b(xai-[A-Za-z0-9_\-]{16,})\b",
776
- r"\b(gsk_[A-Za-z0-9_\-]{16,})\b",
777
- ]
778
- redacted = str(text)
779
- for pattern in patterns:
780
- redacted = re.sub(pattern, lambda m: f"{m.group(1)}=[REDACTED]" if len(m.groups()) > 1 else "[REDACTED]", redacted)
781
- return redacted
682
+ return _redact_secret_text(text)
782
683
 
783
684
  def get_history():
784
685
  if not os.path.exists(HISTORY_FILE):
@@ -977,69 +878,14 @@ def require_user(request: Request) -> str:
977
878
  return email or ""
978
879
 
979
880
 
980
- # ── Rate limiting ─────────────────────────────────────────────────────────────
981
- # Per-user token bucket. Disabled when LATTICEAI_RATE_LIMIT=0 (default: enabled).
881
+ # ── Rate limiting & file validation — delegated to latticeai.core.security ────
982
882
  _RATE_LIMIT_ENABLED = os.getenv("LATTICEAI_RATE_LIMIT", "1") != "0"
983
- _rate_buckets: Dict[str, Dict[str, float]] = {}
984
- _rate_lock = threading.Lock()
985
-
986
- # (capacity, refill_per_second) per endpoint family
987
- _RATE_LIMITS = {
988
- "chat": (30, 0.5), # 30 burst, 30/min sustained
989
- "agent": (10, 0.1), # 10 burst, 6/min sustained (agent is expensive)
990
- "upload": (20, 0.2), # 20 burst, 12/min sustained
991
- }
992
-
993
883
 
994
884
  def enforce_rate_limit(email: str, bucket_key: str) -> None:
995
- """Raise HTTP 429 if user exceeds the bucket. No-op when disabled or unauth'd."""
996
- if not _RATE_LIMIT_ENABLED or not email:
997
- return
998
- cap, refill = _RATE_LIMITS.get(bucket_key, (60, 1.0))
999
- key = f"{email}:{bucket_key}"
1000
- now = time.time()
1001
- with _rate_lock:
1002
- bucket = _rate_buckets.get(key)
1003
- if bucket is None:
1004
- _rate_buckets[key] = {"tokens": cap - 1, "ts": now}
1005
- return
1006
- elapsed = now - bucket["ts"]
1007
- bucket["tokens"] = min(cap, bucket["tokens"] + elapsed * refill)
1008
- bucket["ts"] = now
1009
- if bucket["tokens"] < 1:
1010
- retry_after = max(1, int((1 - bucket["tokens"]) / refill))
1011
- raise HTTPException(
1012
- status_code=429,
1013
- detail=f"Rate limit exceeded for {bucket_key}. Retry after {retry_after}s.",
1014
- headers={"Retry-After": str(retry_after)},
1015
- )
1016
- bucket["tokens"] -= 1
1017
-
1018
-
1019
- # ── File magic-number validation ──────────────────────────────────────────────
1020
- # Map of extension → list of byte-prefix signatures (any-match). Files without
1021
- # distinctive magic (.txt, .md, .csv) skip the check.
1022
- _FILE_MAGIC: Dict[str, List[bytes]] = {
1023
- ".pdf": [b"%PDF-"],
1024
- ".docx": [b"PK\x03\x04"],
1025
- ".xlsx": [b"PK\x03\x04"],
1026
- ".pptx": [b"PK\x03\x04"],
1027
- ".zip": [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"],
1028
- ".png": [b"\x89PNG\r\n\x1a\n"],
1029
- ".jpg": [b"\xff\xd8\xff"],
1030
- ".jpeg": [b"\xff\xd8\xff"],
1031
- ".gif": [b"GIF87a", b"GIF89a"],
1032
- }
1033
-
885
+ _enforce_rate_limit(email, bucket_key, enabled=_RATE_LIMIT_ENABLED)
1034
886
 
1035
887
  def _bytes_match_extension(data: bytes, ext: str) -> bool:
1036
- """Return True if the file bytes match the claimed extension (or extension has no magic)."""
1037
- ext = (ext or "").lower()
1038
- signatures = _FILE_MAGIC.get(ext)
1039
- if not signatures:
1040
- return True # text-like formats — no reliable magic
1041
- head = data[:16]
1042
- return any(head.startswith(sig) for sig in signatures)
888
+ return _bytes_match_extension_impl(data, ext)
1043
889
 
1044
890
  def require_admin(request: Request) -> tuple[str, Dict]:
1045
891
  users = load_users()
@@ -1133,221 +979,26 @@ def set_user_api_key(email: str, provider: str, key: str) -> None:
1133
979
  users[email] = user
1134
980
  save_users(users)
1135
981
 
1136
- SENSITIVE_PATTERNS = [
1137
- {"key": "rrn", "label": "주민등록번호", "severity": "high", "pattern": r"\b\d{6}[- ]?[1-4]\d{6}\b"},
1138
- {"key": "card", "label": "카드번호", "severity": "high", "pattern": r"\b(?:\d[ -]?){13,19}\b"},
1139
- {"key": "account", "label": "계좌번호", "severity": "medium", "pattern": r"(?:계좌|account|bank).{0,12}\d[\d -]{8,24}"},
1140
- {"key": "password", "label": "비밀번호/인증정보", "severity": "high", "pattern": r"(?:password|passwd|비밀번호|암호|token|api[_ -]?key|secret)\s*[:=]\s*[^\s,;]{4,}"},
1141
- {"key": "email", "label": "이메일", "severity": "low", "pattern": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"},
1142
- {"key": "phone", "label": "전화번호", "severity": "medium", "pattern": r"\b(?:01[016789]|02|0[3-6][1-5])[- ]?\d{3,4}[- ]?\d{4}\b"},
1143
- {"key": "address", "label": "주소", "severity": "medium", "pattern": r"(?:[가-힣]+(?:시|도)\s*)?[가-힣]+(?:시|군|구)\s+[가-힣0-9\s-]+(?:로|길)\s*\d*"},
1144
- {"key": "health", "label": "건강/의료정보", "severity": "medium", "pattern": r"(?:진단|병명|처방|복용|수술|장애|임신|혈액형|알레르기|medical|diagnosis)"},
1145
- ]
1146
-
1147
- SEVERITY_SCORE = {"low": 1, "medium": 2, "high": 3}
1148
-
1149
- def mask_sensitive_text(text: str, matches: List[Dict]) -> str:
1150
- masked = text
1151
- for item in sorted(matches, key=lambda match: match["start"], reverse=True):
1152
- value = masked[item["start"]:item["end"]]
1153
- if len(value) <= 4:
1154
- replacement = "*" * len(value)
1155
- else:
1156
- replacement = value[:2] + "*" * min(len(value) - 4, 12) + value[-2:]
1157
- masked = masked[:item["start"]] + replacement + masked[item["end"]:]
1158
- return masked
1159
-
982
+ # ── Sensitivity analysis — delegated to latticeai.core.audit ──────────────────
1160
983
  def classify_sensitive_message(item: Dict, index: int) -> Dict:
1161
- content = str(item.get("content", ""))
1162
- found = []
1163
- seen = set()
1164
- for rule in SENSITIVE_PATTERNS:
1165
- for match in re.finditer(rule["pattern"], content, flags=re.IGNORECASE):
1166
- key = (rule["key"], match.start(), match.end())
1167
- if key in seen:
1168
- continue
1169
- seen.add(key)
1170
- found.append({
1171
- "type": rule["key"],
1172
- "label": rule["label"],
1173
- "severity": rule["severity"],
1174
- "start": match.start(),
1175
- "end": match.end(),
1176
- })
1177
- severity = "none"
1178
- if found:
1179
- severity = max(found, key=lambda item: SEVERITY_SCORE[item["severity"]])["severity"]
1180
- preview_text = content[:240]
1181
- preview_matches = [match for match in found if match["start"] < len(preview_text)]
1182
- return {
1183
- "index": index,
1184
- "role": item.get("role", ""),
1185
- "user_email": item.get("user_email"),
1186
- "user_nickname": item.get("user_nickname") or item.get("user_email") or "Unknown",
1187
- "timestamp": item.get("timestamp"),
1188
- "sensitivity": severity,
1189
- "labels": sorted({match["label"] for match in found}),
1190
- "risk_fields": found,
1191
- "compliance_fields": [] if found else ["민감정보 미검출"],
1192
- "preview": mask_sensitive_text(preview_text, preview_matches),
1193
- }
984
+ return _classify_sensitive_message(item, index)
1194
985
 
1195
986
  def build_sensitivity_report(history: List[Dict]) -> Dict:
1196
- items = [classify_sensitive_message(item, index) for index, item in enumerate(history)]
1197
- risky_items = [item for item in items if item["risk_fields"]]
1198
- compliant_items = [item for item in items if not item["risk_fields"]]
1199
- field_counts = {}
1200
- user_counts = {}
1201
- severity_counts = {"high": 0, "medium": 0, "low": 0, "none": len(compliant_items)}
1202
- for item in risky_items:
1203
- severity_counts[item["sensitivity"]] += 1
1204
- user_key = item.get("user_email") or item.get("user_nickname") or "Unknown"
1205
- user_counts[user_key] = user_counts.get(user_key, 0) + 1
1206
- for field in item["risk_fields"]:
1207
- field_counts[field["label"]] = field_counts.get(field["label"], 0) + 1
1208
- return {
1209
- "summary": {
1210
- "total_messages": len(items),
1211
- "risky_messages": len(risky_items),
1212
- "compliant_messages": len(compliant_items),
1213
- "risk_rate": round((len(risky_items) / len(items)) * 100, 1) if items else 0,
1214
- "severity_counts": severity_counts,
1215
- "field_counts": field_counts,
1216
- "user_counts": user_counts,
1217
- },
1218
- "risk_fields": risky_items[-30:],
1219
- "compliance_fields": compliant_items[-30:],
1220
- }
1221
-
1222
- AUDIT_DELETE_EVENTS = {"conversation_delete", "history_delete", "user_delete"}
1223
-
1224
- def _audit_user_bucket(email: Optional[str], nickname: Optional[str] = None, users: Optional[Dict] = None) -> Dict:
1225
- user = (users or {}).get(email or "", {})
1226
- return {
1227
- "email": email or "Unknown",
1228
- "nickname": nickname or user.get("nickname") or user.get("name") or email or "Unknown",
1229
- "role": get_user_role(email, users or {}) if email else "unknown",
1230
- "disabled": bool(user.get("disabled")) if user else False,
1231
- "user_messages": 0,
1232
- "assistant_messages": 0,
1233
- "document_uploads": 0,
1234
- "clear_events": 0,
1235
- "delete_events": 0,
1236
- "sensitive_events": 0,
1237
- "high_sensitive_events": 0,
1238
- "total_content_chars": 0,
1239
- "last_activity_at": None,
1240
- }
1241
-
1242
- def _public_audit_event(event: Dict) -> Dict:
1243
- allowed = {
1244
- "event_type",
1245
- "timestamp",
1246
- "role",
1247
- "user_email",
1248
- "user_nickname",
1249
- "source",
1250
- "conversation_id",
1251
- "command",
1252
- "scope",
1253
- "target_email",
1254
- "filename",
1255
- "mime_type",
1256
- "ext",
1257
- "bytes",
1258
- "extracted_chars",
1259
- "graph_node",
1260
- "keep_last",
1261
- "removed",
1262
- "kept",
1263
- "started_at",
1264
- "sensitivity",
1265
- "sensitive_labels",
1266
- "content_preview",
1267
- "content_chars",
1268
- }
1269
- return {key: event.get(key) for key in allowed if key in event}
987
+ return _build_sensitivity_report(history)
1270
988
 
989
+ # ── Admin audit report — delegated to latticeai.core.audit ───────────────────
1271
990
  def build_admin_audit_report(users: Dict) -> Dict:
1272
- events = get_audit_log()
1273
- per_user: Dict[str, Dict] = {}
1274
-
1275
- def ensure_user(email: Optional[str], nickname: Optional[str] = None) -> Dict:
1276
- key = email or nickname or "Unknown"
1277
- if key not in per_user:
1278
- per_user[key] = _audit_user_bucket(email, nickname, users)
1279
- elif nickname and per_user[key].get("nickname") in {"Unknown", email, None}:
1280
- per_user[key]["nickname"] = nickname
1281
- return per_user[key]
1282
-
1283
- for email, user in users.items():
1284
- ensure_user(email, user.get("nickname") or user.get("name"))
1285
-
1286
- summary = {
1287
- "total_events": len(events),
1288
- "chat_events": 0,
1289
- "user_messages": 0,
1290
- "assistant_messages": 0,
1291
- "document_uploads": 0,
1292
- "clear_events": 0,
1293
- "delete_events": 0,
1294
- "sensitive_events": 0,
1295
- "high_sensitive_events": 0,
1296
- }
1297
-
1298
- sensitive_events = []
1299
- deletion_events = []
1300
- for event in events:
1301
- event_type = event.get("event_type")
1302
- email = event.get("user_email")
1303
- user = ensure_user(email, event.get("user_nickname"))
1304
- timestamp = event.get("timestamp")
1305
- if timestamp and (not user["last_activity_at"] or timestamp > user["last_activity_at"]):
1306
- user["last_activity_at"] = timestamp
1307
-
1308
- user["total_content_chars"] += int(event.get("content_chars") or event.get("extracted_chars") or 0)
1309
- sensitivity = event.get("sensitivity") or "none"
1310
- labels = event.get("sensitive_labels") or []
1311
- is_sensitive = sensitivity != "none" or bool(labels)
1312
-
1313
- if event_type == "chat_message":
1314
- summary["chat_events"] += 1
1315
- if event.get("role") == "user":
1316
- summary["user_messages"] += 1
1317
- user["user_messages"] += 1
1318
- elif event.get("role") == "assistant":
1319
- summary["assistant_messages"] += 1
1320
- user["assistant_messages"] += 1
1321
- elif event_type == "document_upload":
1322
- summary["document_uploads"] += 1
1323
- user["document_uploads"] += 1
1324
- elif event_type == "clear_command":
1325
- summary["clear_events"] += 1
1326
- user["clear_events"] += 1
1327
- elif event_type in AUDIT_DELETE_EVENTS:
1328
- summary["delete_events"] += 1
1329
- user["delete_events"] += 1
1330
- deletion_events.append(_public_audit_event(event))
1331
-
1332
- if is_sensitive:
1333
- summary["sensitive_events"] += 1
1334
- user["sensitive_events"] += 1
1335
- sensitive_events.append(_public_audit_event(event))
1336
- if sensitivity == "high":
1337
- summary["high_sensitive_events"] += 1
1338
- user["high_sensitive_events"] += 1
1339
-
1340
- return {
1341
- "summary": summary,
1342
- "per_user": sorted(
1343
- per_user.values(),
1344
- key=lambda item: (item.get("last_activity_at") or "", item.get("user_messages", 0) + item.get("assistant_messages", 0)),
1345
- reverse=True,
1346
- ),
1347
- "recent_events": [_public_audit_event(event) for event in events[-80:]][::-1],
1348
- "sensitive_events": sensitive_events[-80:][::-1],
1349
- "deletion_events": deletion_events[-80:][::-1],
1350
- }
991
+ graph_stats = None
992
+ try:
993
+ if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
994
+ graph_stats = KNOWLEDGE_GRAPH.stats()
995
+ except Exception:
996
+ pass
997
+ return _build_admin_audit_report(
998
+ AUDIT_FILE, users,
999
+ get_user_role=get_user_role,
1000
+ graph_stats=graph_stats,
1001
+ )
1351
1002
 
1352
1003
  router = LLMRouter()
1353
1004
  gardener = PReinforceGardener()
@@ -1432,11 +1083,17 @@ async def lifespan(app: FastAPI):
1432
1083
  print("⏭️ Telegram Bot Bridge disabled for this mode.")
1433
1084
  _spawn(unload_idle_models_loop(), name="unload_idle_models")
1434
1085
  _spawn(autoload_default_model(), name="autoload_default_model")
1086
+ if LOCAL_KG_WATCHER:
1087
+ restored = LOCAL_KG_WATCHER.restore_enabled_sources()
1088
+ if restored.get("restored"):
1089
+ print(f"🕸️ Local knowledge watchers restored: {restored['restored']}")
1435
1090
  except Exception as e:
1436
1091
  print(f"⚠️ Startup sequence failed: {e}")
1437
1092
  try:
1438
1093
  yield
1439
1094
  finally:
1095
+ if LOCAL_KG_WATCHER:
1096
+ LOCAL_KG_WATCHER.stop_all()
1440
1097
  router.unload_all()
1441
1098
  for proc in LOCAL_SERVER_PROCESSES.values():
1442
1099
  try:
@@ -1477,329 +1134,42 @@ if _ICONS_DIR.exists():
1477
1134
  ensure_agent_root()
1478
1135
 
1479
1136
  OPEN_REGISTRATION = env_bool("LATTICEAI_OPEN_REGISTRATION", default=not NETWORK_EXPOSED and not IS_PUBLIC_MODE)
1137
+ INVITE_CODE = env_value("LATTICEAI_INVITE_CODE", "gemma-lattice-ai")
1138
+ INVITE_GATE_ENABLED = env_bool("LATTICEAI_INVITE_GATE_ENABLED", default=False)
1480
1139
 
1481
- @app.post("/register")
1482
- async def register(req: UserRegister, request: Request):
1483
- # 5 registration attempts per IP per hour
1484
- _check_rate_limit(_client_ip(request), "register", max_calls=5, window_secs=3600)
1485
- if not OPEN_REGISTRATION:
1486
- raise HTTPException(status_code=403, detail="회원가입이 비활성화되어 있습니다. 관리자에게 문의하세요.")
1487
- users = load_users()
1488
- if req.email in users:
1489
- raise HTTPException(status_code=400, detail="이미 존재하는 이메일입니다.")
1490
- # First user to register on a fresh server becomes admin automatically
1491
- role = "admin" if not users else "user"
1492
- users[req.email] = {
1493
- "password": hash_password(req.password),
1494
- "name": req.name,
1495
- "nickname": req.nickname,
1496
- "role": role,
1497
- "disabled": False,
1498
- }
1499
- save_users(users)
1500
- msg = "회원가입 성공! 첫 번째 사용자로 관리자 권한이 부여되었습니다." if role == "admin" else "회원가입 성공!"
1501
- return {"status": "ok", "message": msg, "role": role}
1502
-
1503
- @app.post("/login")
1504
- async def login(req: UserLogin, request: Request):
1505
- # 10 login attempts per IP per 5 minutes
1506
- _check_rate_limit(_client_ip(request), "login", max_calls=10, window_secs=300)
1507
- users = load_users()
1508
- user = users.get(req.email)
1509
- if not user or not verify_and_migrate_password(req.email, req.password, user.get("password", ""), users):
1510
- raise HTTPException(status_code=401, detail="이메일 또는 비밀번호가 틀렸습니다.")
1511
- if user.get("disabled"):
1512
- raise HTTPException(status_code=403, detail="비활성화된 계정입니다.")
1513
- role = get_user_role(req.email, users)
1514
- token = create_session(req.email)
1515
- response = JSONResponse(content={
1516
- "status": "ok",
1517
- "nickname": user["nickname"],
1518
- "name": user["name"],
1519
- "email": req.email,
1520
- "role": role,
1521
- "is_admin": role == "admin",
1522
- })
1523
- response.set_cookie(key="session_token", value=token, httponly=True, samesite="lax", max_age=_SESSION_TTL)
1524
- return response
1525
-
1526
- @app.get("/auth/sso/config")
1527
- async def sso_config():
1528
- return public_sso_config()
1529
-
1530
- @app.get("/auth/sso/login")
1531
- async def sso_login():
1532
- from urllib.parse import urlencode
1533
- from fastapi.responses import RedirectResponse as _Redirect
1534
- settings = get_sso_settings()
1535
- discovery = await _get_sso_discovery()
1536
- if not settings.get("enabled") or not discovery:
1537
- raise HTTPException(status_code=503, detail="SSO가 설정되지 않았습니다.")
1538
- state = secrets.token_urlsafe(16)
1539
- _sso_states[state] = time.time()
1540
- params = urlencode({
1541
- "client_id": settings["client_id"],
1542
- "response_type": "code",
1543
- "redirect_uri": settings["redirect_uri"],
1544
- "scope": settings.get("scopes") or "openid email profile",
1545
- "state": state,
1546
- })
1547
- return _Redirect(f"{discovery['authorization_endpoint']}?{params}")
1548
-
1549
- @app.get("/auth/sso/callback")
1550
- async def sso_callback(code: str = "", state: str = "", error: str = ""):
1551
- from fastapi.responses import RedirectResponse as _Redirect
1552
- import base64 as _b64
1553
- if error:
1554
- return _Redirect(f"/?sso_error={error}")
1555
- ts = _sso_states.pop(state, None)
1556
- if ts is None or time.time() - ts > 300:
1557
- raise HTTPException(status_code=400, detail="유효하지 않은 SSO 상태입니다.")
1558
- settings = get_sso_settings()
1559
- discovery = await _get_sso_discovery()
1560
- if not settings.get("enabled") or not discovery:
1561
- raise HTTPException(status_code=503, detail="SSO 설정 오류입니다.")
1562
- import httpx as _httpx
1563
- async with _httpx.AsyncClient() as c:
1564
- r = await c.post(discovery["token_endpoint"], data={
1565
- "grant_type": "authorization_code",
1566
- "code": code,
1567
- "redirect_uri": settings["redirect_uri"],
1568
- "client_id": settings["client_id"],
1569
- "client_secret": settings["client_secret"],
1570
- }, headers={"Accept": "application/json"}, timeout=15)
1571
- tokens = r.json()
1572
- id_token = tokens.get("id_token")
1573
- if not id_token:
1574
- raise HTTPException(status_code=400, detail="ID 토큰을 받지 못했습니다.")
1575
- # Decode JWT payload (no signature verification — trust IdP redirect)
1576
- padded = id_token.split(".")[1] + "=="
1577
- payload = json.loads(_b64.urlsafe_b64decode(padded))
1578
- email = payload.get("email") or payload.get("preferred_username") or payload.get("upn") or ""
1579
- if not email:
1580
- raise HTTPException(status_code=400, detail="이메일을 확인할 수 없습니다.")
1581
- users = load_users()
1582
- if email not in users:
1583
- is_first = len(users) == 0
1584
- users[email] = {
1585
- "password": "",
1586
- "name": payload.get("name", email.split("@")[0]),
1587
- "nickname": payload.get("given_name", email.split("@")[0]),
1588
- "role": "admin" if is_first else "user",
1589
- "disabled": False,
1590
- "sso": True,
1591
- }
1592
- save_users(users)
1593
- if users[email].get("disabled"):
1594
- raise HTTPException(status_code=403, detail="비활성화된 계정입니다.")
1595
- token = create_session(email)
1596
- resp = _Redirect("/chat", status_code=302)
1597
- resp.set_cookie("session_token", token, httponly=True, samesite="lax", max_age=_SESSION_TTL)
1598
- return resp
1599
-
1600
- @app.post("/logout")
1601
- async def logout(request: Request):
1602
- token = _extract_bearer_token(request)
1603
- if token:
1604
- invalidate_session(token)
1605
- response = JSONResponse(content={"status": "ok"})
1606
- response.delete_cookie("session_token")
1607
- return response
1608
-
1609
- class ChangePasswordRequest(BaseModel):
1610
- current_password: str
1611
- new_password: str
1612
-
1613
- @app.post("/account/change-password")
1614
- async def change_password(req: ChangePasswordRequest, request: Request):
1615
- email = require_user(request)
1616
- if not email:
1617
- raise HTTPException(status_code=401, detail="인증이 필요합니다.")
1618
- if len(req.new_password) < 4:
1619
- raise HTTPException(status_code=400, detail="새 비밀번호는 4자 이상이어야 합니다.")
1620
- users = load_users()
1621
- user = users.get(email)
1622
- if not user:
1623
- raise HTTPException(status_code=404, detail="사용자를 찾을 수 없습니다.")
1624
- if not verify_and_migrate_password(email, req.current_password, user.get("password", ""), users):
1625
- raise HTTPException(status_code=401, detail="현재 비밀번호가 틀렸습니다.")
1626
- users[email]["password"] = hash_password(req.new_password)
1627
- save_users(users)
1628
- return {"status": "ok", "message": "비밀번호가 변경되었습니다."}
1629
-
1630
- class UpdateProfileRequest(BaseModel):
1631
- name: Optional[str] = None
1632
- nickname: Optional[str] = None
1633
-
1634
- @app.patch("/account/profile")
1635
- async def update_profile(req: UpdateProfileRequest, request: Request):
1636
- email = require_user(request)
1637
- if not email:
1638
- raise HTTPException(status_code=401, detail="인증이 필요합니다.")
1639
- if req.name is not None and not req.name.strip():
1640
- raise HTTPException(status_code=400, detail="이름을 입력해주세요.")
1641
- if req.nickname is not None and not req.nickname.strip():
1642
- raise HTTPException(status_code=400, detail="닉네임을 입력해주세요.")
1643
- users = load_users()
1644
- user = users.get(email)
1645
- if not user:
1646
- raise HTTPException(status_code=404, detail="사용자를 찾을 수 없습니다.")
1647
- if req.name is not None:
1648
- users[email]["name"] = req.name.strip()
1649
- if req.nickname is not None:
1650
- users[email]["nickname"] = req.nickname.strip()
1651
- save_users(users)
1652
- return {"status": "ok", "name": users[email]["name"], "nickname": users[email]["nickname"]}
1653
-
1654
- @app.get("/account/profile")
1655
- async def get_profile(request: Request):
1656
- email = require_user(request)
1657
- if not email:
1658
- raise HTTPException(status_code=401, detail="인증이 필요합니다.")
1659
- users = load_users()
1660
- user = users.get(email)
1661
- if not user:
1662
- raise HTTPException(status_code=404, detail="사용자를 찾을 수 없습니다.")
1663
- role = get_user_role(email, users)
1664
- return {"email": email, "name": user.get("name", ""), "nickname": user.get("nickname", ""),
1665
- "role": role, "is_admin": role == "admin"}
1666
-
1667
- @app.get("/admin/summary")
1668
- async def admin_summary(request: Request):
1669
- _, users = require_admin(request)
1670
- history = get_history()
1671
- user_messages = [item for item in history if item.get("role") == "user"]
1672
- assistant_messages = [item for item in history if item.get("role") == "assistant"]
1673
- last_timestamp = history[-1].get("timestamp") if history else None
1674
- return {
1675
- "total_users": len(users),
1676
- "active_users": sum(1 for user in users.values() if not user.get("disabled")),
1677
- "admin_users": sum(1 for email in users if get_user_role(email, users) == "admin"),
1678
- "total_messages": len(history),
1679
- "user_messages": len(user_messages),
1680
- "assistant_messages": len(assistant_messages),
1681
- "last_message_at": last_timestamp,
1682
- }
1683
-
1684
- @app.get("/admin/stats")
1685
- async def admin_stats(request: Request):
1686
- require_admin(request)
1687
- history = get_history()
1688
- from collections import defaultdict
1689
- daily: dict = defaultdict(lambda: {"user": 0, "assistant": 0})
1690
- for item in history:
1691
- ts = item.get("timestamp", "")
1692
- day = ts[:10] if ts else "unknown"
1693
- role = item.get("role", "")
1694
- if role in ("user", "assistant"):
1695
- daily[day][role] += 1
1696
- sorted_days = sorted(daily.keys())[-14:]
1697
- return {
1698
- "daily": [{"date": d, "user": daily[d]["user"], "assistant": daily[d]["assistant"]} for d in sorted_days]
1699
- }
1700
-
1701
- @app.get("/admin/users")
1702
- async def admin_users(request: Request):
1703
- _, users = require_admin(request)
1704
- return [public_user(email, user, users) for email, user in users.items()]
1705
-
1706
- @app.get("/admin/sensitivity")
1707
- async def admin_sensitivity(request: Request):
1708
- require_admin(request)
1709
- return build_sensitivity_report(get_history())
1710
-
1711
- @app.get("/admin/audit")
1712
- async def admin_audit(request: Request):
1713
- _, users = require_admin(request)
1714
- report = build_admin_audit_report(users)
1140
# ── Auth & Admin routers (latticeai.api) ─────────────────────────────────────
# Register the object returned by create_auth_router (imported from
# latticeai.api.auth) on the application. The factory is given the helpers and
# settings it needs as keyword arguments instead of importing them itself.
app.include_router(create_auth_router(
    load_users=load_users, save_users=save_users,
    hash_password=hash_password, verify_and_migrate=verify_and_migrate_password,
    create_session=create_session, get_session_email=get_session_email,
    invalidate_session=invalidate_session, extract_bearer_token=_extract_bearer_token,
    get_user_role=get_user_role, require_user=require_user,
    # NOTE(review): the rate-limit hook is bound to _check_rate_limit, not to the
    # _check_ip_rate_limit name imported from latticeai.core.security — presumably
    # a wrapper defined in this module; confirm.
    check_ip_rate_limit=_check_rate_limit, client_ip=_client_ip,
    get_sso_settings=get_sso_settings, get_sso_discovery=_get_sso_discovery,
    public_sso_config=public_sso_config,
    # These two are plain values read once, when this statement executes.
    open_registration=OPEN_REGISTRATION, session_ttl=_SESSION_TTL,
))
1152
+
1153
+ def _graph_stats_safe():
1715
1154
  try:
1716
- report["graph"] = KNOWLEDGE_GRAPH.stats() if (ENABLE_GRAPH and KNOWLEDGE_GRAPH) else {"disabled": True}
1155
+ return KNOWLEDGE_GRAPH.stats() if (ENABLE_GRAPH and KNOWLEDGE_GRAPH) else {"disabled": True}
1717
1156
  except Exception as e:
1718
- logging.warning("knowledge graph stats for audit failed: %s", e)
1719
- report["graph"] = {"error": str(e)}
1720
- return report
1721
-
1722
- @app.get("/vpc/status")
1723
- async def vpc_status(request: Request):
1724
- require_user(request)
1725
- return load_vpc_config()
1726
-
1727
- @app.patch("/admin/vpc")
1728
- async def admin_update_vpc(req: VpcConfigUpdate, request: Request):
1729
- require_admin(request)
1730
- config = load_vpc_config()
1731
- update = req.dict(exclude_unset=True)
1732
- if "private_subnets" in update and update["private_subnets"] is not None:
1733
- update["private_subnets"] = [item.strip() for item in update["private_subnets"] if item.strip()]
1734
- config.update(update)
1735
- save_vpc_config(config)
1736
- return config
1737
-
1738
- @app.patch("/admin/users/{email:path}")
1739
- async def admin_update_user(email: str, req: AdminUserUpdate, request: Request):
1740
- admin_email, users = require_admin(request)
1741
- if email not in users:
1742
- raise HTTPException(status_code=404, detail="사용자를 찾을 수 없습니다.")
1743
- before = public_user(email, users[email], users)
1744
- if req.role is not None:
1745
- if req.role not in {"admin", "user"}:
1746
- raise HTTPException(status_code=400, detail="role은 admin 또는 user만 가능합니다.")
1747
- users[email]["role"] = req.role
1748
- if req.disabled is not None:
1749
- if email == admin_email and req.disabled:
1750
- raise HTTPException(status_code=400, detail="자기 자신은 비활성화할 수 없습니다.")
1751
- users[email]["disabled"] = req.disabled
1752
- save_users(users)
1753
- after = public_user(email, users[email], users)
1754
- append_audit_event("user_update", user_email=admin_email, target_email=email, before=before, after=after)
1755
- return after
1756
-
1757
- @app.delete("/admin/users/{email:path}")
1758
- async def admin_delete_user(email: str, request: Request):
1759
- admin_email, users = require_admin(request)
1760
- if email == admin_email:
1761
- raise HTTPException(status_code=400, detail="자기 자신은 삭제할 수 없습니다.")
1762
- if email not in users:
1763
- raise HTTPException(status_code=404, detail="사용자를 찾을 수 없습니다.")
1764
- deleted = public_user(email, users[email], users)
1765
- append_audit_event("user_delete", user_email=admin_email, target_email=email, deleted_user=deleted)
1766
- del users[email]
1767
- save_users(users)
1768
- return {"status": "ok", "deleted": deleted}
1769
-
1770
- @app.get("/admin/invite-link")
1771
- async def admin_invite_link(request: Request):
1772
- require_admin(request)
1773
- host = request.headers.get("host", f"localhost:{DEFAULT_PORT}")
1774
- scheme = "https" if request.headers.get("x-forwarded-proto") == "https" else "http"
1775
- if INVITE_GATE_ENABLED:
1776
- url = f"{scheme}://{host}/?code={INVITE_CODE}"
1777
- else:
1778
- url = f"{scheme}://{host}/"
1779
- return {"invite_url": url, "invite_code": INVITE_CODE, "gate_enabled": INVITE_GATE_ENABLED}
1780
-
1781
- @app.get("/admin/sso")
1782
- async def admin_sso(request: Request):
1783
- require_admin(request)
1784
- return public_sso_config()
1785
-
1786
- @app.patch("/admin/sso")
1787
- async def admin_update_sso(req: SsoConfigUpdate, request: Request):
1788
- admin_email, _ = require_admin(request)
1789
- update = req.dict(exclude_unset=True)
1790
- saved = save_sso_config(update)
1791
- append_audit_event(
1792
- "sso_config_update",
1793
- user_email=admin_email,
1794
- provider_name=saved.get("provider_name"),
1795
- discovery_url=saved.get("discovery_url"),
1796
- enabled=bool(saved.get("enabled")),
1797
- )
1798
- return public_sso_config(saved)
1799
-
1800
- # ── Invitation Logic ────────────────────────────────────────────────────────
1801
- INVITE_CODE = env_value("LATTICEAI_INVITE_CODE", "gemma-lattice-ai")
1802
- INVITE_GATE_ENABLED = env_bool("LATTICEAI_INVITE_GATE_ENABLED", default=False)
1157
+ return {"error": str(e)}
1158
+
1159
# Register the object returned by create_admin_router (imported from
# latticeai.api.admin) on the application, passing the helpers and settings it
# needs as keyword arguments.
app.include_router(create_admin_router(
    require_admin=require_admin, require_user=require_user,
    load_users=load_users, save_users=save_users,
    get_user_role=get_user_role, get_history=get_history,
    public_user=public_user, load_vpc_config=load_vpc_config,
    save_vpc_config=save_vpc_config,
    build_admin_audit_report=build_admin_audit_report,
    build_sensitivity_report=build_sensitivity_report,
    append_audit_event=append_audit_event,
    public_sso_config=public_sso_config, save_sso_config=save_sso_config,
    # Graph statistics are supplied as a callable (_graph_stats_safe), while
    # enable_graph is passed as the current value of ENABLE_GRAPH.
    get_graph_stats=_graph_stats_safe, enable_graph=ENABLE_GRAPH,
    # Invite settings and the default port are passed as values read when this
    # statement executes.
    invite_code=INVITE_CODE, invite_gate_enabled=INVITE_GATE_ENABLED,
    default_port=DEFAULT_PORT,
))
1803
1173
 
1804
1174
  @app.get("/")
1805
1175
  async def root(request: Request, code: Optional[str] = None, authorized: Optional[str] = Cookie(None)):
@@ -2224,83 +1594,114 @@ ENGINE_INSTALLERS = {
2224
1594
 
2225
1595
  ENGINE_MODEL_CATALOG = {
2226
1596
  "local_mlx": [
1597
+ {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "family": "SmolLM", "tag": "local-light", "size": "963MB", "pullable": True},
1598
+ {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "family": "Gemma 3", "tag": "local-light", "size": "733MB", "pullable": True},
1599
+ {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "family": "Llama 3.x", "tag": "local-light", "size": "1.3GB", "pullable": True},
1600
+ {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "family": "Gemma 2", "tag": "local-light", "size": "1.6GB", "pullable": True},
2227
1601
  {"id": "mlx-community/gemma-4-e2b-4bit", "name": "Gemma 4 E2B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
2228
1602
  {"id": "mlx-community/gemma-4-e2b-it-4bit", "name": "Gemma 4 E2B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
2229
1603
  {"id": "mlx-community/gemma-4-e4b-4bit", "name": "Gemma 4 E4B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
2230
1604
  {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
2231
- {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "Apple Silicon", "pullable": True},
2232
- {"id": "Jiunsong/supergemma4-26b-abliterated-multimodal-mlx-4bit", "name": "SuperGemma4 26B Abliterated Multimodal", "family": "Gemma 4", "tag": "local-vlm", "size": "Apple Silicon", "pullable": True},
2233
- {"id": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit", "name": "Qwen 2.5 Coder 3B", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "2.1GB", "pullable": True},
2234
- {"id": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", "name": "Qwen 2.5 Coder 7B", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "4.3GB", "pullable": True},
2235
- {"id": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", "name": "Qwen 2.5 Coder 14B", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "8.5GB", "pullable": True},
2236
- {"id": "mlx-community/Qwen2.5-3B-Instruct-4bit", "name": "Qwen 2.5 3B", "family": "Qwen 2.5", "tag": "local-general", "size": "2.1GB", "pullable": True},
2237
- {"id": "mlx-community/Qwen2.5-7B-Instruct-4bit", "name": "Qwen 2.5 7B", "family": "Qwen 2.5", "tag": "local-general", "size": "4.3GB", "pullable": True},
2238
- {"id": "mlx-community/Qwen2.5-14B-Instruct-4bit", "name": "Qwen 2.5 14B", "family": "Qwen 2.5", "tag": "local-general", "size": "8.5GB", "pullable": True},
1605
+ {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "2.7GB", "pullable": True},
1606
+ {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "4.8GB", "pullable": True},
1607
+ {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "4.4GB", "pullable": True},
1608
+ {"id": "mlx-community/gemma-3-4b-it-4bit", "name": "Gemma 3 4B", "family": "Gemma 3", "tag": "local-vlm", "size": "3.3GB", "pullable": True},
2239
1609
  {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "family": "Llama 3.x", "tag": "local-general", "size": "2.0GB", "pullable": True},
2240
1610
  {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "family": "Llama 3.1", "tag": "local-general", "size": "4.7GB", "pullable": True},
1611
+ {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "family": "Gemma 2", "tag": "local-general", "size": "5.4GB", "pullable": True},
1612
+ {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "family": "Gemma 3", "tag": "local-vlm", "size": "8.0GB", "pullable": True},
1613
+ {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
1614
+ {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
1615
+ {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "family": "Phi", "tag": "local-coding", "size": "8.3GB", "pullable": True},
1616
+ {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B Instruct v0.3", "family": "Mistral", "tag": "local-general", "size": "4.1GB", "pullable": True},
1617
+ {"id": "mlx-community/Ministral-8B-Instruct-2410-4bit", "name": "Ministral 8B Instruct", "family": "Mistral", "tag": "local-general", "size": "4.5GB", "pullable": True},
1618
+ {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "family": "Mistral", "tag": "local-large", "size": "13.3GB", "pullable": True},
1619
+ {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B", "family": "Qwen2.5", "tag": "local-coding", "size": "18.5GB", "pullable": True},
1620
+ {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "18GB", "pullable": True},
1621
+ {"id": "mlx-community/gemma-3-27b-it-4bit", "name": "Gemma 3 27B", "family": "Gemma 3", "tag": "local-vlm", "size": "17GB", "pullable": True},
1622
+ {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "15.6GB", "pullable": True},
2241
1623
  {"id": "mlx-community/Llama-3.3-70B-Instruct-4bit", "name": "Llama 3.3 70B", "family": "Llama 3.x", "tag": "local-general", "size": "40GB+", "pullable": True},
2242
1624
  {"id": "mlx-community/Llama-3.1-70B-Instruct-4bit", "name": "Llama 3.1 70B", "family": "Llama 3.1", "tag": "local-general", "size": "40GB+", "pullable": True},
2243
- {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-light", "size": "2.2GB", "pullable": True},
2244
- {"id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit", "name": "DeepSeek R1 Distill 7B", "family": "DeepSeek", "tag": "reasoning", "size": "4.3GB", "pullable": True},
2245
1625
  ],
2246
1626
  "ollama": [
1627
+ {"id": "ollama:qwen3-vl:4b", "name": "Qwen3-VL 4B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
1628
+ {"id": "ollama:qwen3-vl:8b", "name": "Qwen3-VL 8B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
1629
+ {"id": "ollama:qwen3-vl:30b", "name": "Qwen3-VL 30B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
1630
+ {"id": "ollama:qwen3:8b", "name": "Qwen3 8B via Ollama", "family": "Qwen", "tag": "local-server", "size": "pull required", "pullable": True},
1631
+ {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen2.5 Coder 14B via Ollama", "family": "Qwen", "tag": "local-coding", "size": "pull required", "pullable": True},
1632
+ {"id": "ollama:gemma3:1b", "name": "Gemma 3 1B via Ollama", "family": "Gemma", "tag": "local-light", "size": "pull required", "pullable": True},
2247
1633
  {"id": "ollama:gemma3:4b", "name": "Gemma 3 4B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
2248
1634
  {"id": "ollama:gemma3:4b-it-q4_K_M", "name": "Gemma 3 4B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
2249
1635
  {"id": "ollama:gemma3:12b", "name": "Gemma 3 12B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
2250
1636
  {"id": "ollama:gemma3:12b-it-q4_K_M", "name": "Gemma 3 12B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
2251
- {"id": "ollama:qwen2.5:3b", "name": "Qwen 2.5 3B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2252
- {"id": "ollama:qwen2.5:7b", "name": "Qwen 2.5 7B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2253
- {"id": "ollama:qwen2.5:14b", "name": "Qwen 2.5 14B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2254
- {"id": "ollama:qwen2.5:32b", "name": "Qwen 2.5 32B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2255
- {"id": "ollama:qwen2.5-coder:7b", "name": "Qwen 2.5 Coder 7B via Ollama", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "pull required", "pullable": True},
2256
- {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen 2.5 Coder 14B via Ollama", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "pull required", "pullable": True},
1637
+ {"id": "ollama:gemma3:27b", "name": "Gemma 3 27B via Ollama", "family": "Gemma", "tag": "local-large", "size": "pull required", "pullable": True},
1638
+ {"id": "ollama:llama3.2:1b", "name": "Llama 3.2 1B via Ollama", "family": "Llama 3.x", "tag": "local-light", "size": "pull required", "pullable": True},
2257
1639
  {"id": "ollama:llama3.2:3b", "name": "Llama 3.2 3B via Ollama", "family": "Llama 3.x", "tag": "local-server", "size": "pull required", "pullable": True},
2258
1640
  {"id": "ollama:llama3.1:8b", "name": "Llama 3.1 8B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
2259
1641
  {"id": "ollama:llama3.1:8b-instruct-q4_0", "name": "Llama 3.1 8B q4_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
2260
1642
  {"id": "ollama:llama3.1:8b-instruct-q8_0", "name": "Llama 3.1 8B q8_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
2261
1643
  {"id": "ollama:llama3.1:70b", "name": "Llama 3.1 70B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
1644
+ {"id": "ollama:llama3.3:70b", "name": "Llama 3.3 70B via Ollama", "family": "Llama 3.x", "tag": "local-large", "size": "pull required", "pullable": True},
1645
+ {"id": "ollama:mistral:7b", "name": "Mistral 7B via Ollama", "family": "Mistral", "tag": "local-server", "size": "pull required", "pullable": True},
1646
+ {"id": "ollama:mixtral:8x7b", "name": "Mixtral 8x7B via Ollama", "family": "Mistral", "tag": "local-large", "size": "pull required", "pullable": True},
1647
+ {"id": "ollama:phi4-mini", "name": "Phi 4 Mini via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
1648
+ {"id": "ollama:phi4", "name": "Phi 4 via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
1649
+ {"id": "ollama:smollm2:1.7b", "name": "SmolLM2 1.7B via Ollama", "family": "SmolLM", "tag": "local-light", "size": "pull required", "pullable": True},
2262
1650
  ],
2263
1651
  "vllm": [
2264
- {"id": "vllm:Qwen/Qwen2.5-0.5B-Instruct-AWQ", "name": "Qwen 2.5 0.5B AWQ via vLLM", "family": "Qwen 2.5", "tag": "local-light", "size": "0.5B", "pullable": True},
1652
+ {"id": "vllm:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1653
+ {"id": "vllm:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1654
+ {"id": "vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1655
+ {"id": "vllm:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via vLLM", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2265
1656
  {"id": "vllm:google/gemma-2-2b", "name": "Gemma 2 2B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2266
1657
  {"id": "vllm:google/gemma-2-2b-it", "name": "Gemma 2 2B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2267
1658
  {"id": "vllm:google/gemma-2-9b", "name": "Gemma 2 9B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2268
1659
  {"id": "vllm:google/gemma-2-9b-it", "name": "Gemma 2 9B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2269
- {"id": "vllm:Qwen/Qwen2.5-3B-Instruct", "name": "Qwen 2.5 3B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2270
- {"id": "vllm:Qwen/Qwen2.5-7B-Instruct", "name": "Qwen 2.5 7B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2271
- {"id": "vllm:Qwen/Qwen2.5-14B-Instruct", "name": "Qwen 2.5 14B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2272
- {"id": "vllm:Qwen/Qwen2.5-32B-Instruct", "name": "Qwen 2.5 32B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2273
- {"id": "vllm:Qwen/Qwen2.5-Coder-7B-Instruct", "name": "Qwen 2.5 Coder 7B via vLLM", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
2274
- {"id": "vllm:Qwen/Qwen2.5-Coder-14B-Instruct", "name": "Qwen 2.5 Coder 14B via vLLM", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
1660
+ {"id": "vllm:google/gemma-3-4b-it", "name": "Gemma 3 4B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1661
+ {"id": "vllm:google/gemma-3-12b-it", "name": "Gemma 3 12B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1662
+ {"id": "vllm:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1663
+ {"id": "vllm:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1664
+ {"id": "vllm:microsoft/phi-4", "name": "Phi 4 via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1665
+ {"id": "vllm:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1666
+ {"id": "vllm:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1667
+ {"id": "vllm:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via vLLM", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
2275
1668
  {"id": "vllm:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via vLLM", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
2276
1669
  {"id": "vllm:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
1670
+ {"id": "vllm:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via vLLM", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
2277
1671
  {"id": "vllm:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
2278
1672
  ],
2279
1673
  "lmstudio": [
2280
- {"id": "lmstudio:https://huggingface.co/lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF", "name": "Qwen 2.5 0.5B GGUF via LM Studio", "family": "Qwen 2.5", "tag": "local-light", "size": "0.5B", "pullable": True},
1674
+ {"id": "lmstudio:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1675
+ {"id": "lmstudio:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1676
+ {"id": "lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1677
+ {"id": "lmstudio:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via LM Studio", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2281
1678
  {"id": "lmstudio:google/gemma-2-2b-it", "name": "Gemma 2 2B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2282
1679
  {"id": "lmstudio:google/gemma-2-9b-it", "name": "Gemma 2 9B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2283
- {"id": "lmstudio:Qwen/Qwen2.5-3B-Instruct", "name": "Qwen 2.5 3B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2284
- {"id": "lmstudio:Qwen/Qwen2.5-7B-Instruct", "name": "Qwen 2.5 7B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2285
- {"id": "lmstudio:Qwen/Qwen2.5-14B-Instruct", "name": "Qwen 2.5 14B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2286
- {"id": "lmstudio:Qwen/Qwen2.5-32B-Instruct", "name": "Qwen 2.5 32B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2287
- {"id": "lmstudio:Qwen/Qwen2.5-Coder-7B-Instruct", "name": "Qwen 2.5 Coder 7B via LM Studio", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
2288
- {"id": "lmstudio:Qwen/Qwen2.5-Coder-14B-Instruct", "name": "Qwen 2.5 Coder 14B via LM Studio", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
1680
+ {"id": "lmstudio:google/gemma-3-4b-it", "name": "Gemma 3 4B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1681
+ {"id": "lmstudio:google/gemma-3-12b-it", "name": "Gemma 3 12B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1682
+ {"id": "lmstudio:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1683
+ {"id": "lmstudio:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1684
+ {"id": "lmstudio:microsoft/phi-4", "name": "Phi 4 via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1685
+ {"id": "lmstudio:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1686
+ {"id": "lmstudio:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1687
+ {"id": "lmstudio:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via LM Studio", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
2289
1688
  {"id": "lmstudio:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via LM Studio", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
2290
1689
  {"id": "lmstudio:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
1690
+ {"id": "lmstudio:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via LM Studio", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
2291
1691
  {"id": "lmstudio:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
2292
1692
  ],
2293
1693
  "llamacpp": [
2294
- {"id": "llamacpp:lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF", "name": "Qwen 2.5 0.5B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "gguf-q4", "size": "0.5B", "pullable": True},
1694
+ {"id": "llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "name": "Qwen3-VL 4B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
1695
+ {"id": "llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "name": "Qwen3-VL 8B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
2295
1696
  {"id": "llamacpp:unsloth/gemma-2-2b-it-GGUF", "name": "Gemma 2 2B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2296
1697
  {"id": "llamacpp:unsloth/gemma-2-9b-it-GGUF", "name": "Gemma 2 9B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2297
- {"id": "llamacpp:Qwen/Qwen2.5-7B-Instruct-GGUF", "name": "Qwen 2.5 7B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "local-server", "size": "gguf", "pullable": True},
2298
- {"id": "llamacpp:Qwen/Qwen2.5-14B-Instruct-GGUF", "name": "Qwen 2.5 14B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "local-server", "size": "gguf", "pullable": True},
2299
- {"id": "llamacpp:Qwen/Qwen2.5-32B-Instruct-GGUF", "name": "Qwen 2.5 32B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2300
- {"id": "llamacpp:Qwen/Qwen2.5-Coder-7B-Instruct-GGUF", "name": "Qwen 2.5 Coder 7B GGUF via llama.cpp", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "gguf", "pullable": True},
2301
- {"id": "llamacpp:Qwen/Qwen2.5-Coder-14B-Instruct-GGUF", "name": "Qwen 2.5 Coder 14B GGUF via llama.cpp", "family": "Qwen 2.5 Coder", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1698
+ {"id": "llamacpp:unsloth/gemma-3-4b-it-GGUF", "name": "Gemma 3 4B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1699
+ {"id": "llamacpp:bartowski/Mistral-7B-Instruct-v0.3-GGUF", "name": "Mistral 7B GGUF via llama.cpp", "family": "Mistral", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1700
+ {"id": "llamacpp:bartowski/Phi-3.5-mini-instruct-GGUF", "name": "Phi 3.5 Mini GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1701
+ {"id": "llamacpp:bartowski/phi-4-GGUF", "name": "Phi 4 GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2302
1702
  {"id": "llamacpp:bartowski/Llama-3.2-3B-Instruct-GGUF", "name": "Llama 3.2 3B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2303
1703
  {"id": "llamacpp:bartowski/Llama-3.1-8B-Instruct-GGUF", "name": "Llama 3.1 8B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
1704
+ {"id": "llamacpp:bartowski/Llama-3.3-70B-Instruct-GGUF", "name": "Llama 3.3 70B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "local-large", "size": "gguf", "pullable": True},
2304
1705
  {"id": "llamacpp:bartowski/Llama-3.1-70B-Instruct-GGUF", "name": "Llama 3.1 70B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
2305
1706
  ],
2306
1707
  }
@@ -2326,8 +1727,40 @@ VLLM_METAL_BIN = VLLM_METAL_ENV / "bin" / "vllm"
2326
1727
  VLLM_METAL_PYTHON = VLLM_METAL_ENV / "bin" / "python"
2327
1728
  LMSTUDIO_BUNDLED_CLI = Path("/Applications/LM Studio.app/Contents/Resources/app/.webpack/lms")
2328
1729
 
1730
def windows_binary_candidates(binary: str) -> List[Path]:
    """Return the conventional Windows install locations for a known CLI.

    Only ``"ollama"``, ``"lms"`` and ``"nvidia-smi"`` are recognised; any
    other name yields an empty list. The returned paths are candidates
    only: this function does not check whether they exist.

    Args:
        binary: Bare executable name without the ``.exe`` suffix.

    Returns:
        Candidate paths in lookup order (per-user install first, then the
        machine-wide Program Files locations).
    """
    local_appdata = os.environ.get("LOCALAPPDATA", "")
    # Use ``or`` rather than a ``.get`` default: a variable that is set but
    # empty would otherwise produce relative candidates that resolve against
    # the current working directory.
    program_files = os.environ.get("ProgramFiles") or r"C:\Program Files"
    program_files_x86 = (
        os.environ.get("ProgramFiles(x86)") or r"C:\Program Files (x86)"
    )
    candidates = {
        "ollama": [
            # Per-user installs live under %LOCALAPPDATA%; skip the entry
            # entirely when that variable is unavailable.
            Path(local_appdata) / "Programs" / "Ollama" / "ollama.exe" if local_appdata else None,
            Path(program_files) / "Ollama" / "ollama.exe",
        ],
        "lms": [
            Path(local_appdata) / "Programs" / "LM Studio" / "resources" / "app" / ".webpack" / "lms.exe" if local_appdata else None,
            Path(program_files) / "LM Studio" / "resources" / "app" / ".webpack" / "lms.exe",
        ],
        "nvidia-smi": [
            Path(program_files) / "NVIDIA Corporation" / "NVSMI" / "nvidia-smi.exe",
            Path(program_files_x86) / "NVIDIA Corporation" / "NVSMI" / "nvidia-smi.exe",
        ],
    }
    return [item for item in candidates.get(binary, []) if item is not None]
1749
+
1750
+
1751
def local_binary(binary: str) -> Optional[str]:
    """Locate an executable, preferring PATH over known install locations.

    The PATH lookup via ``shutil.which`` always runs first. Only on Windows,
    and only when that lookup fails, are the locations from
    ``windows_binary_candidates`` probed in order.

    Args:
        binary: Bare executable name to look for.

    Returns:
        The path of the first match as a string, or ``None`` if nothing
        was found.
    """
    on_path = shutil.which(binary)
    if on_path:
        return on_path
    if platform.system() != "Windows":
        return None
    first_existing = next(
        (path for path in windows_binary_candidates(binary) if path.exists()),
        None,
    )
    return None if first_existing is None else str(first_existing)
1760
+
1761
+
2329
1762
  def find_lmstudio_cli() -> Optional[str]:
2330
- cli = shutil.which("lms")
1763
+ cli = local_binary("lms")
2331
1764
  if cli:
2332
1765
  return cli
2333
1766
  if LMSTUDIO_BUNDLED_CLI.exists():
@@ -2551,6 +1984,8 @@ def engine_support_status(engine: str) -> Dict[str, object]:
2551
1984
  if engine != "vllm":
2552
1985
  return {"supported": True, "reason": None}
2553
1986
  is_apple_silicon = sys.platform == "darwin" and platform.machine() == "arm64"
1987
+ if sys.platform.startswith("win"):
1988
+ return {"supported": False, "reason": "vLLM은 Windows native 자동 설치보다 WSL2/Linux 환경을 권장합니다."}
2554
1989
  if sys.platform == "darwin" and not is_apple_silicon:
2555
1990
  return {"supported": False, "reason": "vLLM Metal 자동 설치는 Apple Silicon macOS에서만 지원됩니다."}
2556
1991
  if sys.version_info >= (3, 13) and is_apple_silicon:
@@ -2807,6 +2242,9 @@ def download_hf_model(
2807
2242
 
2808
2243
 
2809
2244
  def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict[str, object]:
2245
+ ollama = local_binary("ollama")
2246
+ if not ollama:
2247
+ raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
2810
2248
  started_at = time.time()
2811
2249
  if progress_emit:
2812
2250
  progress_emit(model_download_progress_payload(
@@ -2817,7 +2255,7 @@ def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict
2817
2255
  indeterminate=True,
2818
2256
  ))
2819
2257
  process = subprocess.Popen(
2820
- ["ollama", "pull", model_name],
2258
+ [ollama, "pull", model_name],
2821
2259
  stdout=subprocess.PIPE,
2822
2260
  stderr=subprocess.STDOUT,
2823
2261
  text=True,
@@ -2876,10 +2314,11 @@ def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict
2876
2314
 
2877
2315
 
2878
2316
  def get_ollama_pulled_models() -> set:
2879
- if not shutil.which("ollama"):
2317
+ ollama = local_binary("ollama")
2318
+ if not ollama:
2880
2319
  return set()
2881
2320
  try:
2882
- result = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=5, check=False)
2321
+ result = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=5, check=False)
2883
2322
  pulled = set()
2884
2323
  for line in result.stdout.splitlines()[1:]:
2885
2324
  parts = line.split()
@@ -2934,16 +2373,17 @@ def get_openai_compatible_server_models(provider: str) -> List[str]:
2934
2373
 
2935
2374
 
2936
2375
  def ensure_ollama_server() -> None:
2937
- if not shutil.which("ollama"):
2376
+ ollama = local_binary("ollama")
2377
+ if not ollama:
2938
2378
  raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
2939
2379
  try:
2940
- probe = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=3, check=False)
2380
+ probe = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=3, check=False)
2941
2381
  if probe.returncode == 0:
2942
2382
  return
2943
2383
  except Exception:
2944
2384
  pass
2945
2385
  subprocess.Popen(
2946
- ["ollama", "serve"],
2386
+ [ollama, "serve"],
2947
2387
  stdout=subprocess.DEVNULL,
2948
2388
  stderr=subprocess.DEVNULL,
2949
2389
  start_new_session=True,
@@ -2951,7 +2391,7 @@ def ensure_ollama_server() -> None:
2951
2391
  deadline = time.time() + 20
2952
2392
  while time.time() < deadline:
2953
2393
  try:
2954
- probe = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=3, check=False)
2394
+ probe = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=3, check=False)
2955
2395
  if probe.returncode == 0:
2956
2396
  return
2957
2397
  except Exception:
@@ -3062,7 +2502,7 @@ def engine_installed(engine: str) -> bool:
3062
2502
  if engine == "local_mlx":
3063
2503
  return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_lm"))
3064
2504
  if engine == "ollama":
3065
- return shutil.which("ollama") is not None
2505
+ return local_binary("ollama") is not None
3066
2506
  if engine == "vllm":
3067
2507
  return vllm_metal_python() is not None or vllm_executable() is not None or importlib.util.find_spec("vllm") is not None
3068
2508
  if engine == "lmstudio":
@@ -3301,11 +2741,12 @@ def install_engine(engine: str) -> Dict:
3301
2741
  "stderr": completed.stderr[-12000:],
3302
2742
  "installed": engine_installed(engine),
3303
2743
  }
3304
- if engine == "ollama" and completed.returncode == 0 and shutil.which("ollama"):
2744
+ ollama = local_binary("ollama")
2745
+ if engine == "ollama" and completed.returncode == 0 and ollama:
3305
2746
  # Skip if already running to avoid orphan daemons.
3306
2747
  already_up = False
3307
2748
  try:
3308
- probe = subprocess.run(["ollama", "list"], capture_output=True, timeout=2, check=False)
2749
+ probe = subprocess.run([ollama, "list"], capture_output=True, timeout=2, check=False)
3309
2750
  already_up = probe.returncode == 0
3310
2751
  except Exception:
3311
2752
  already_up = False
@@ -3315,7 +2756,7 @@ def install_engine(engine: str) -> Dict:
3315
2756
  try:
3316
2757
  # Detach so the daemon survives this request but doesn't become our zombie.
3317
2758
  subprocess.Popen(
3318
- ["ollama", "serve"],
2759
+ [ollama, "serve"],
3319
2760
  stdout=subprocess.DEVNULL,
3320
2761
  stderr=subprocess.DEVNULL,
3321
2762
  start_new_session=True,
@@ -3392,9 +2833,12 @@ async def prepare_and_load_model(
3392
2833
  download_result = download_hf_model(parsed_model, "local_mlx")
3393
2834
  elif parsed_provider == "ollama":
3394
2835
  ensure_ollama_server()
2836
+ ollama = local_binary("ollama")
2837
+ if not ollama:
2838
+ raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
3395
2839
  if parsed_model not in get_ollama_pulled_models():
3396
2840
  completed = subprocess.run(
3397
- ["ollama", "pull", parsed_model],
2841
+ [ollama, "pull", parsed_model],
3398
2842
  capture_output=True,
3399
2843
  text=True,
3400
2844
  timeout=900,
@@ -3778,9 +3222,12 @@ async def pull_ollama_model(req: PullModelRequest, request: Request):
3778
3222
 
3779
3223
  if provider == "ollama":
3780
3224
  ensure_ollama_server()
3225
+ ollama = local_binary("ollama")
3226
+ if not ollama:
3227
+ raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
3781
3228
  try:
3782
3229
  completed = subprocess.run(
3783
- ["ollama", "pull", model_name],
3230
+ [ollama, "pull", model_name],
3784
3231
  capture_output=True, text=True, timeout=900, check=False,
3785
3232
  )
3786
3233
  except subprocess.TimeoutExpired:
@@ -3877,21 +3324,23 @@ async def set_api_key(req: SetApiKeyRequest, request: Request):
3877
3324
  async def list_models():
3878
3325
  """HuggingFace 추천 모델 목록 및 로드 상태 반환"""
3879
3326
  recommended = [
3880
- # Qwen Series
3881
- {"id": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", "name": "Qwen 2.5 Coder 7B", "tag": "coding", "size": "4.3GB"},
3882
- {"id": "mlx-community/Qwen2.5-7B-Instruct-4bit", "name": "Qwen 2.5 7B", "tag": "general", "size": "4.3GB"},
3883
-
3884
- # Llama Series
3885
- {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "tag": "light", "size": "2.0GB"},
3886
- {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "tag": "general", "size": "4.7GB"},
3887
-
3888
- # Gemma Series
3889
- {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B (4-bit)", "tag": "next-gen", "size": "5.2GB"},
3890
- {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "tag": "balanced","size": "5.4GB"},
3891
- {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "tag": "ultra-light", "size": "1.6GB"},
3892
-
3893
- # Reasoning
3894
- {"id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit","name": "DeepSeek R1 (7B)", "tag": "reasoning","size": "4.3GB"},
3327
+ {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "tag": "multimodal", "size": "2.7GB"},
3328
+ {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "tag": "multimodal", "size": "4.8GB"},
3329
+ {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B","tag": "multimodal", "size": "18GB"},
3330
+ {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "tag": "ultra-light", "size": "963MB"},
3331
+ {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "tag": "ultra-light", "size": "733MB"},
3332
+ {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "tag": "light", "size": "1.3GB"},
3333
+ {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "tag": "light", "size": "2.0GB"},
3334
+ {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "tag": "coding", "size": "2.2GB"},
3335
+ {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "tag": "multimodal", "size": "4.4GB"},
3336
+ {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B v0.3", "tag": "general", "size": "4.1GB"},
3337
+ {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "tag": "general", "size": "4.7GB"},
3338
+ {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B", "tag": "multimodal", "size": "5.2GB"},
3339
+ {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "tag": "balanced", "size": "8.0GB"},
3340
+ {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "tag": "coding", "size": "8.3GB"},
3341
+ {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "tag": "large", "size": "13.3GB"},
3342
+ {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B","tag": "coding", "size": "18.5GB"},
3343
+ {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B", "tag": "multimodal", "size": "15.6GB"},
3895
3344
  ]
3896
3345
  return {
3897
3346
  "recommended": recommended,
@@ -4204,97 +3653,6 @@ async def search_history(q: str, request: Request):
4204
3653
  grouped[cid]["messages"].append(item)
4205
3654
  return {"results": list(grouped.values())[-30:], "query": q}
4206
3655
 
4207
-
4208
- @app.get("/graph")
4209
- async def knowledge_graph_page(request: Request):
4210
- """Serve the interactive knowledge graph canvas UI."""
4211
- _require_graph()
4212
- require_user(request)
4213
- return FileResponse(STATIC_DIR / "graph.html")
4214
-
4215
-
4216
- @app.get("/knowledge-graph")
4217
- async def knowledge_graph_legacy_page(request: Request):
4218
- """Backward-compatible route for the graph page."""
4219
- _require_graph()
4220
- require_user(request)
4221
- return FileResponse(STATIC_DIR / "graph.html")
4222
-
4223
-
4224
- @app.get("/knowledge-graph/stats")
4225
- async def knowledge_graph_stats(request: Request):
4226
- _require_graph()
4227
- require_user(request)
4228
- return KNOWLEDGE_GRAPH.stats()
4229
-
4230
- @app.get("/knowledge-graph/schema")
4231
- async def knowledge_graph_schema(request: Request):
4232
- _require_graph()
4233
- require_user(request)
4234
- stats = KNOWLEDGE_GRAPH.stats()
4235
- return {
4236
- "legacy_schema_version": stats.get("schema_version"),
4237
- "v2_schema_available": stats.get("v2_schema_available"),
4238
- "v2": stats.get("v2"),
4239
- }
4240
-
4241
-
4242
- @app.get("/knowledge-graph/graph")
4243
- async def knowledge_graph_data(request: Request, limit: int = 300):
4244
- _require_graph()
4245
- require_user(request)
4246
- return KNOWLEDGE_GRAPH.graph(limit)
4247
-
4248
-
4249
- @app.get("/knowledge-graph/search")
4250
- async def knowledge_graph_search(q: str, request: Request, limit: int = 30):
4251
- _require_graph()
4252
- require_user(request)
4253
- if not q or not q.strip():
4254
- return {"query": q, "matches": []}
4255
- return KNOWLEDGE_GRAPH.search(q, limit)
4256
-
4257
-
4258
- @app.get("/knowledge-graph/context")
4259
- async def knowledge_graph_context(q: str, request: Request, limit: int = 6):
4260
- _require_graph()
4261
- require_user(request)
4262
- return {"query": q, "context": KNOWLEDGE_GRAPH.context_for_query(q, limit)}
4263
-
4264
-
4265
- @app.get("/knowledge-graph/neighbors/{node_id:path}")
4266
- async def knowledge_graph_neighbors(node_id: str, request: Request):
4267
- _require_graph()
4268
- require_user(request)
4269
- if not node_id:
4270
- raise HTTPException(status_code=400, detail="node_id required")
4271
- return KNOWLEDGE_GRAPH.neighbors(node_id)
4272
-
4273
-
4274
- @app.post("/knowledge-graph/ingest")
4275
- async def knowledge_graph_ingest(req: KnowledgeGraphIngestRequest, request: Request):
4276
- _require_graph()
4277
- current_user = require_user(request)
4278
- event_type = (req.type or "").strip().lower()
4279
- if event_type not in {"message", "ai_response", "note"}:
4280
- raise HTTPException(status_code=400, detail="지원하는 type: message, ai_response, note")
4281
- role = req.role or ("assistant" if event_type == "ai_response" else "user")
4282
- return KNOWLEDGE_GRAPH.ingest_message(
4283
- role,
4284
- req.content,
4285
- user_email=req.user_email or current_user,
4286
- user_nickname=req.user_nickname,
4287
- source=req.source or "mcp",
4288
- conversation_id=req.conversation_id,
4289
- raw={
4290
- "type": req.type,
4291
- "title": req.title,
4292
- "content": req.content,
4293
- "metadata": req.metadata or {},
4294
- },
4295
- )
4296
-
4297
-
4298
3656
  async def _stream_chat(req: ChatRequest, context: str = "", image_data: str = None) -> AsyncIterator[str]:
4299
3657
  full_response = ""
4300
3658
  async for chunk in router.stream_generate(req.message, context, req.max_tokens, req.temperature, image_data):
@@ -5435,24 +4793,26 @@ async def tools_read_document(req: ToolPathRequest, request: Request):
5435
4793
 
5436
4794
  @app.get("/tools/pdf_pages")
5437
4795
  async def tools_pdf_pages(path: str, request: Request, approval_token: Optional[str] = None):
5438
- """Render PDF pages as base64 PNG images using PyMuPDF."""
4796
+ """Render PDF pages as base64 PNG images using pypdfium2 (Apache-2.0)."""
5439
4797
  current_user = require_user(request)
5440
4798
  _require_local_approval(token=approval_token, path=path, action="read", user_email=current_user)
5441
4799
  target = Path(path).expanduser().resolve()
5442
4800
  if not target.exists() or not target.is_file():
5443
4801
  raise HTTPException(status_code=404, detail="File not found")
5444
- import fitz # PyMuPDF
4802
+ import io
4803
+ import pypdfium2 as pdfium
5445
4804
  doc = None
5446
4805
  try:
5447
- doc = fitz.open(str(target))
4806
+ doc = pdfium.PdfDocument(str(target))
5448
4807
  total = len(doc)
5449
4808
  pages = []
5450
- for i, page in enumerate(doc):
5451
- if i >= 20: # 최대 20페이지
5452
- break
5453
- mat = fitz.Matrix(1.5, 1.5)
5454
- pix = page.get_pixmap(matrix=mat)
5455
- b64 = base64.b64encode(pix.tobytes("png")).decode()
4809
+ for i in range(min(total, 20)): # 최대 20페이지
4810
+ page = doc[i]
4811
+ bitmap = page.render(scale=1.5)
4812
+ pil_image = bitmap.to_pil()
4813
+ buf = io.BytesIO()
4814
+ pil_image.save(buf, format="PNG")
4815
+ b64 = base64.b64encode(buf.getvalue()).decode()
5456
4816
  pages.append({"page": i + 1, "b64": b64})
5457
4817
  return {"total": total, "pages": pages}
5458
4818
  except Exception as e:
@@ -5462,7 +4822,7 @@ async def tools_pdf_pages(path: str, request: Request, approval_token: Optional[
5462
4822
  try:
5463
4823
  doc.close()
5464
4824
  except Exception as e:
5465
- logging.warning("fitz doc close failed: %s", e)
4825
+ logging.warning("pypdfium2 doc close failed: %s", e)
5466
4826
 
5467
4827
 
5468
4828
  @app.get("/tools/download")
@@ -5918,6 +5278,24 @@ async def local_write_endpoint(req: LocalWriteRequest, request: Request):
5918
5278
  return _tool_response(local_write, req.path, req.content)
5919
5279
 
5920
5280
 
5281
+ app.include_router(create_knowledge_graph_router(
5282
+ get_graph=lambda: KNOWLEDGE_GRAPH,
5283
+ require_graph=_require_graph,
5284
+ require_user=require_user,
5285
+ static_dir=STATIC_DIR,
5286
+ ))
5287
+
5288
+ app.include_router(create_local_knowledge_router(
5289
+ get_graph=lambda: KNOWLEDGE_GRAPH,
5290
+ require_graph=_require_graph,
5291
+ require_user=require_user,
5292
+ require_local_user=_require_local_user,
5293
+ local_permission_response=_local_permission_response,
5294
+ require_local_approval=_require_local_approval,
5295
+ watcher=LOCAL_KG_WATCHER,
5296
+ ))
5297
+
5298
+
5921
5299
  @app.get("/tools/chrome_status")
5922
5300
  async def tools_chrome_status(request: Request):
5923
5301
  require_user(request)
@@ -6368,7 +5746,8 @@ async def mcp_recommend(req: McpRecommendRequest, request: Request):
6368
5746
 
6369
5747
  @app.post("/mcp/install")
6370
5748
  async def mcp_install(req: McpInstallRequest, request: Request):
6371
- require_user(request)
5749
+ admin_email, _ = require_admin(request)
5750
+ append_audit_event("mcp_install", user_email=admin_email, mcp_id=req.mcp_id)
6372
5751
  return await install_mcp(req.mcp_id)
6373
5752
 
6374
5753
 
@@ -6465,8 +5844,9 @@ async def mcp_custom_list(request: Request):
6465
5844
 
6466
5845
  @app.post("/mcp/custom")
6467
5846
  async def mcp_custom_add(req: McpCustomRequest, request: Request):
6468
- """Save a custom MCP entry (user-defined)."""
6469
- require_user(request)
5847
+ """Save a custom MCP entry (admin-only)."""
5848
+ admin_email, _ = require_admin(request)
5849
+ append_audit_event("mcp_custom_add", user_email=admin_email, name=req.name, package=req.package)
6470
5850
  if not req.name.strip():
6471
5851
  raise HTTPException(status_code=400, detail="name은 필수입니다.")
6472
5852
  if not req.package.strip():
@@ -6528,8 +5908,9 @@ async def skills_marketplace(request: Request, category: Optional[str] = None, a
6528
5908
 
6529
5909
  @app.post("/skills/install")
6530
5910
  async def skills_install(req: SkillInstallRequest, request: Request):
6531
- """skill을 로컬 skills 디렉터리에 설치 (Apache-2.0 / MIT)"""
6532
- require_user(request)
5911
+ """skill을 로컬 skills 디렉터리에 설치 (Apache-2.0 / MIT, 관리자 전용)"""
5912
+ admin_email, _ = require_admin(request)
5913
+ append_audit_event("skill_install", user_email=admin_email, plugin=req.plugin, skill=req.skill)
6533
5914
  return await install_skill(req.plugin, req.skill)
6534
5915
 
6535
5916
 
@@ -6707,24 +6088,32 @@ async def setup_scan(request: Request):
6707
6088
  primary_model = primary_setup_model(recs)
6708
6089
  if primary_model:
6709
6090
  model_id = primary_model.get("model_id") or (primary_model.get("action") or {}).get("model_id")
6091
+ model_provider, provider_model = parse_model_ref(str(model_id))
6092
+ primary_runtime = "mlx" if model_provider == "local_mlx" else model_provider
6710
6093
  zero_config.setdefault("recommend", {})["model_id"] = model_id
6711
- zero_config["recommend"]["runtime"] = "mlx"
6094
+ zero_config["recommend"]["runtime"] = primary_runtime
6712
6095
  rationale = [
6713
6096
  item for item in zero_config["recommend"].get("rationale", [])
6714
6097
  if not (isinstance(item, str) and item.startswith("RAM ") and "→" in item)
6715
6098
  ]
6716
- rationale.append(f"실제 다운로드 및 로드 가능한 MLX 모델 → {model_id}")
6099
+ rationale.append(f"실제 다운로드 및 로드 가능한 {primary_runtime} 모델 → {model_id}")
6717
6100
  zero_config["recommend"]["rationale"] = rationale
6718
6101
  if isinstance(zero_config.get("plan"), dict):
6102
+ if model_provider == "ollama":
6103
+ command = ["ollama", "pull", provider_model]
6104
+ elif model_provider in {"vllm", "lmstudio", "llamacpp"}:
6105
+ command = ["lattice-ai", "models", "load", str(model_id)]
6106
+ else:
6107
+ command = ["huggingface-cli", "download", str(model_id), "--quiet"]
6719
6108
  zero_config["plan"]["steps"] = [{
6720
6109
  "name": f"weights:{model_id}",
6721
6110
  "why": "추론에 사용할 모델 가중치",
6722
- "command": ["huggingface-cli", "download", model_id, "--quiet"],
6111
+ "command": command,
6723
6112
  "requires_admin": False,
6724
6113
  }]
6725
6114
  if isinstance(zero_config.get("preset"), dict):
6726
6115
  zero_config["preset"].setdefault("model", {})["id"] = model_id
6727
- zero_config["preset"]["model"]["runtime"] = "mlx"
6116
+ zero_config["preset"]["model"]["runtime"] = primary_runtime
6728
6117
  env["zero_config"] = zero_config
6729
6118
  recs.setdefault("summary", {})["zero_config"] = zero_config["recommend"]
6730
6119
  recs["install_plan"] = zero_config["plan"]