ltcai 1.3.0 → 1.4.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -4,26 +4,16 @@ Apple Silicon (M1-M5) 전용 | mlx-lm 기반
4
4
  """
5
5
 
6
6
  import asyncio
7
- import base64
8
7
  import hashlib
9
- import importlib.util
10
- import io
11
8
  import json
12
9
  import logging
13
10
  import os
14
- import platform
15
- import queue
16
11
  import re
17
12
  import secrets
18
13
  import threading
19
- import shutil
20
14
  import subprocess
21
15
  import sys
22
- import tempfile
23
16
  import time
24
- import urllib.error
25
- import urllib.request
26
- import ipaddress
27
17
  from contextlib import asynccontextmanager
28
18
  from pathlib import Path
29
19
 
@@ -34,22 +24,17 @@ try:
34
24
  except Exception as e:
35
25
  print(f"⚠️ MLX Metal context unavailable: {e}")
36
26
  mx = None
37
- from typing import AsyncIterator, Optional, List, Dict
27
+ from typing import Optional, List, Dict
38
28
 
39
29
  import uvicorn
40
- from fastapi import FastAPI, File, HTTPException, Request, Cookie, UploadFile
30
+ from fastapi import FastAPI, HTTPException, Request
41
31
  from fastapi.middleware.cors import CORSMiddleware
42
- from fastapi.responses import HTMLResponse, FileResponse, StreamingResponse, JSONResponse
43
32
  from fastapi.staticfiles import StaticFiles
44
33
  from pydantic import BaseModel
45
- from PIL import Image
46
34
 
47
- from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
35
+ from llm_router import LLMRouter, normalize_branding
48
36
  from knowledge_graph import KnowledgeGraphStore, set_llm_router
49
- from knowledge_graph_api import create_knowledge_graph_router
50
- from latticeai.core.context_builder import retrieve_context_for_generation, format_sources_footnote
51
- from latticeai.core.document_generator import detect_document_intent, DocumentGenerationSession
52
- from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
37
+ from local_knowledge_api import LocalKnowledgeWatcher
53
38
  from latticeai.core.security import (
54
39
  hash_password,
55
40
  verify_password,
@@ -72,24 +57,7 @@ from latticeai.core.audit import (
72
57
  from latticeai.api.auth import create_auth_router
73
58
  from latticeai.api.admin import create_admin_router
74
59
  from latticeai.api.security_dashboard import create_security_router as _create_security_router
75
- from latticeai.core.model_compat import (
76
- ensure_profile as _ensure_compat_profile,
77
- record_smoke_result as _record_smoke_result,
78
- fast_postprocess as _compat_fast_postprocess,
79
- validate_smoke_response as _validate_smoke_response,
80
- classify_smoke_response as _classify_smoke_response,
81
- list_cached_profiles as _list_compat_profiles,
82
- SMOKE_PROMPT as _SMOKE_PROMPT,
83
- )
84
- from latticeai.core.model_resolution import (
85
- ModelResolution as _ModelResolution,
86
- PrepareState as _PrepareState,
87
- PrepareReport as _PrepareReport,
88
- )
89
- from latticeai.core.graph_curator import (
90
- auto_build_graph_overlay as _auto_build_graph_overlay,
91
- mask_secrets as _curator_mask_secrets,
92
- )
60
+ from latticeai.core.model_compat import list_cached_profiles as _list_compat_profiles
93
61
  from latticeai.core.config import Config
94
62
  from latticeai.core.workspace_os import (
95
63
  WORKSPACE_OS_VERSION,
@@ -104,157 +72,62 @@ from latticeai.core.enterprise import (
104
72
  from latticeai.services.workspace_service import WorkspaceService
105
73
  from latticeai.services.model_service import ModelService
106
74
  from latticeai.services.chat_service import ChatService
75
+ from latticeai.services.model_runtime import (
76
+ CLOUD_VERIFY_TTL_SECONDS,
77
+ ENGINE_MODEL_CATALOG,
78
+ LOCAL_SERVER_PROCESSES,
79
+ MODEL_ENGINE_ALIASES,
80
+ configure_model_runtime,
81
+ download_hf_model,
82
+ engine_status,
83
+ filter_lower_family_versions,
84
+ install_engine,
85
+ local_binary,
86
+ normalize_local_model_request,
87
+ prepare_and_load_model,
88
+ prepare_and_load_model_stream,
89
+ runtime_features,
90
+ sse_event,
91
+ verify_cloud_models,
92
+ ensure_ollama_server,
93
+ )
107
94
  from latticeai.api.workspace import create_workspace_router
108
95
  from latticeai.api.health import create_health_router
109
96
  from latticeai.api.models import create_models_router
110
- from latticeai.api.mcp import create_mcp_router
111
- from latticeai.core.agent import (
112
- AgentState,
113
- AgentRunContext,
114
- AGENT_TERMINAL_STATES,
115
- AgentDeps,
116
- AgentRuntime,
117
- extract_action as _extract_agent_action,
97
+ from latticeai.api.chat import create_chat_router
98
+ from latticeai.api.tools import create_tools_router
99
+ from latticeai.api.static_routes import create_static_routes_router
100
+ from latticeai.api.garden import create_garden_router
101
+ from latticeai.api.setup import create_setup_router
102
+ from latticeai.services.tool_dispatch import (
103
+ LOCAL_WRITE_BLOCKED_PREFIXES as _LOCAL_WRITE_BLOCKED_PREFIXES,
104
+ TOOL_GOVERNANCE,
105
+ TOOL_GOVERNANCE_DEFAULT as _TOOL_GOVERNANCE_DEFAULT,
106
+ agent_risk as _agent_risk,
107
+ check_tool_role as _check_tool_role,
108
+ configure_tool_dispatch,
109
+ get_tool_permission,
110
+ list_tool_permissions,
118
111
  )
119
- from latticeai.core.agent_prompts import (
120
- AGENT_SYSTEM_PROMPT,
121
- CRITIC_PROMPT,
122
- EXECUTOR_PROMPT,
123
- MEMORY_UPDATER_PROMPT,
124
- PLANNER_PROMPT,
125
- )
126
- from latticeai.core.tool_registry import (
127
- MCP_TOOL_DESCRIPTIONS,
128
- ToolPermission,
129
- ToolPolicy,
130
- TOOL_CATALOG_BRIEF as _TOOL_CATALOG_BRIEF,
131
- )
132
- import mcp_registry
112
+ from latticeai.core.tool_registry import TOOL_CATALOG_BRIEF as _TOOL_CATALOG_BRIEF
133
113
  from mcp_registry import (
134
114
  MCP_REGISTRY, _THIRD_PARTY_SKILL_SOURCES, _KNOWN_REPO_LICENSES,
135
115
  _MARKETPLACE_RAW, _MARKETPLACE_API,
136
116
  _fetch_remote_mcp_registry, _get_combined_registry,
137
117
  _extract_skill_desc, _fetch_plugin_skills,
138
118
  _fetch_skills_marketplace, _fetch_plugin_directory,
139
- _OPEN_LICENSES, install_skill, SKILLS_DIR,
140
- )
141
- from p_reinforce import BRAIN_DIR, PReinforceGardener
142
- from setup import get_recommendations, install_stream, open_url, scan_environment
143
- from auto_setup import (
144
- plan as auto_setup_plan,
145
- preset as auto_setup_preset,
146
- probe as auto_setup_probe,
147
- recommend as auto_setup_recommend,
148
- verify as auto_setup_verify,
149
- )
150
- from telegram_bot import broadcast_web_chat
151
- from tools import (
152
- AGENT_ROOT,
153
- DEFAULT_TOOL_REGISTRY,
154
- ToolError,
155
- build_project,
156
- computer_click,
157
- computer_drag,
158
- computer_key,
159
- computer_move,
160
- computer_open_app,
161
- computer_open_url,
162
- computer_screenshot,
163
- computer_scroll,
164
- computer_status,
165
- computer_type,
166
- create_docx,
167
- create_pdf,
168
- create_pptx,
169
- create_xlsx,
170
- read_document,
171
- deploy_project,
172
- desktop_bridge_status,
173
- edit_file,
174
- ensure_agent_root,
175
- execute_tool,
176
- git_diff,
177
- git_log,
178
- git_show,
179
- git_status,
180
- grep,
181
- inspect_html,
182
- knowledge_save,
183
- knowledge_search,
184
- knowledge_tree,
185
- list_dir,
186
- local_list,
187
- local_read,
188
- local_write,
189
- network_status,
190
- obsidian_save,
191
- obsidian_search,
192
- obsidian_tree,
193
- preview_url,
194
- read_file,
195
- run_command,
196
- search_files,
197
- todo_read,
198
- todo_write,
199
- workspace_tree,
200
- write_file,
119
+ install_skill, SKILLS_DIR,
201
120
  )
121
+ from p_reinforce import PReinforceGardener
122
+ from setup import get_recommendations, scan_environment
123
+ from tools import ensure_agent_root
202
124
 
203
125
  try:
204
126
  import keyring
205
127
  except Exception:
206
128
  keyring = None
207
129
 
208
- from datetime import datetime, timedelta
209
- import httpx
210
-
211
- def detect_language(text: str) -> str:
212
- """Detect language: 'ko' (Korean) or 'en' (English)."""
213
- total = max(len(text), 1)
214
- ko = sum(1 for c in text if '가' <= c <= '힣')
215
- if ko / total > 0.05:
216
- return "ko"
217
- return "en"
218
-
219
- _LANG_HINT = {
220
- "ko": "Respond in Korean (한국어로 답변하세요).",
221
- "en": "Respond in English.",
222
- }
223
-
224
- def is_network_status_request(text: str) -> bool:
225
- """사용자가 현재 IP/네트워크 정보를 물었는지 감지합니다."""
226
- t = (text or "").lower()
227
- has_ip = bool(re.search(r"((?<![a-z0-9])ip(?![a-z0-9])|아이피|ip\s*주소|아이피\s*주소|ipconfig|ifconfig|네트워크)", t))
228
- asks_current = any(word in t for word in ["내", "현재", "지금", "local", "로컬", "주소", "address", "뭐", "알려", "확인", "상태"])
229
- return has_ip and asks_current
230
-
231
- def is_current_url_request(text: str) -> bool:
232
- t = (text or "").lower()
233
- has_url = any(word in t for word in ["url", "주소", "링크", "address"])
234
- asks_current = any(word in t for word in ["현재", "지금", "여기", "접속", "페이지", "브라우저", "알려", "뭐"])
235
- return has_url and asks_current
236
-
237
- def is_clear_command(text: str) -> bool:
238
- return (text or "").strip().lower() in {"/clear", "/clear_all"}
239
-
240
- def format_network_status(info: Dict) -> str:
241
- lines = [
242
- f"내부 IP: {info.get('local_ip') or '확인 안 됨'}",
243
- f"외부 IP: {info.get('public_ip') or '확인 안 됨'}",
244
- f"호스트명: {info.get('hostname') or '확인 안 됨'}",
245
- ]
246
- local_ips = info.get("local_ips") or {}
247
- if local_ips:
248
- lines.extend(["", "인터페이스:"])
249
- lines.extend(f"- {name}: {ip}" for name, ip in local_ips.items())
250
- note = info.get("note")
251
- if note:
252
- lines.extend(["", note])
253
- return "\n".join(lines)
254
-
255
- async def single_text_stream(text: str, model: str = "system") -> AsyncIterator[str]:
256
- yield f"data: {json.dumps({'chunk': text, 'model': model}, ensure_ascii=False)}\n\n"
257
- yield "data: [DONE]\n\n"
130
+ from datetime import datetime
258
131
 
259
132
  # ── App-level config — parsed once, in one place (latticeai.core.config) ──────
260
133
  # The module-level names below are kept as a compatibility surface for the rest
@@ -794,88 +667,6 @@ def clear_conversation(conversation_id: str, started_at: Optional[str] = None) -
794
667
  json.dump(kept, f, ensure_ascii=False, indent=2)
795
668
  return {"status": "cleared", "conversation_id": conversation_id, "removed": removed, "kept": len(kept)}
796
669
 
797
- def build_recent_chat_context(
798
- limit: int = 10,
799
- include_image_missing_replies: bool = True,
800
- user_email: Optional[str] = None,
801
- conversation_id: Optional[str] = None,
802
- ) -> str:
803
- history = get_history()
804
- if conversation_id:
805
- history = [item for item in history if item.get("conversation_id") == conversation_id]
806
- if user_email:
807
- history = [item for item in history if item.get("user_email") == user_email or item.get("role") == "assistant"]
808
- history = history[-limit:]
809
- lines = []
810
- for item in history:
811
- role = item.get("role", "user")
812
- content = item.get("content", "")
813
- if not include_image_missing_replies and role == "assistant":
814
- if "이미지" in content and any(word in content for word in ["업로드", "제공", "올려"]):
815
- continue
816
- source = item.get("source")
817
- label = role
818
- if source:
819
- label = f"{role} ({source})"
820
- lines.append(f"{label}: {content}")
821
- return "\n".join(lines)
822
-
823
- def extract_screenshot_context(image_data: Optional[str]) -> str:
824
- if not image_data:
825
- return ""
826
-
827
- lines = ["[SCREENSHOT INGESTION]"]
828
- image_bytes = b""
829
- try:
830
- image_bytes = base64.b64decode(image_data)
831
- image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
832
- lines.append(f"- image_size: {image.width}x{image.height}")
833
- lines.append(f"- image_mode: {image.mode}")
834
- except Exception as e:
835
- lines.append(f"- image_decode_error: {e}")
836
- return "\n".join(lines)
837
-
838
- tesseract_path = shutil.which("tesseract")
839
- if not tesseract_path:
840
- lines.append("- ocr: unavailable; install `tesseract` to enable OCR text extraction.")
841
- return "\n".join(lines)
842
-
843
- temp_path = None
844
- try:
845
- with tempfile.NamedTemporaryFile(prefix="ltcai-screenshot-", suffix=".png", delete=False) as temp:
846
- temp.write(image_bytes)
847
- temp_path = temp.name
848
-
849
- ocr_text = ""
850
- for lang in ("kor+eng", "eng"):
851
- completed = subprocess.run(
852
- [tesseract_path, temp_path, "stdout", "-l", lang, "--psm", "6"],
853
- capture_output=True,
854
- text=True,
855
- timeout=20,
856
- check=False,
857
- )
858
- if completed.returncode == 0 and completed.stdout.strip():
859
- ocr_text = completed.stdout.strip()
860
- lines.append(f"- ocr_language: {lang}")
861
- break
862
-
863
- if ocr_text:
864
- lines.append("- ocr_text:")
865
- lines.append(ocr_text[:4000])
866
- else:
867
- lines.append("- ocr: no text extracted.")
868
- except Exception as e:
869
- lines.append(f"- ocr_error: {e}")
870
- finally:
871
- if temp_path:
872
- try:
873
- Path(temp_path).unlink()
874
- except OSError:
875
- pass
876
-
877
- return "\n".join(lines)
878
-
879
670
  def get_user_role(email: str, users: Optional[Dict] = None) -> str:
880
671
  users = users or load_users()
881
672
  user = users.get(email) or {}
@@ -915,6 +706,63 @@ def enforce_rate_limit(email: str, bucket_key: str) -> None:
915
706
  def _bytes_match_extension(data: bytes, ext: str) -> bool:
916
707
  return _bytes_match_extension_impl(data, ext)
917
708
 
709
+ _LOCAL_APPROVAL_TTL_SECONDS = 5 * 60
710
+ _local_approvals: Dict[str, Dict[str, object]] = {}
711
+
712
+
713
+ def _normalize_local_path_for_approval(path: str) -> str:
714
+ return str(Path(path).expanduser().resolve())
715
+
716
+
717
+ def _content_fingerprint(content: str = "") -> str:
718
+ return hashlib.sha256(content.encode("utf-8")).hexdigest()
719
+
720
+
721
+ def _local_permission_response(path: str, action: str, user_email: str, content: str = "") -> dict:
722
+ normalized = _normalize_local_path_for_approval(path)
723
+ token = secrets.token_urlsafe(24)
724
+ record: Dict[str, object] = {
725
+ "path": normalized,
726
+ "action": action,
727
+ "user_email": user_email,
728
+ "expires_at": time.time() + _LOCAL_APPROVAL_TTL_SECONDS,
729
+ "approved": False,
730
+ }
731
+ if action == "write":
732
+ record["content_hash"] = _content_fingerprint(content)
733
+ _local_approvals[token] = record
734
+ return {
735
+ "permission_required": True,
736
+ "path": path,
737
+ "action": action,
738
+ "approval_token": token,
739
+ "expires_in": _LOCAL_APPROVAL_TTL_SECONDS,
740
+ }
741
+
742
+
743
+ def _require_local_approval(
744
+ *,
745
+ token: Optional[str],
746
+ path: str,
747
+ action: str,
748
+ user_email: str,
749
+ content: str = "",
750
+ ) -> None:
751
+ if not token:
752
+ raise HTTPException(status_code=403, detail="파일 접근 승인 토큰이 필요합니다.")
753
+ record = _local_approvals.get(token)
754
+ if not record or float(record.get("expires_at", 0)) < time.time():
755
+ raise HTTPException(status_code=403, detail="파일 접근 승인이 만료되었거나 유효하지 않습니다.")
756
+ if not record.get("approved"):
757
+ raise HTTPException(status_code=403, detail="파일 접근이 아직 승인되지 않았습니다.")
758
+ if record.get("user_email") != user_email:
759
+ raise HTTPException(status_code=403, detail="다른 사용자의 파일 접근 승인은 사용할 수 없습니다.")
760
+ if record.get("path") != _normalize_local_path_for_approval(path) or record.get("action") != action:
761
+ raise HTTPException(status_code=403, detail="파일 접근 승인 범위가 일치하지 않습니다.")
762
+ if action == "write" and record.get("content_hash") != _content_fingerprint(content):
763
+ raise HTTPException(status_code=403, detail="승인된 파일 내용과 요청 내용이 다릅니다.")
764
+
765
+
918
766
  def require_admin(request: Request) -> tuple[str, Dict]:
919
767
  users = load_users()
920
768
  token = _extract_bearer_token(request)
@@ -1030,8 +878,8 @@ def build_admin_audit_report(users: Dict) -> Dict:
1030
878
 
1031
879
  router = LLMRouter()
1032
880
  set_llm_router(router)
881
+ configure_tool_dispatch(load_users=load_users, get_user_role=get_user_role)
1033
882
  gardener = PReinforceGardener()
1034
- _doc_gen_sessions: dict = {} # conversation_id → DocumentGenerationSession
1035
883
 
1036
884
  async def autoload_default_model() -> None:
1037
885
  if not AUTOLOAD_MODELS:
@@ -1166,6 +1014,40 @@ ensure_agent_root()
1166
1014
  OPEN_REGISTRATION = CONFIG.open_registration
1167
1015
  INVITE_CODE = CONFIG.invite_code
1168
1016
  INVITE_GATE_ENABLED = CONFIG.invite_gate_enabled
1017
+ configure_model_runtime(
1018
+ router=router,
1019
+ APP_MODE=APP_MODE,
1020
+ DEFAULT_HOST=DEFAULT_HOST,
1021
+ DEFAULT_PORT=DEFAULT_PORT,
1022
+ DATA_DIR=DATA_DIR,
1023
+ BASE_DIR=BASE_DIR,
1024
+ ENABLE_TELEGRAM=ENABLE_TELEGRAM,
1025
+ ENABLE_GRAPH=ENABLE_GRAPH,
1026
+ AUTOLOAD_MODELS=AUTOLOAD_MODELS,
1027
+ MODEL_IDLE_UNLOAD_SECONDS=MODEL_IDLE_UNLOAD_SECONDS,
1028
+ ALLOW_LOCAL_MODELS=ALLOW_LOCAL_MODELS,
1029
+ REQUIRE_AUTH=REQUIRE_AUTH,
1030
+ INVITE_GATE_ENABLED=INVITE_GATE_ENABLED,
1031
+ ALLOW_PLAINTEXT_API_KEYS=ALLOW_PLAINTEXT_API_KEYS,
1032
+ CORS_ALLOW_NETWORK=CORS_ALLOW_NETWORK,
1033
+ PUBLIC_MODEL=PUBLIC_MODEL,
1034
+ LOCAL_MODEL=LOCAL_MODEL,
1035
+ IS_PUBLIC_MODE=IS_PUBLIC_MODE,
1036
+ keyring=keyring,
1037
+ get_current_user=get_current_user,
1038
+ get_user_api_key=get_user_api_key,
1039
+ )
1040
+ STATIC_ROUTES = create_static_routes_router(
1041
+ static_dir=STATIC_DIR,
1042
+ invite_gate_enabled=INVITE_GATE_ENABLED,
1043
+ invite_code=INVITE_CODE,
1044
+ app_mode=APP_MODE,
1045
+ model_router=router,
1046
+ require_user=require_user,
1047
+ )
1048
+ ui_file_response = STATIC_ROUTES.ui_file_response
1049
+ local_sysinfo = STATIC_ROUTES.local_sysinfo
1050
+ app.include_router(STATIC_ROUTES.router)
1169
1051
 
1170
1052
  # ── Auth & Admin routers (latticeai.api) ─────────────────────────────────────
1171
1053
  app.include_router(create_auth_router(
@@ -1239,329 +1121,11 @@ app.include_router(_create_security_router(
1239
1121
  append_audit_event=append_audit_event,
1240
1122
  ))
1241
1123
 
1242
- def ui_file_response(path: Path) -> FileResponse:
1243
- response = FileResponse(path)
1244
- response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
1245
- response.headers["Pragma"] = "no-cache"
1246
- response.headers["Expires"] = "0"
1247
- return response
1248
-
1249
- @app.get("/")
1250
- async def root(request: Request, code: Optional[str] = None, authorized: Optional[str] = Cookie(None)):
1251
- """로그인/회원가입 페이지. 초대 게이트 활성화 시 코드 검증 후 진입."""
1252
- if not INVITE_GATE_ENABLED:
1253
- return ui_file_response(STATIC_DIR / "account.html")
1254
-
1255
- # 1. 이미 쿠키로 인증된 경우
1256
- if authorized == "true":
1257
- return ui_file_response(STATIC_DIR / "account.html")
1258
-
1259
- # 2. 초대 코드가 일치하는 경우 (최초 진입)
1260
- if code == INVITE_CODE:
1261
- response = ui_file_response(STATIC_DIR / "account.html")
1262
- response.set_cookie(key="authorized", value="true", httponly=True, samesite="lax", max_age=60*60*24*7)
1263
- return response
1264
-
1265
- # 3. 인증 실패 시 차단 화면
1266
- return HTMLResponse(content=f"""
1267
- <body style="background:#0f1115; color:white; display:flex; flex-direction:column; align-items:center; justify-content:center; height:100vh; font-family:sans-serif;">
1268
- <div style="background:#16191f; padding:40px; border-radius:24px; border:1px solid rgba(255,255,255,0.1); text-align:center; box-shadow: 0 20px 40px rgba(0,0,0,0.5);">
1269
- <div style="font-size:48px; margin-bottom:20px;">🔒</div>
1270
- <h1 style="color:#378ADD; margin:0; font-size:24px;">Invitation Required</h1>
1271
- <p style="color:#94a3b8; margin:20px 0; line-height:1.6;">이 서비스는 비공개로 운영되고 있습니다.<br>선생님께 받은 <b>초대용 전용 링크</b>를 통해 접속해 주세요.</p>
1272
- <div style="margin-top:30px; padding-top:20px; border-top:1px solid rgba(255,255,255,0.05); font-size:11px; color:rgba(255,255,255,0.2); letter-spacing:1px;">LATTICE AI</div>
1273
- </div>
1274
- </body>
1275
- """, status_code=403)
1276
-
1277
-
1278
- @app.get("/account")
1279
- async def account_page():
1280
- """Direct login/register page route used by logout and manual navigation."""
1281
- return ui_file_response(STATIC_DIR / "account.html")
1282
-
1283
-
1284
- @app.get("/manifest.json")
1285
- async def manifest():
1286
- p = STATIC_DIR / "manifest.json"
1287
- if not p.exists():
1288
- raise HTTPException(status_code=404)
1289
- return FileResponse(str(p), media_type="application/manifest+json")
1290
-
1291
-
1292
- @app.get("/sw.js")
1293
- async def service_worker():
1294
- p = STATIC_DIR / "sw.js"
1295
- if not p.exists():
1296
- raise HTTPException(status_code=404)
1297
- resp = FileResponse(str(p), media_type="application/javascript")
1298
- resp.headers["Service-Worker-Allowed"] = "/"
1299
- return resp
1300
-
1301
-
1302
- @app.get("/chat")
1303
- async def chat_page(request: Request):
1304
- return ui_file_response(STATIC_DIR / "chat.html")
1305
-
1306
-
1307
- @app.get("/admin")
1308
- async def admin_page():
1309
- admin_path = STATIC_DIR / "admin.html"
1310
- if not admin_path.exists():
1311
- raise HTTPException(status_code=404, detail="Admin UI not found.")
1312
- response = FileResponse(admin_path)
1313
- response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
1314
- return response
1315
-
1316
- # /workspace and /onboarding UI pages are served by the workspace router
1317
- # (latticeai.api.workspace), included below after its dependencies are defined.
1318
-
1319
- @app.get("/status")
1320
- async def status():
1321
- """서버 상태 및 현재 로드된 모델 정보를 반환합니다."""
1322
- return {
1323
- "message": "🧠 Lattice AI MLX Server is running!",
1324
- "status": "online",
1325
- "mode": APP_MODE,
1326
- "loaded_model": router._current or "None"
1327
- }
1328
-
1329
-
1330
- @app.get("/local/sysinfo")
1331
- async def local_sysinfo(request: Request):
1332
- """CPU / RAM / GPU(MLX) 사용량을 반환합니다."""
1333
- require_user(request)
1334
- import subprocess, re as _re
1335
- result = {"cpu_pct": 0.0, "ram_pct": 0.0, "gpu_mem_pct": 0.0, "gpu_mem_gb": 0.0}
1336
- try:
1337
- # CPU
1338
- top_out = subprocess.run(["top", "-l", "1", "-n", "0"], capture_output=True, text=True, timeout=4).stdout
1339
- for line in top_out.splitlines():
1340
- if "CPU usage" in line:
1341
- m = _re.search(r"([\d.]+)% user.*?([\d.]+)% sys", line)
1342
- if m:
1343
- result["cpu_pct"] = round(float(m.group(1)) + float(m.group(2)), 1)
1344
- # RAM
1345
- vm_out = subprocess.run(["vm_stat"], capture_output=True, text=True, timeout=4).stdout
1346
- page_size = 16384
1347
- pages: dict = {}
1348
- for line in vm_out.splitlines():
1349
- for key in ["Pages free", "Pages active", "Pages inactive", "Pages wired down", "Pages occupied by compressor"]:
1350
- if line.startswith(key):
1351
- m = _re.search(r"(\d+)", line)
1352
- if m:
1353
- pages[key] = int(m.group(1))
1354
- total = sum(pages.values())
1355
- used = total - pages.get("Pages free", 0)
1356
- result["ram_pct"] = round(used / total * 100, 1) if total else 0.0
1357
- # GPU (MLX / Apple Silicon unified memory)
1358
- try:
1359
- import mlx.core as _mx
1360
- hw_out = subprocess.run(["sysctl", "-n", "hw.memsize"], capture_output=True, text=True, timeout=2).stdout
1361
- total_bytes = int(hw_out.strip())
1362
- gpu_bytes = _mx.get_active_memory() + _mx.get_cache_memory()
1363
- result["gpu_mem_gb"] = round(gpu_bytes / (1024 ** 3), 2)
1364
- result["gpu_mem_pct"] = round(gpu_bytes / total_bytes * 100, 1) if total_bytes else 0.0
1365
- except Exception:
1366
- pass
1367
- except Exception as e:
1368
- result["error"] = str(e)
1369
- return result
1370
-
1371
-
1372
-
1124
+ # ── Static UI/status routes moved to latticeai.api.static_routes ──
1373
1125
 
1374
1126
  # ── Request / Response Models ──────────────────────────────────────────────────
1375
1127
 
1376
- class ChatRequest(BaseModel):
1377
- message: str
1378
- conversation_id: Optional[str] = None
1379
- client_url: Optional[str] = None
1380
- model: Optional[str] = None
1381
- max_tokens: int = 2048
1382
- temperature: float = 0.2
1383
- stream: bool = True
1384
- context: Optional[str] = None
1385
- source: Optional[str] = None
1386
- user_email: Optional[str] = None
1387
- user_nickname: Optional[str] = None
1388
- image_data: Optional[str] = None # Base64 이미지 데이터 (VLM용)
1389
-
1390
-
1391
- # Model/engine request models moved to latticeai.api.models (v1.3.0).
1392
-
1393
- # Workspace request models moved to latticeai.api.workspace (v1.2.0 modularization).
1394
-
1395
-
1396
- class GardenRequest(BaseModel):
1397
- raw_data: str
1398
- category: Optional[str] = None # 10_Wiki / 00_Raw / Skills
1399
-
1400
-
1401
- class AgentRequest(BaseModel):
1402
- message: str
1403
- conversation_id: Optional[str] = None
1404
- source: Optional[str] = None
1405
- max_steps: int = 25
1406
- temperature: float = 0.1
1407
- user_email: Optional[str] = None
1408
- user_nickname: Optional[str] = None
1409
- # Multi-LLM pipeline: per-phase model override (None = use current loaded model)
1410
- planning_model: Optional[str] = None
1411
- executing_model: Optional[str] = None
1412
- reviewing_model: Optional[str] = None
1413
- # When True: pause after planning and wait for /agent/resume
1414
- human_in_loop: bool = False
1415
-
1416
-
1417
- class AgentResumeRequest(BaseModel):
1418
- context_id: str
1419
- approved: bool = True
1420
- modified_plan: Optional[dict] = None
1421
- executing_model: Optional[str] = None
1422
- reviewing_model: Optional[str] = None
1423
-
1424
-
1425
- class AgentEvalRequest(BaseModel):
1426
- skill: str
1427
- case_id: Optional[str] = None
1428
-
1429
-
1430
- # AgentState / AgentRunContext / AGENT_TERMINAL_STATES are defined in
1431
- # latticeai.core.agent and imported at the top of this module.
1432
-
1433
- # Pending agent contexts waiting for human approval: context_id → (ctx, req, lang_hint, current_user)
1434
- _pending_agents: dict[str, tuple] = {}
1435
- _pending_agents_lock = threading.Lock()
1436
-
1437
-
1438
- class ToolPathRequest(BaseModel):
1439
- path: str = "."
1440
- approval_token: Optional[str] = None
1441
-
1442
-
1443
- class ToolWriteFileRequest(BaseModel):
1444
- path: str
1445
- content: str
1446
-
1447
-
1448
- class ToolRunCommandRequest(BaseModel):
1449
- command: str
1450
- cwd: Optional[str] = "."
1451
-
1452
-
1453
- class ToolScriptRequest(BaseModel):
1454
- cwd: Optional[str] = "."
1455
- script: str = "build"
1456
-
1457
-
1458
- class ToolSearchFilesRequest(BaseModel):
1459
- query: str
1460
- path: str = "."
1461
- max_results: int = 20
1462
-
1463
-
1464
- class ToolReadFileRequest(BaseModel):
1465
- path: str
1466
- offset: int = 0
1467
- limit: int = 0
1468
- line_numbers: bool = True
1469
-
1470
-
1471
- class ToolEditFileRequest(BaseModel):
1472
- path: str
1473
- old_string: str
1474
- new_string: str
1475
- replace_all: bool = False
1476
-
1477
-
1478
- class ToolGrepRequest(BaseModel):
1479
- pattern: str
1480
- path: str = "."
1481
- glob: Optional[str] = None
1482
- max_results: int = 50
1483
- case_insensitive: bool = False
1484
- context_lines: int = 0
1485
-
1486
-
1487
- class ToolTodoWriteRequest(BaseModel):
1488
- todos: List[Dict] = []
1489
-
1490
-
1491
- class ToolWorkspaceTreeRequest(BaseModel):
1492
- path: str = "."
1493
- max_depth: int = 3
1494
-
1495
-
1496
- class ToolClearHistoryRequest(BaseModel):
1497
- keep_last: int = 0
1498
-
1499
-
1500
- class ToolKnowledgeSaveRequest(BaseModel):
1501
- content: str
1502
- folder: str = "00_Raw"
1503
- title: Optional[str] = None
1504
-
1505
-
1506
- class ToolKnowledgeSearchRequest(BaseModel):
1507
- query: str
1508
- max_results: int = 5
1509
-
1510
-
1511
- class ToolDocxRequest(BaseModel):
1512
- title: str = ""
1513
- body: str = ""
1514
- filename: str = "document.docx"
1515
-
1516
-
1517
- class ToolXlsxRequest(BaseModel):
1518
- rows: List[List] = []
1519
- filename: str = "spreadsheet.xlsx"
1520
- sheet_name: str = "Sheet1"
1521
-
1522
-
1523
- class ToolPptxRequest(BaseModel):
1524
- title: str = ""
1525
- slides: List[Dict] = []
1526
- filename: str = "presentation.pptx"
1527
-
1528
-
1529
- class ToolPdfRequest(BaseModel):
1530
- title: str = ""
1531
- body: str = ""
1532
- filename: str = "document.pdf"
1533
-
1534
-
1535
- class LocalAccessRequest(BaseModel):
1536
- path: str
1537
- approved: bool = False
1538
- approval_token: Optional[str] = None
1539
-
1540
-
1541
- class LocalWriteRequest(BaseModel):
1542
- path: str
1543
- content: str
1544
- approved: bool = False
1545
- approval_token: Optional[str] = None
1546
-
1547
-
1548
-
1549
- class ToolGitDiffRequest(BaseModel):
1550
- path: Optional[str] = None
1551
- cwd: Optional[str] = "."
1552
-
1553
-
1554
- class ToolGitLogRequest(BaseModel):
1555
- max_count: int = 5
1556
- cwd: Optional[str] = "."
1557
-
1558
-
1559
- class ToolGitShowRequest(BaseModel):
1560
- revision: str = "HEAD"
1561
- cwd: Optional[str] = "."
1562
-
1563
-
1564
- # ── Workspace OS 1.0 API ─────────────────────────────────────────────────────
1128
+ # ── Workspace OS API ──────────────────────────────────────────────────────────
1565
1129
 
1566
1130
  def _workspace_settings_payload() -> Dict:
1567
1131
  return {
@@ -1625,3750 +1189,108 @@ app.include_router(create_workspace_router(
1625
1189
 
1626
1190
  # ── Health & Info ──────────────────────────────────────────────────────────────
1627
1191
 
1628
- ENGINE_INSTALLERS = {
1629
- "local_mlx": {
1630
- "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-lm", "mlx-vlm", "huggingface_hub[cli]"],
1631
- "label": "Install MLX runtime",
1632
- },
1633
- "openai": {
1634
- "command": [sys.executable, "-m", "pip", "install", "openai"],
1635
- "label": "Install OpenAI-compatible SDK",
1636
- },
1637
- "openrouter": {
1638
- "command": [sys.executable, "-m", "pip", "install", "openai"],
1639
- "label": "Install OpenAI-compatible SDK",
1640
- },
1641
- "groq": {
1642
- "command": [sys.executable, "-m", "pip", "install", "openai"],
1643
- "label": "Install OpenAI-compatible SDK",
1644
- },
1645
- "together": {
1646
- "command": [sys.executable, "-m", "pip", "install", "openai"],
1647
- "label": "Install OpenAI-compatible SDK",
1648
- },
1649
- "xai": {
1650
- "command": [sys.executable, "-m", "pip", "install", "openai"],
1651
- "label": "Install OpenAI-compatible SDK",
1652
- },
1653
- "ollama": {
1654
- "command": ["brew", "install", "ollama"],
1655
- "label": "Install Ollama",
1656
- "requires_binary": "brew",
1657
- },
1658
- "vllm": {
1659
- "command": [sys.executable, "-m", "pip", "install", "vllm", "huggingface_hub[cli]"],
1660
- "label": "Install vLLM runtime",
1661
- },
1662
- "lmstudio": {
1663
- "command": ["brew", "install", "--cask", "lm-studio"],
1664
- "label": "Install LM Studio",
1665
- "requires_binary": "brew",
1666
- },
1667
- "llamacpp": {
1668
- "command": ["brew", "install", "llama.cpp"],
1669
- "label": "Install llama.cpp",
1670
- "requires_binary": "brew",
1671
- },
1672
- }
1673
-
1674
- ENGINE_MODEL_CATALOG = {
1675
- "local_mlx": [
1676
- {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "family": "SmolLM", "tag": "local-light", "size": "963MB", "pullable": True},
1677
- {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "family": "Gemma 3", "tag": "local-light", "size": "733MB", "pullable": True},
1678
- {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "family": "Llama 3.x", "tag": "local-light", "size": "1.3GB", "pullable": True},
1679
- {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "family": "Gemma 2", "tag": "local-light", "size": "1.6GB", "pullable": True},
1680
- {"id": "mlx-community/gemma-4-e2b-4bit", "name": "Gemma 4 E2B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
1681
- {"id": "mlx-community/gemma-4-e2b-it-4bit", "name": "Gemma 4 E2B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
1682
- {"id": "mlx-community/gemma-4-e4b-4bit", "name": "Gemma 4 E4B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
1683
- {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
1684
- {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "2.7GB", "pullable": True},
1685
- {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "4.8GB", "pullable": True},
1686
- {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "4.4GB", "pullable": True},
1687
- {"id": "mlx-community/gemma-3-4b-it-4bit", "name": "Gemma 3 4B", "family": "Gemma 3", "tag": "local-vlm", "size": "3.3GB", "pullable": True},
1688
- {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "family": "Llama 3.x", "tag": "local-general", "size": "2.0GB", "pullable": True},
1689
- {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "family": "Llama 3.1", "tag": "local-general", "size": "4.7GB", "pullable": True},
1690
- {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "family": "Gemma 2", "tag": "local-general", "size": "5.4GB", "pullable": True},
1691
- {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "family": "Gemma 3", "tag": "local-vlm", "size": "8.0GB", "pullable": True},
1692
- {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
1693
- {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
1694
- {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "family": "Phi", "tag": "local-coding", "size": "8.3GB", "pullable": True},
1695
- {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B Instruct v0.3", "family": "Mistral", "tag": "local-general", "size": "4.1GB", "pullable": True},
1696
- {"id": "mlx-community/Ministral-8B-Instruct-2410-4bit", "name": "Ministral 8B Instruct", "family": "Mistral", "tag": "local-general", "size": "4.5GB", "pullable": True},
1697
- {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "family": "Mistral", "tag": "local-large", "size": "13.3GB", "pullable": True},
1698
- {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B", "family": "Qwen2.5", "tag": "local-coding", "size": "18.5GB", "pullable": True},
1699
- {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "18GB", "pullable": True},
1700
- {"id": "mlx-community/gemma-3-27b-it-4bit", "name": "Gemma 3 27B", "family": "Gemma 3", "tag": "local-vlm", "size": "17GB", "pullable": True},
1701
- {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "15.6GB", "pullable": True},
1702
- {"id": "mlx-community/gemma-4-31b-it-4bit", "name": "Gemma 4 31B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "18.4GB", "pullable": True},
1703
- {"id": "mlx-community/gpt-oss-20b-MXFP4-Q8", "name": "GPT-OSS 20B", "family": "GPT-OSS", "tag": "local-reasoning", "size": "12.1GB", "pullable": True},
1704
- {"id": "mlx-community/gpt-oss-120b-MXFP4-Q4", "name": "GPT-OSS 120B", "family": "GPT-OSS", "tag": "local-large", "size": "62.3GB", "pullable": True},
1705
- {"id": "mlx-community/Llama-3.3-70B-Instruct-4bit", "name": "Llama 3.3 70B", "family": "Llama 3.x", "tag": "local-general", "size": "40GB+", "pullable": True},
1706
- {"id": "mlx-community/Llama-3.1-70B-Instruct-4bit", "name": "Llama 3.1 70B", "family": "Llama 3.1", "tag": "local-general", "size": "40GB+", "pullable": True},
1707
- ],
1708
- "ollama": [
1709
- {"id": "ollama:qwen3-vl:4b", "name": "Qwen3-VL 4B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
1710
- {"id": "ollama:qwen3-vl:8b", "name": "Qwen3-VL 8B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
1711
- {"id": "ollama:qwen3-vl:30b", "name": "Qwen3-VL 30B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
1712
- {"id": "ollama:gpt-oss:20b", "name": "GPT-OSS 20B via Ollama", "family": "GPT-OSS", "tag": "local-reasoning", "size": "pull required", "pullable": True},
1713
- {"id": "ollama:gpt-oss:120b", "name": "GPT-OSS 120B via Ollama", "family": "GPT-OSS", "tag": "local-large", "size": "pull required", "pullable": True},
1714
- {"id": "ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "name": "Gemma 4 31B Q4 via Ollama", "family": "Gemma 4", "tag": "local-vlm", "size": "18.7GB", "pullable": True},
1715
- {"id": "ollama:qwen3:8b", "name": "Qwen3 8B via Ollama", "family": "Qwen", "tag": "local-server", "size": "pull required", "pullable": True},
1716
- {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen2.5 Coder 14B via Ollama", "family": "Qwen", "tag": "local-coding", "size": "pull required", "pullable": True},
1717
- {"id": "ollama:gemma3:1b", "name": "Gemma 3 1B via Ollama", "family": "Gemma", "tag": "local-light", "size": "pull required", "pullable": True},
1718
- {"id": "ollama:gemma3:4b", "name": "Gemma 3 4B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
1719
- {"id": "ollama:gemma3:4b-it-q4_K_M", "name": "Gemma 3 4B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
1720
- {"id": "ollama:gemma3:12b", "name": "Gemma 3 12B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
1721
- {"id": "ollama:gemma3:12b-it-q4_K_M", "name": "Gemma 3 12B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
1722
- {"id": "ollama:gemma3:27b", "name": "Gemma 3 27B via Ollama", "family": "Gemma", "tag": "local-large", "size": "pull required", "pullable": True},
1723
- {"id": "ollama:llama3.2:1b", "name": "Llama 3.2 1B via Ollama", "family": "Llama 3.x", "tag": "local-light", "size": "pull required", "pullable": True},
1724
- {"id": "ollama:llama3.2:3b", "name": "Llama 3.2 3B via Ollama", "family": "Llama 3.x", "tag": "local-server", "size": "pull required", "pullable": True},
1725
- {"id": "ollama:llama3.1:8b", "name": "Llama 3.1 8B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
1726
- {"id": "ollama:llama3.1:8b-instruct-q4_0", "name": "Llama 3.1 8B q4_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
1727
- {"id": "ollama:llama3.1:8b-instruct-q8_0", "name": "Llama 3.1 8B q8_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
1728
- {"id": "ollama:llama3.1:70b", "name": "Llama 3.1 70B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
1729
- {"id": "ollama:llama3.3:70b", "name": "Llama 3.3 70B via Ollama", "family": "Llama 3.x", "tag": "local-large", "size": "pull required", "pullable": True},
1730
- {"id": "ollama:mistral:7b", "name": "Mistral 7B via Ollama", "family": "Mistral", "tag": "local-server", "size": "pull required", "pullable": True},
1731
- {"id": "ollama:mixtral:8x7b", "name": "Mixtral 8x7B via Ollama", "family": "Mistral", "tag": "local-large", "size": "pull required", "pullable": True},
1732
- {"id": "ollama:phi4-mini", "name": "Phi 4 Mini via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
1733
- {"id": "ollama:phi4", "name": "Phi 4 via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
1734
- {"id": "ollama:smollm2:1.7b", "name": "SmolLM2 1.7B via Ollama", "family": "SmolLM", "tag": "local-light", "size": "pull required", "pullable": True},
1735
- ],
1736
- "vllm": [
1737
- {"id": "vllm:openai/gpt-oss-20b", "name": "GPT-OSS 20B via vLLM", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
1738
- {"id": "vllm:openai/gpt-oss-120b", "name": "GPT-OSS 120B via vLLM", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
1739
- {"id": "vllm:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1740
- {"id": "vllm:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1741
- {"id": "vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1742
- {"id": "vllm:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via vLLM", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1743
- {"id": "vllm:google/gemma-2-2b", "name": "Gemma 2 2B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1744
- {"id": "vllm:google/gemma-2-2b-it", "name": "Gemma 2 2B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1745
- {"id": "vllm:google/gemma-2-9b", "name": "Gemma 2 9B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1746
- {"id": "vllm:google/gemma-2-9b-it", "name": "Gemma 2 9B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1747
- {"id": "vllm:google/gemma-3-4b-it", "name": "Gemma 3 4B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1748
- {"id": "vllm:google/gemma-3-12b-it", "name": "Gemma 3 12B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1749
- {"id": "vllm:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1750
- {"id": "vllm:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1751
- {"id": "vllm:microsoft/phi-4", "name": "Phi 4 via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1752
- {"id": "vllm:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1753
- {"id": "vllm:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1754
- {"id": "vllm:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via vLLM", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
1755
- {"id": "vllm:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via vLLM", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
1756
- {"id": "vllm:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
1757
- {"id": "vllm:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via vLLM", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
1758
- {"id": "vllm:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
1759
- ],
1760
- "lmstudio": [
1761
- {"id": "lmstudio:openai/gpt-oss-20b", "name": "GPT-OSS 20B via LM Studio", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
1762
- {"id": "lmstudio:openai/gpt-oss-120b", "name": "GPT-OSS 120B via LM Studio", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
1763
- {"id": "lmstudio:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B 4-bit via LM Studio", "family": "Gemma 4", "tag": "local-vlm", "size": "server model", "pullable": True},
1764
- {"id": "lmstudio:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1765
- {"id": "lmstudio:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1766
- {"id": "lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1767
- {"id": "lmstudio:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via LM Studio", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
1768
- {"id": "lmstudio:google/gemma-2-2b-it", "name": "Gemma 2 2B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1769
- {"id": "lmstudio:google/gemma-2-9b-it", "name": "Gemma 2 9B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1770
- {"id": "lmstudio:google/gemma-3-4b-it", "name": "Gemma 3 4B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1771
- {"id": "lmstudio:google/gemma-3-12b-it", "name": "Gemma 3 12B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
1772
- {"id": "lmstudio:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1773
- {"id": "lmstudio:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1774
- {"id": "lmstudio:microsoft/phi-4", "name": "Phi 4 via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
1775
- {"id": "lmstudio:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1776
- {"id": "lmstudio:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
1777
- {"id": "lmstudio:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via LM Studio", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
1778
- {"id": "lmstudio:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via LM Studio", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
1779
- {"id": "lmstudio:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
1780
- {"id": "lmstudio:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via LM Studio", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
1781
- {"id": "lmstudio:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
1782
- ],
1783
- "llamacpp": [
1784
- {"id": "llamacpp:ggml-org/gpt-oss-20b-GGUF", "name": "GPT-OSS 20B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1785
- {"id": "llamacpp:ggml-org/gpt-oss-120b-GGUF", "name": "GPT-OSS 120B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1786
- {"id": "llamacpp:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B GGUF via llama.cpp", "family": "Gemma 4", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1787
- {"id": "llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "name": "Qwen3-VL 4B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
1788
- {"id": "llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "name": "Qwen3-VL 8B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
1789
- {"id": "llamacpp:unsloth/gemma-2-2b-it-GGUF", "name": "Gemma 2 2B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1790
- {"id": "llamacpp:unsloth/gemma-2-9b-it-GGUF", "name": "Gemma 2 9B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1791
- {"id": "llamacpp:unsloth/gemma-3-4b-it-GGUF", "name": "Gemma 3 4B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1792
- {"id": "llamacpp:bartowski/Mistral-7B-Instruct-v0.3-GGUF", "name": "Mistral 7B GGUF via llama.cpp", "family": "Mistral", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1793
- {"id": "llamacpp:bartowski/Phi-3.5-mini-instruct-GGUF", "name": "Phi 3.5 Mini GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1794
- {"id": "llamacpp:bartowski/phi-4-GGUF", "name": "Phi 4 GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1795
- {"id": "llamacpp:bartowski/Llama-3.2-3B-Instruct-GGUF", "name": "Llama 3.2 3B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "gguf-q4", "size": "gguf", "pullable": True},
1796
- {"id": "llamacpp:bartowski/Llama-3.1-8B-Instruct-GGUF", "name": "Llama 3.1 8B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
1797
- {"id": "llamacpp:bartowski/Llama-3.3-70B-Instruct-GGUF", "name": "Llama 3.3 70B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "local-large", "size": "gguf", "pullable": True},
1798
- {"id": "llamacpp:bartowski/Llama-3.1-70B-Instruct-GGUF", "name": "Llama 3.1 70B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
1799
- ],
1800
- }
1801
-
1802
- MODEL_ENGINE_ALIASES = {
1803
- "gpt-oss-20b": {
1804
- "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
1805
- "ollama": "gpt-oss:20b",
1806
- "vllm": "openai/gpt-oss-20b",
1807
- "lmstudio": "openai/gpt-oss-20b",
1808
- "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
1809
- },
1810
- "openai/gpt-oss-20b": {
1811
- "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
1812
- "ollama": "gpt-oss:20b",
1813
- "vllm": "openai/gpt-oss-20b",
1814
- "lmstudio": "openai/gpt-oss-20b",
1815
- "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
1816
- },
1817
- "gpt-oss-120b": {
1818
- "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
1819
- "ollama": "gpt-oss:120b",
1820
- "vllm": "openai/gpt-oss-120b",
1821
- "lmstudio": "openai/gpt-oss-120b",
1822
- "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
1823
- },
1824
- "openai/gpt-oss-120b": {
1825
- "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
1826
- "ollama": "gpt-oss:120b",
1827
- "vllm": "openai/gpt-oss-120b",
1828
- "lmstudio": "openai/gpt-oss-120b",
1829
- "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
1830
- },
1831
- "gemma-4-31b-it-4bit": {
1832
- "local_mlx": "mlx-community/gemma-4-31b-it-4bit",
1833
- "ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
1834
- "vllm": "suitch/gemma-4-31B-it-4bit",
1835
- "lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
1836
- "llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
1837
- },
1838
- "suitch/gemma-4-31b-it-4bit": {
1839
- "local_mlx": "mlx-community/gemma-4-31b-it-4bit",
1840
- "ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
1841
- "vllm": "suitch/gemma-4-31B-it-4bit",
1842
- "lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
1843
- "llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
1844
- },
1845
- "mlx-community/gemma-4-31b-it-4bit": {
1846
- "local_mlx": "mlx-community/gemma-4-31b-it-4bit",
1847
- "ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
1848
- "vllm": "suitch/gemma-4-31B-it-4bit",
1849
- "lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
1850
- "llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
1851
- },
1852
- }
1853
-
1854
- _VERSIONED_MODEL_PATTERNS = (
1855
- ("gemma", re.compile(r"\bgemma[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
1856
- ("qwen", re.compile(r"\bqwen[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
1857
- ("llama", re.compile(r"\bllama[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
1858
- ("phi", re.compile(r"\bphi[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
1192
+ # ── Model runtime/provider helpers moved to latticeai.services.model_runtime ──
1193
+ # ── Health / status / engine-summary router (latticeai.api.health, v1.2.0) ───
1194
+ # /health, /mode, /runtime_features, /engines(GET) now live in the health router.
1195
+ # Heavier engine mutation endpoints remain below in server_app.
1196
+ MODEL_SERVICE = ModelService(
1197
+ model_router=router,
1198
+ runtime_features=runtime_features,
1199
+ is_public=IS_PUBLIC_MODE,
1859
1200
  )
1201
+ app.include_router(create_health_router(
1202
+ model_service=MODEL_SERVICE,
1203
+ engine_status=engine_status,
1204
+ get_current_user=get_current_user,
1205
+ require_auth=REQUIRE_AUTH,
1206
+ app_version=APP_VERSION,
1207
+ app_mode=APP_MODE,
1208
+ ))
1860
1209
 
1861
1210
 
1862
- def _version_tuple(raw: str) -> tuple[int, ...]:
1863
- return tuple(int(part) for part in raw.split(".") if part.isdigit())
1864
-
1865
-
1866
- def _model_family_version(model: Dict[str, object]) -> Optional[tuple[str, tuple[int, ...]]]:
1867
- text = " ".join(str(model.get(key) or "") for key in ("family", "name", "id"))
1868
- for family, pattern in _VERSIONED_MODEL_PATTERNS:
1869
- match = pattern.search(text)
1870
- if match:
1871
- version = _version_tuple(match.group(1))
1872
- if version:
1873
- return family, version
1874
- return None
1875
-
1876
-
1877
- def filter_lower_family_versions(models: List[Dict[str, object]]) -> List[Dict[str, object]]:
1878
- max_versions: Dict[str, tuple[int, ...]] = {}
1879
- detected: List[tuple[Dict[str, object], Optional[tuple[str, tuple[int, ...]]]]] = []
1880
- for model in models:
1881
- version_info = _model_family_version(model)
1882
- detected.append((model, version_info))
1883
- if not version_info:
1884
- continue
1885
- family, version = version_info
1886
- if version > max_versions.get(family, (0,)):
1887
- max_versions[family] = version
1888
- return [
1889
- model for model, version_info in detected
1890
- if not version_info or version_info[1] >= max_versions.get(version_info[0], version_info[1])
1891
- ]
1892
-
1893
- def _update_env_file(env_file: Path, key: str, value: str) -> None:
1894
- lines = []
1895
- found = False
1896
- if env_file.exists():
1897
- for line in env_file.read_text(encoding="utf-8").splitlines():
1898
- if line.startswith(f"{key}="):
1899
- lines.append(f"{key}={value}")
1900
- found = True
1901
- else:
1902
- lines.append(line)
1903
- if not found:
1904
- lines.append(f"{key}={value}")
1905
- env_file.write_text("\n".join(lines) + "\n", encoding="utf-8")
1906
-
1907
-
1908
- LOCAL_SERVER_PROCESSES: Dict[str, subprocess.Popen] = {}
1909
- VLLM_METAL_ENV = Path.home() / ".venv-vllm-metal"
1910
- VLLM_METAL_BIN = VLLM_METAL_ENV / "bin" / "vllm"
1911
- VLLM_METAL_PYTHON = VLLM_METAL_ENV / "bin" / "python"
1912
- LMSTUDIO_BUNDLED_CLI = Path("/Applications/LM Studio.app/Contents/Resources/app/.webpack/lms")
1913
-
1914
- def windows_binary_candidates(binary: str) -> List[Path]:
1915
- local_appdata = os.environ.get("LOCALAPPDATA", "")
1916
- program_files = os.environ.get("ProgramFiles", r"C:\Program Files")
1917
- program_files_x86 = os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)")
1918
- candidates = {
1919
- "ollama": [
1920
- Path(local_appdata) / "Programs" / "Ollama" / "ollama.exe" if local_appdata else None,
1921
- Path(program_files) / "Ollama" / "ollama.exe",
1922
- ],
1923
- "lms": [
1924
- Path(local_appdata) / "Programs" / "LM Studio" / "resources" / "app" / ".webpack" / "lms.exe" if local_appdata else None,
1925
- Path(program_files) / "LM Studio" / "resources" / "app" / ".webpack" / "lms.exe",
1926
- ],
1927
- "nvidia-smi": [
1928
- Path(program_files) / "NVIDIA Corporation" / "NVSMI" / "nvidia-smi.exe",
1929
- Path(program_files_x86) / "NVIDIA Corporation" / "NVSMI" / "nvidia-smi.exe",
1930
- ],
1931
- }
1932
- return [item for item in candidates.get(binary, []) if item is not None]
1933
-
1934
-
1935
- def local_binary(binary: str) -> Optional[str]:
1936
- found = shutil.which(binary)
1937
- if found:
1938
- return found
1939
- if platform.system() == "Windows":
1940
- for candidate in windows_binary_candidates(binary):
1941
- if candidate.exists():
1942
- return str(candidate)
1943
- return None
1944
-
1945
-
1946
- def find_lmstudio_cli() -> Optional[str]:
1947
- cli = local_binary("lms")
1948
- if cli:
1949
- return cli
1950
- if LMSTUDIO_BUNDLED_CLI.exists():
1951
- return str(LMSTUDIO_BUNDLED_CLI)
1952
- return None
1953
-
1954
-
1955
- def vllm_executable() -> Optional[str]:
1956
- found = shutil.which("vllm")
1957
- if found:
1958
- return found
1959
- if VLLM_METAL_BIN.exists():
1960
- return str(VLLM_METAL_BIN)
1961
- return None
1962
-
1963
-
1964
- def vllm_metal_python() -> Optional[str]:
1965
- if VLLM_METAL_PYTHON.exists():
1966
- return str(VLLM_METAL_PYTHON)
1967
- return None
1968
-
1969
-
1970
- def _json_request(
1971
- url: str,
1972
- *,
1973
- method: str = "GET",
1974
- payload: Optional[Dict[str, object]] = None,
1975
- headers: Optional[Dict[str, str]] = None,
1976
- timeout: float = 10.0,
1977
- ) -> Dict[str, object]:
1978
- data = None
1979
- req_headers = dict(headers or {})
1980
- if payload is not None:
1981
- data = json.dumps(payload).encode("utf-8")
1982
- req_headers.setdefault("Content-Type", "application/json")
1983
- req = urllib.request.Request(url, data=data, headers=req_headers, method=method)
1984
- with urllib.request.urlopen(req, timeout=timeout) as res:
1985
- raw = res.read().decode("utf-8", errors="replace")
1986
- if not raw.strip():
1987
- return {}
1988
- return json.loads(raw)
1989
-
1990
-
1991
- def lmstudio_api_base() -> str:
1992
- return (os.getenv("LMSTUDIO_BASE_URL") or OPENAI_COMPATIBLE_PROVIDERS["lmstudio"]["base_url"]).rstrip("/")
1993
-
1211
+ # ── Model / Engine router (latticeai.api.models, v1.3.0) ─────────────────────
1212
+ app.include_router(create_models_router(
1213
+ model_router=router,
1214
+ require_user=require_user,
1215
+ get_current_user=get_current_user,
1216
+ load_users=load_users,
1217
+ get_user_role=get_user_role,
1218
+ install_engine=install_engine,
1219
+ verify_cloud_models=verify_cloud_models,
1220
+ normalize_local_model_request=normalize_local_model_request,
1221
+ download_hf_model=download_hf_model,
1222
+ prepare_and_load_model=prepare_and_load_model,
1223
+ prepare_and_load_model_stream=prepare_and_load_model_stream,
1224
+ sse_event=sse_event,
1225
+ ensure_ollama_server=ensure_ollama_server,
1226
+ local_binary=local_binary,
1227
+ engine_status=engine_status,
1228
+ filter_lower_family_versions=filter_lower_family_versions,
1229
+ list_compat_profiles=_list_compat_profiles,
1230
+ set_user_api_key=set_user_api_key,
1231
+ engine_model_catalog=ENGINE_MODEL_CATALOG,
1232
+ model_engine_aliases=MODEL_ENGINE_ALIASES,
1233
+ cloud_verify_ttl_seconds=CLOUD_VERIFY_TTL_SECONDS,
1234
+ is_public_mode=IS_PUBLIC_MODE,
1235
+ allow_local_models=ALLOW_LOCAL_MODELS,
1236
+ require_auth=REQUIRE_AUTH,
1237
+ ))
1994
1238
 
1995
- def lmstudio_native_api_base() -> str:
1996
- base = lmstudio_api_base()
1997
- return base[:-3] if base.endswith("/v1") else base
1998
1239
 
1240
+ # ── Chat / Completion ──────────────────────────────────────────────────────────
1999
1241
 
2000
- def ensure_lmstudio_server() -> None:
2001
- base_url = lmstudio_native_api_base()
2002
- try:
2003
- _json_request(f"{base_url}/api/v1/models", headers={"Authorization": "Bearer lmstudio"}, timeout=2.5)
2004
- return
2005
- except Exception:
2006
- pass
1242
+ app.include_router(create_chat_router(
1243
+ config=CONFIG,
1244
+ model_router=router,
1245
+ chat_service=CHAT_SERVICE,
1246
+ workspace_store=WORKSPACE_OS,
1247
+ workspace_graph=_workspace_graph,
1248
+ gardener=gardener,
1249
+ require_user=require_user,
1250
+ enforce_rate_limit=enforce_rate_limit,
1251
+ get_history_user=get_history_user,
1252
+ save_to_history=save_to_history,
1253
+ append_audit_event=append_audit_event,
1254
+ clear_history=clear_history,
1255
+ clear_conversation=clear_conversation,
1256
+ get_history=get_history,
1257
+ group_history_conversations=group_history_conversations,
1258
+ get_conversation_messages=get_conversation_messages,
1259
+ conversation_title=conversation_title,
1260
+ load_users=load_users,
1261
+ get_user_role=get_user_role,
1262
+ enable_graph=ENABLE_GRAPH,
1263
+ knowledge_graph=KNOWLEDGE_GRAPH,
1264
+ public_model=PUBLIC_MODEL,
1265
+ base_dir=BASE_DIR,
1266
+ ))
2007
1267
 
2008
- cli = find_lmstudio_cli()
2009
- if not cli:
2010
- raise HTTPException(status_code=400, detail="LM Studio CLI를 찾지 못했습니다. LM Studio를 설치한 뒤 다시 시도하세요.")
2011
-
2012
- try:
2013
- subprocess.Popen(
2014
- [cli, "server", "start"],
2015
- stdout=subprocess.DEVNULL,
2016
- stderr=subprocess.DEVNULL,
2017
- start_new_session=True,
2018
- )
2019
- except Exception as e:
2020
- raise HTTPException(status_code=500, detail=f"LM Studio 서버 시작 실패: {e}")
2021
-
2022
- deadline = time.time() + 45
2023
- while time.time() < deadline:
2024
- try:
2025
- _json_request(f"{base_url}/api/v1/models", headers={"Authorization": "Bearer lmstudio"}, timeout=2.5)
2026
- return
2027
- except Exception:
2028
- time.sleep(1)
2029
- raise HTTPException(status_code=500, detail="LM Studio Local Server를 자동으로 시작하지 못했습니다.")
2030
-
2031
-
2032
- _LMSTUDIO_MODELS_CACHE: List[Dict[str, object]] = []
2033
- _LMSTUDIO_MODELS_CACHE_TS: float = 0.0
2034
- _LMSTUDIO_MODELS_CACHE_TTL: float = 10.0
2035
-
2036
-
2037
- def get_lmstudio_models(*, force: bool = False) -> List[Dict[str, object]]:
2038
- global _LMSTUDIO_MODELS_CACHE, _LMSTUDIO_MODELS_CACHE_TS
2039
- if not force and time.monotonic() - _LMSTUDIO_MODELS_CACHE_TS < _LMSTUDIO_MODELS_CACHE_TTL:
2040
- return _LMSTUDIO_MODELS_CACHE
2041
- try:
2042
- payload = _json_request(
2043
- f"{lmstudio_native_api_base()}/api/v1/models",
2044
- headers={"Authorization": f"Bearer {os.getenv('LMSTUDIO_API_KEY') or 'lmstudio'}"},
2045
- timeout=2.5,
2046
- )
2047
- except Exception:
2048
- return _LMSTUDIO_MODELS_CACHE
2049
- models = payload.get("models")
2050
- _LMSTUDIO_MODELS_CACHE = models if isinstance(models, list) else []
2051
- _LMSTUDIO_MODELS_CACHE_TS = time.monotonic()
2052
- return _LMSTUDIO_MODELS_CACHE
2053
-
2054
-
2055
- def _lmstudio_candidate_keys(model_name: str) -> List[str]:
2056
- raw = model_name.strip()
2057
- if not raw:
2058
- return []
2059
- slug = raw.split("/")[-1].lower()
2060
- slug = slug.replace("-gguf", "").replace("-awq", "")
2061
- parts = [p for p in slug.split("-") if p]
2062
- candidates = [raw.lower(), slug]
2063
- if parts:
2064
- candidates.append("-".join(parts[: min(4, len(parts))]))
2065
- return list(dict.fromkeys(candidates))
2066
-
2067
-
2068
- def _find_lmstudio_model_key(model_name: str, models: List[Dict[str, object]]) -> Optional[str]:
2069
- if not models:
2070
- return None
2071
- candidate_keys = _lmstudio_candidate_keys(model_name)
2072
- exact = []
2073
- fuzzy = []
2074
- for item in models:
2075
- if not isinstance(item, dict):
2076
- continue
2077
- key = str(item.get("key") or "").strip()
2078
- display_name = str(item.get("display_name") or "").strip()
2079
- haystacks = [key.lower(), display_name.lower()]
2080
- if any(raw == key.lower() for raw in candidate_keys):
2081
- exact.append(key)
2082
- continue
2083
- if any(token and token in hay for token in candidate_keys for hay in haystacks):
2084
- fuzzy.append(key)
2085
- return (exact or fuzzy or [None])[0]
2086
-
2087
-
2088
- def ensure_lmstudio_model(model_name: str) -> Dict[str, object]:
2089
- ensure_lmstudio_server()
2090
- auth_header = {"Authorization": f"Bearer {os.getenv('LMSTUDIO_API_KEY') or 'lmstudio'}"}
2091
- models = get_lmstudio_models()
2092
- found_key = _find_lmstudio_model_key(model_name, models)
2093
- model_key = found_key or model_name
2094
-
2095
- if not found_key:
2096
- try:
2097
- job = _json_request(
2098
- f"{lmstudio_native_api_base()}/api/v1/models/download",
2099
- method="POST",
2100
- payload={"model": model_name},
2101
- headers=auth_header,
2102
- timeout=30,
2103
- )
2104
- except urllib.error.HTTPError as e:
2105
- detail = e.read().decode("utf-8", errors="replace")[-2000:]
2106
- raise HTTPException(status_code=500, detail=f"LM Studio 모델 다운로드 실패: {detail or e.reason}")
2107
- except Exception as e:
2108
- raise HTTPException(status_code=500, detail=f"LM Studio 모델 다운로드 실패: {e}")
2109
-
2110
- status = str(job.get("status") or "")
2111
- job_id = str(job.get("job_id") or "")
2112
- if status not in {"completed", "already_downloaded"} and job_id:
2113
- deadline = time.time() + 3600
2114
- while time.time() < deadline:
2115
- polled = _json_request(
2116
- f"{lmstudio_native_api_base()}/api/v1/models/download/status/{job_id}",
2117
- headers=auth_header,
2118
- timeout=30,
2119
- )
2120
- polled_status = str(polled.get("status") or "")
2121
- if polled_status == "completed":
2122
- break
2123
- if polled_status == "failed":
2124
- raise HTTPException(status_code=500, detail=f"LM Studio 모델 다운로드 실패: {polled}")
2125
- time.sleep(2)
2126
- else:
2127
- raise HTTPException(status_code=408, detail="LM Studio 모델 다운로드 시간이 초과되었습니다.")
2128
-
2129
- models = get_lmstudio_models(force=True)
2130
- model_key = _find_lmstudio_model_key(model_name, models) or model_name
2131
-
2132
- target = next((item for item in models if isinstance(item, dict) and item.get("key") == model_key), None)
2133
- loaded_instances = target.get("loaded_instances") if isinstance(target, dict) else None
2134
- if loaded_instances:
2135
- return {"provider": "lmstudio", "model": model_name, "resolved_model": model_key, "server_ready": True, "cached": True}
2136
-
2137
- try:
2138
- loaded = _json_request(
2139
- f"{lmstudio_native_api_base()}/api/v1/models/load",
2140
- method="POST",
2141
- payload={"model": model_key, "context_length": 4096},
2142
- headers=auth_header,
2143
- timeout=120,
2144
- )
2145
- except urllib.error.HTTPError as e:
2146
- detail = e.read().decode("utf-8", errors="replace")[-2000:]
2147
- raise HTTPException(status_code=500, detail=f"LM Studio 모델 로드 실패: {detail or e.reason}")
2148
- except Exception as e:
2149
- raise HTTPException(status_code=500, detail=f"LM Studio 모델 로드 실패: {e}")
2150
-
2151
- if str(loaded.get("status") or "") != "loaded":
2152
- raise HTTPException(status_code=500, detail=f"LM Studio 모델 로드 실패: {loaded}")
2153
-
2154
- return {
2155
- "provider": "lmstudio",
2156
- "model": model_name,
2157
- "resolved_model": model_key,
2158
- "instance_id": loaded.get("instance_id"),
2159
- "server_ready": True,
2160
- "cached": False,
2161
- }
2162
-
2163
- def engine_support_status(engine: str) -> Dict[str, object]:
2164
- if engine != "vllm":
2165
- return {"supported": True, "reason": None}
2166
- is_apple_silicon = sys.platform == "darwin" and platform.machine() == "arm64"
2167
- if sys.platform.startswith("win"):
2168
- return {"supported": False, "reason": "vLLM은 Windows native 자동 설치보다 WSL2/Linux 환경을 권장합니다."}
2169
- if sys.platform == "darwin" and not is_apple_silicon:
2170
- return {"supported": False, "reason": "vLLM Metal 자동 설치는 Apple Silicon macOS에서만 지원됩니다."}
2171
- if sys.version_info >= (3, 13) and is_apple_silicon:
2172
- return {"supported": True, "reason": "현재 환경에서는 vLLM Metal 전용 런타임으로 설치합니다."}
2173
- if sys.version_info >= (3, 13):
2174
- return {"supported": False, "reason": "vLLM 설치는 현재 Python 3.13 이하 또는 별도 전용 런타임이 필요합니다."}
2175
- return {"supported": True, "reason": None}
2176
-
2177
- def hf_model_ready(repo_id: str, provider: str = "local_mlx") -> bool:
2178
- model_dir = hf_model_dir(repo_id)
2179
- if provider == "vllm" and (not model_dir.exists() or not model_dir.is_dir()):
2180
- hf_cache_repo = Path.home() / ".cache" / "huggingface" / "hub" / f"models--{repo_id.replace('/', '--')}"
2181
- if hf_cache_repo.exists() and any(hf_cache_repo.glob("snapshots/*")):
2182
- return True
2183
- return False
2184
- if not model_dir.exists() or not model_dir.is_dir():
2185
- return False
2186
- if provider == "llamacpp":
2187
- return any(model_dir.rglob("*.gguf"))
2188
- has_config = (model_dir / "config.json").exists()
2189
- has_weights = any(model_dir.glob("*.safetensors")) or any(model_dir.glob("*.bin"))
2190
- has_tokenizer = (
2191
- (model_dir / "tokenizer.json").exists()
2192
- or (model_dir / "tokenizer.model").exists()
2193
- or (model_dir / "tokenizer_config.json").exists()
2194
- )
2195
- return has_config and has_weights and has_tokenizer
2196
-
2197
-
2198
- def model_download_progress_payload(
2199
- stage: str,
2200
- message: str,
2201
- *,
2202
- percent: Optional[float] = None,
2203
- detail: Optional[str] = None,
2204
- downloaded_bytes: Optional[int] = None,
2205
- total_bytes: Optional[int] = None,
2206
- eta_seconds: Optional[float] = None,
2207
- file: Optional[str] = None,
2208
- indeterminate: bool = False,
2209
- ) -> Dict[str, object]:
2210
- payload: Dict[str, object] = {
2211
- "stage": stage,
2212
- "message": message,
2213
- "indeterminate": indeterminate,
2214
- "ts": time.time(),
2215
- }
2216
- if percent is not None:
2217
- payload["percent"] = max(0, min(100, round(float(percent), 1)))
2218
- if detail:
2219
- payload["detail"] = detail
2220
- if downloaded_bytes is not None:
2221
- payload["downloaded_bytes"] = max(0, int(downloaded_bytes))
2222
- if total_bytes is not None:
2223
- payload["total_bytes"] = max(0, int(total_bytes))
2224
- if eta_seconds is not None:
2225
- payload["eta_seconds"] = max(0, round(float(eta_seconds)))
2226
- if file:
2227
- payload["file"] = file
2228
- return payload
2229
-
2230
-
2231
- def estimate_eta_seconds(started_at: float, percent: Optional[float]) -> Optional[float]:
2232
- if percent is None or percent <= 0 or percent >= 100:
2233
- return None
2234
- elapsed = max(0.0, time.time() - started_at)
2235
- return elapsed * (100.0 - percent) / percent
2236
-
2237
-
2238
- def hf_repo_files_with_sizes(repo_id: str) -> List[Dict[str, object]]:
2239
- from huggingface_hub import HfApi
2240
-
2241
- api = HfApi()
2242
- try:
2243
- info = api.model_info(repo_id, files_metadata=True)
2244
- files = []
2245
- for sibling in getattr(info, "siblings", []) or []:
2246
- name = str(getattr(sibling, "rfilename", "") or "").strip()
2247
- if not name or name.endswith("/"):
2248
- continue
2249
- files.append({"name": name, "size": int(getattr(sibling, "size", 0) or 0)})
2250
- if files:
2251
- return files
2252
- except TypeError:
2253
- pass
2254
- except Exception as e:
2255
- logging.warning("huggingface model_info failed for %s: %s", repo_id, e)
2256
-
2257
- return [{"name": str(name), "size": 0} for name in api.list_repo_files(repo_id) if str(name).strip()]
2258
-
2259
-
2260
- def download_hf_model(
2261
- repo_id: str,
2262
- provider: str = "local_mlx",
2263
- progress_emit=None,
2264
- ) -> Dict[str, object]:
2265
- if importlib.util.find_spec("huggingface_hub") is None:
2266
- raise HTTPException(status_code=400, detail="huggingface_hub가 없습니다. 먼저 MLX runtime 설치를 진행해 주세요.")
2267
-
2268
- target_dir = hf_model_dir(repo_id)
2269
- if hf_model_ready(repo_id, provider):
2270
- if progress_emit:
2271
- progress_emit(model_download_progress_payload(
2272
- "download",
2273
- "이미 다운로드된 모델을 확인했습니다.",
2274
- percent=100,
2275
- downloaded_bytes=0,
2276
- total_bytes=0,
2277
- eta_seconds=0,
2278
- ))
2279
- return {"model": repo_id, "path": str(target_dir), "cached": True}
2280
-
2281
- target_dir.mkdir(parents=True, exist_ok=True)
2282
- try:
2283
- from huggingface_hub import hf_hub_download
2284
-
2285
- started_at = time.time()
2286
- all_files = hf_repo_files_with_sizes(repo_id)
2287
- if provider == "llamacpp":
2288
- ggufs = sorted(
2289
- [item for item in all_files if str(item["name"]).lower().endswith(".gguf")],
2290
- key=lambda item: str(item["name"]),
2291
- )
2292
- if not ggufs:
2293
- raise RuntimeError("GGUF 파일을 찾지 못했습니다.")
2294
- preference = ("q4_k_m", "q4_0", "q4_k_s", "q3_k_m", "q2_k")
2295
- selected_files = [
2296
- next(
2297
- (item for pref in preference for item in ggufs if pref in str(item["name"]).lower()),
2298
- ggufs[0],
2299
- )
2300
- ]
2301
- else:
2302
- selected_files = all_files
2303
-
2304
- total_bytes = sum(int(item.get("size") or 0) for item in selected_files) or None
2305
- downloaded_bytes = 0
2306
- total_files = max(1, len(selected_files))
2307
- if progress_emit:
2308
- progress_emit(model_download_progress_payload(
2309
- "download",
2310
- "모델 파일 정보를 확인했습니다.",
2311
- percent=0,
2312
- downloaded_bytes=0,
2313
- total_bytes=total_bytes,
2314
- indeterminate=total_bytes is None,
2315
- ))
2316
-
2317
- for index, item in enumerate(selected_files, start=1):
2318
- filename = str(item["name"])
2319
- size = int(item.get("size") or 0)
2320
- tqdm_class = None
2321
- if progress_emit:
2322
- current_percent = (
2323
- (downloaded_bytes / total_bytes) * 100 if total_bytes else ((index - 1) / total_files) * 100
2324
- )
2325
- progress_emit(model_download_progress_payload(
2326
- "download",
2327
- "모델 다운로드 중입니다.",
2328
- percent=current_percent,
2329
- detail=filename,
2330
- downloaded_bytes=downloaded_bytes,
2331
- total_bytes=total_bytes,
2332
- eta_seconds=estimate_eta_seconds(started_at, current_percent),
2333
- file=filename,
2334
- indeterminate=total_bytes is None and total_files <= 1,
2335
- ))
2336
- try:
2337
- from tqdm.auto import tqdm as base_tqdm
2338
-
2339
- downloaded_before = downloaded_bytes
2340
- last_emit = {"at": 0.0, "percent": -1.0}
2341
-
2342
- def emit_byte_progress(done_bytes: float) -> None:
2343
- done = max(0, int(done_bytes or 0))
2344
- if total_bytes:
2345
- aggregate = min(total_bytes, downloaded_before + done)
2346
- percent = (aggregate / total_bytes) * 100
2347
- else:
2348
- file_total = size or done
2349
- file_ratio = min(1.0, done / file_total) if file_total else 0.0
2350
- aggregate = downloaded_before + done
2351
- percent = ((index - 1) + file_ratio) / total_files * 100
2352
- now = time.time()
2353
- if percent < 100 and now - last_emit["at"] < 0.5 and percent - last_emit["percent"] < 0.3:
2354
- return
2355
- last_emit["at"] = now
2356
- last_emit["percent"] = percent
2357
- progress_emit(model_download_progress_payload(
2358
- "download",
2359
- "모델 다운로드 중입니다.",
2360
- percent=percent,
2361
- detail=filename,
2362
- downloaded_bytes=aggregate,
2363
- total_bytes=total_bytes,
2364
- eta_seconds=estimate_eta_seconds(started_at, percent),
2365
- file=filename,
2366
- indeterminate=total_bytes is None and total_files <= 1,
2367
- ))
2368
-
2369
- class ProgressTqdm(base_tqdm):
2370
- def update(self, n=1):
2371
- result = super().update(n)
2372
- emit_byte_progress(float(getattr(self, "n", 0) or 0))
2373
- return result
2374
-
2375
- tqdm_class = ProgressTqdm
2376
- except Exception:
2377
- tqdm_class = None
2378
- local_path = hf_hub_download(
2379
- repo_id=repo_id,
2380
- filename=filename,
2381
- local_dir=str(target_dir),
2382
- tqdm_class=tqdm_class,
2383
- )
2384
- if size <= 0:
2385
- try:
2386
- size = Path(local_path).stat().st_size
2387
- except OSError:
2388
- size = 0
2389
- downloaded_bytes += size
2390
- if progress_emit:
2391
- current_percent = (
2392
- (downloaded_bytes / total_bytes) * 100 if total_bytes else (index / total_files) * 100
2393
- )
2394
- progress_emit(model_download_progress_payload(
2395
- "download",
2396
- "모델 다운로드 중입니다.",
2397
- percent=current_percent,
2398
- detail=filename,
2399
- downloaded_bytes=downloaded_bytes,
2400
- total_bytes=total_bytes,
2401
- eta_seconds=estimate_eta_seconds(started_at, current_percent),
2402
- file=filename,
2403
- indeterminate=False,
2404
- ))
2405
-
2406
- if progress_emit:
2407
- progress_emit(model_download_progress_payload(
2408
- "download",
2409
- "모델 다운로드가 완료되었습니다.",
2410
- percent=100,
2411
- downloaded_bytes=downloaded_bytes,
2412
- total_bytes=total_bytes or downloaded_bytes,
2413
- eta_seconds=0,
2414
- ))
2415
- except Exception as e:
2416
- raise HTTPException(status_code=500, detail=f"{repo_id} 다운로드 실패: {str(e)[-2000:]}")
2417
-
2418
- if not hf_model_ready(repo_id, provider):
2419
- raise HTTPException(status_code=500, detail=f"{repo_id} 다운로드가 완료되지 않았습니다. 모델 파일을 찾지 못했습니다.")
2420
-
2421
- return {"model": repo_id, "path": str(target_dir), "cached": False}
2422
-
2423
-
2424
- def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict[str, object]:
2425
- ollama = local_binary("ollama")
2426
- if not ollama:
2427
- raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
2428
- started_at = time.time()
2429
- if progress_emit:
2430
- progress_emit(model_download_progress_payload(
2431
- "download",
2432
- "Ollama 모델 다운로드를 시작합니다.",
2433
- percent=0,
2434
- detail=model_name,
2435
- indeterminate=True,
2436
- ))
2437
- process = subprocess.Popen(
2438
- [ollama, "pull", model_name],
2439
- stdout=subprocess.PIPE,
2440
- stderr=subprocess.STDOUT,
2441
- text=True,
2442
- bufsize=1,
2443
- )
2444
- last_percent: Optional[float] = None
2445
- lines: List[str] = []
2446
- try:
2447
- assert process.stdout is not None
2448
- for raw_line in process.stdout:
2449
- for part in re.split(r"[\r\n]+", raw_line):
2450
- line = part.strip()
2451
- if not line:
2452
- continue
2453
- lines.append(line)
2454
- match = re.search(r"(\d{1,3}(?:\.\d+)?)\s*%", line)
2455
- if match:
2456
- last_percent = min(100.0, float(match.group(1)))
2457
- if progress_emit:
2458
- progress_emit(model_download_progress_payload(
2459
- "download",
2460
- "Ollama 모델 다운로드 중입니다.",
2461
- percent=last_percent,
2462
- detail=line[-180:],
2463
- eta_seconds=estimate_eta_seconds(started_at, last_percent),
2464
- indeterminate=False,
2465
- ))
2466
- elif progress_emit:
2467
- progress_emit(model_download_progress_payload(
2468
- "download",
2469
- "Ollama 모델 다운로드 중입니다.",
2470
- percent=last_percent,
2471
- detail=line[-180:],
2472
- eta_seconds=estimate_eta_seconds(started_at, last_percent),
2473
- indeterminate=last_percent is None,
2474
- ))
2475
- returncode = process.wait()
2476
- except Exception:
2477
- process.kill()
2478
- raise
2479
-
2480
- if returncode != 0:
2481
- tail = "\n".join(lines[-12:])
2482
- raise HTTPException(status_code=500, detail=tail[-2000:] or "Ollama 모델 다운로드 실패")
2483
-
2484
- if progress_emit:
2485
- progress_emit(model_download_progress_payload(
2486
- "download",
2487
- "Ollama 모델 다운로드가 완료되었습니다.",
2488
- percent=100,
2489
- detail=model_name,
2490
- eta_seconds=0,
2491
- indeterminate=False,
2492
- ))
2493
- return {"provider": "ollama", "model": model_name, "returncode": returncode}
2494
-
2495
-
2496
- def get_ollama_pulled_models() -> set:
2497
- ollama = local_binary("ollama")
2498
- if not ollama:
2499
- return set()
2500
- try:
2501
- result = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=5, check=False)
2502
- pulled = set()
2503
- for line in result.stdout.splitlines()[1:]:
2504
- parts = line.split()
2505
- if parts:
2506
- pulled.add(parts[0])
2507
- return pulled
2508
- except Exception:
2509
- return set()
2510
-
2511
-
2512
- def get_openai_compatible_server_models(provider: str) -> List[str]:
2513
- if provider == "lmstudio":
2514
- models = []
2515
- for item in get_lmstudio_models():
2516
- if not isinstance(item, dict):
2517
- continue
2518
- key = str(item.get("key") or "").strip()
2519
- loaded_instances = item.get("loaded_instances") or []
2520
- if loaded_instances:
2521
- instance_ids = [
2522
- str(instance.get("id") or "").strip()
2523
- for instance in loaded_instances
2524
- if isinstance(instance, dict) and instance.get("id")
2525
- ]
2526
- models.extend(instance_ids or ([key] if key else []))
2527
- return list(dict.fromkeys([model for model in models if model]))
2528
-
2529
- config = OPENAI_COMPATIBLE_PROVIDERS.get(provider) or {}
2530
- base_url = os.getenv(config.get("base_url_env", "")) if config.get("base_url_env") else None
2531
- base_url = (base_url or config.get("base_url") or "").rstrip("/")
2532
- if not base_url:
2533
- return []
2534
-
2535
- api_key = os.getenv(config.get("env_key", "")) or config.get("api_key_fallback") or provider
2536
- req = urllib.request.Request(
2537
- f"{base_url}/models",
2538
- headers={"Authorization": f"Bearer {api_key}"},
2539
- method="GET",
2540
- )
2541
- try:
2542
- with urllib.request.urlopen(req, timeout=2.5) as res:
2543
- payload = json.loads(res.read().decode("utf-8", errors="replace"))
2544
- except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError):
2545
- return []
2546
-
2547
- models = []
2548
- for item in payload.get("data") or []:
2549
- model_id = item.get("id") if isinstance(item, dict) else None
2550
- if model_id:
2551
- models.append(str(model_id))
2552
- return models
2553
-
2554
-
2555
- def ensure_ollama_server() -> None:
2556
- ollama = local_binary("ollama")
2557
- if not ollama:
2558
- raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
2559
- try:
2560
- probe = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=3, check=False)
2561
- if probe.returncode == 0:
2562
- return
2563
- except Exception:
2564
- pass
2565
- subprocess.Popen(
2566
- [ollama, "serve"],
2567
- stdout=subprocess.DEVNULL,
2568
- stderr=subprocess.DEVNULL,
2569
- start_new_session=True,
2570
- )
2571
- deadline = time.time() + 20
2572
- while time.time() < deadline:
2573
- try:
2574
- probe = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=3, check=False)
2575
- if probe.returncode == 0:
2576
- return
2577
- except Exception:
2578
- pass
2579
- time.sleep(0.5)
2580
- raise HTTPException(status_code=500, detail="Ollama 서버를 자동으로 시작하지 못했습니다.")
2581
-
2582
-
2583
- def wait_for_openai_compatible_server(provider: str, model_name: Optional[str] = None, timeout: int = 45) -> bool:
2584
- deadline = time.time() + timeout
2585
- while time.time() < deadline:
2586
- models = get_openai_compatible_server_models(provider)
2587
- if models and (not model_name or model_name in models):
2588
- return True
2589
- time.sleep(1)
2590
- return False
2591
-
2592
-
2593
- def ensure_vllm_server(model_name: str) -> None:
2594
- served_models = get_openai_compatible_server_models("vllm")
2595
- if model_name in served_models:
2596
- return
2597
- vllm_bin = vllm_executable()
2598
- vllm_metal_py = vllm_metal_python()
2599
- if not vllm_bin and not vllm_metal_py and importlib.util.find_spec("vllm") is None:
2600
- raise HTTPException(status_code=400, detail="vLLM runtime이 설치되지 않았습니다.")
2601
-
2602
- local_dir = hf_model_dir(model_name)
2603
- if not vllm_metal_py and not hf_model_ready(model_name, "vllm"):
2604
- download_hf_model(model_name, "vllm")
2605
-
2606
- running = LOCAL_SERVER_PROCESSES.get("vllm")
2607
- if running and running.poll() is None:
2608
- running.terminate()
2609
- try:
2610
- running.wait(timeout=10)
2611
- except subprocess.TimeoutExpired:
2612
- running.kill()
2613
- elif served_models:
2614
- raise HTTPException(status_code=409, detail="다른 vLLM 서버가 이미 실행 중입니다. 현재 서버를 종료한 뒤 다시 시도하세요.")
2615
-
2616
- running = LOCAL_SERVER_PROCESSES.get("vllm")
2617
- if running and running.poll() is None:
2618
- return
2619
-
2620
- _host_args = ["--host", "127.0.0.1", "--port", "8000"]
2621
- if vllm_metal_py:
2622
- command = [vllm_metal_py, "-m", "vllm_metal.server", "--model", model_name, *_host_args]
2623
- elif vllm_bin:
2624
- command = [vllm_bin, "serve", str(local_dir), "--served-model-name", model_name, *_host_args]
2625
- else:
2626
- command = [sys.executable, "-m", "vllm.entrypoints.openai.api_server", "--model", str(local_dir), "--served-model-name", model_name, *_host_args]
2627
- LOCAL_SERVER_PROCESSES["vllm"] = subprocess.Popen(
2628
- command,
2629
- stdout=subprocess.DEVNULL,
2630
- stderr=subprocess.DEVNULL,
2631
- start_new_session=True,
2632
- )
2633
- if not wait_for_openai_compatible_server("vllm", model_name, timeout=90):
2634
- raise HTTPException(status_code=500, detail="vLLM 서버가 모델을 자동 로드하지 못했습니다.")
2635
-
2636
-
2637
- def ensure_llamacpp_server(model_name: str) -> None:
2638
- served_models = get_openai_compatible_server_models("llamacpp")
2639
- if model_name in served_models:
2640
- return
2641
- running = LOCAL_SERVER_PROCESSES.get("llamacpp")
2642
- if running and running.poll() is None:
2643
- running.terminate()
2644
- try:
2645
- running.wait(timeout=10)
2646
- except subprocess.TimeoutExpired:
2647
- running.kill()
2648
- elif served_models:
2649
- raise HTTPException(status_code=409, detail="다른 llama.cpp 서버가 이미 실행 중입니다. 현재 서버를 종료한 뒤 다시 시도하세요.")
2650
- if not shutil.which("llama-server"):
2651
- raise HTTPException(status_code=400, detail="llama.cpp가 설치되지 않았습니다.")
2652
- if not hf_model_ready(model_name, "llamacpp"):
2653
- download_hf_model(model_name, "llamacpp")
2654
-
2655
- gguf_files = sorted(hf_model_dir(model_name).rglob("*.gguf"))
2656
- if not gguf_files:
2657
- raise HTTPException(status_code=500, detail="다운로드된 GGUF 파일을 찾지 못했습니다.")
2658
-
2659
- preferred = next((p for p in gguf_files if "q4_k_m" in p.name.lower()), None)
2660
- model_file = preferred or gguf_files[0]
2661
- LOCAL_SERVER_PROCESSES["llamacpp"] = subprocess.Popen(
2662
- [
2663
- "llama-server",
2664
- "-m",
2665
- str(model_file),
2666
- "--alias",
2667
- model_name,
2668
- "--host",
2669
- "127.0.0.1",
2670
- "--port",
2671
- "8080",
2672
- ],
2673
- stdout=subprocess.DEVNULL,
2674
- stderr=subprocess.DEVNULL,
2675
- start_new_session=True,
2676
- )
2677
- if not wait_for_openai_compatible_server("llamacpp", model_name, timeout=45):
2678
- raise HTTPException(status_code=500, detail="llama.cpp 서버가 모델을 자동 로드하지 못했습니다.")
2679
-
2680
-
2681
- def engine_installed(engine: str) -> bool:
2682
- if engine == "local_mlx":
2683
- return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_lm"))
2684
- if engine == "ollama":
2685
- return local_binary("ollama") is not None
2686
- if engine == "vllm":
2687
- return vllm_metal_python() is not None or vllm_executable() is not None or importlib.util.find_spec("vllm") is not None
2688
- if engine == "lmstudio":
2689
- return find_lmstudio_cli() is not None or Path("/Applications/LM Studio.app").exists()
2690
- if engine == "llamacpp":
2691
- return shutil.which("llama-server") is not None
2692
- if engine in {"openai", "openrouter", "groq", "together", "xai"}:
2693
- return AsyncOpenAI is not None
2694
- return False
2695
-
2696
- def engine_status() -> List[Dict]:
2697
- cloud_models = router.detected_cloud_models()
2698
- cloud_by_provider = {}
2699
- for model in cloud_models:
2700
- cloud_by_provider.setdefault(model["provider"], []).append(model)
2701
-
2702
- ollama_installed = engine_installed("ollama")
2703
- pulled = get_ollama_pulled_models() if ollama_installed else set()
2704
- ollama_models = []
2705
- for m in ENGINE_MODEL_CATALOG["ollama"]:
2706
- pull_name = m["id"].removeprefix("ollama:")
2707
- ollama_models.append({**m, "pulled": pull_name in pulled})
2708
- ollama_models = filter_lower_family_versions(ollama_models)
2709
-
2710
- HF_MODELS_ROOT.mkdir(parents=True, exist_ok=True)
2711
- mlx_models = []
2712
- for m in ENGINE_MODEL_CATALOG.get("local_mlx", []):
2713
- repo_id = m["id"]
2714
- mlx_models.append({**m, "pulled": hf_model_ready(repo_id, "local_mlx")})
2715
- mlx_models = filter_lower_family_versions(mlx_models)
2716
-
2717
- vllm_models = []
2718
- for m in ENGINE_MODEL_CATALOG.get("vllm", []):
2719
- repo_id = m["id"].removeprefix("vllm:")
2720
- vllm_models.append({**m, "pulled": hf_model_ready(repo_id, "vllm")})
2721
- vllm_models = filter_lower_family_versions(vllm_models)
2722
-
2723
- lmstudio_models = []
2724
- downloaded_lmstudio = get_lmstudio_models()
2725
- downloaded_by_key = {}
2726
- for item in downloaded_lmstudio:
2727
- if not isinstance(item, dict):
2728
- continue
2729
- key = str(item.get("key") or "").strip()
2730
- if not key:
2731
- continue
2732
- downloaded_by_key[key] = item
2733
- loaded_instances = item.get("loaded_instances") or []
2734
- lmstudio_models.append({
2735
- "id": f"lmstudio:{key}",
2736
- "name": item.get("display_name") or f"LM Studio · {key}",
2737
- "family": item.get("architecture") or item.get("publisher") or "LM Studio",
2738
- "tag": "loaded-server-model" if loaded_instances else "downloaded",
2739
- "size": item.get("params_string") or item.get("format") or "LM Studio",
2740
- "pullable": True,
2741
- "pulled": True,
2742
- })
2743
-
2744
- if not lmstudio_models:
2745
- for m in ENGINE_MODEL_CATALOG.get("lmstudio", []):
2746
- lmstudio_models.append({**m, "pulled": False})
2747
- else:
2748
- known_ids = {item["id"] for item in lmstudio_models}
2749
- for m in ENGINE_MODEL_CATALOG.get("lmstudio", []):
2750
- repo_id = m["id"].removeprefix("lmstudio:")
2751
- if f"lmstudio:{repo_id}" not in known_ids and repo_id not in downloaded_by_key:
2752
- lmstudio_models.append({**m, "pulled": False})
2753
- lmstudio_models = filter_lower_family_versions(lmstudio_models)
2754
-
2755
- llamacpp_models = []
2756
- for m in ENGINE_MODEL_CATALOG.get("llamacpp", []):
2757
- repo_id = m["id"].removeprefix("llamacpp:")
2758
- llamacpp_models.append({**m, "pulled": hf_model_ready(repo_id, "llamacpp")})
2759
- llamacpp_models = filter_lower_family_versions(llamacpp_models)
2760
-
2761
- local_server_specs = [
2762
- {
2763
- "id": "vllm",
2764
- "name": "vLLM",
2765
- "description": "vLLM OpenAI 호환 서버(예: http://localhost:8000/v1)에 연결합니다.",
2766
- "requires": "VLLM_BASE_URL",
2767
- "note": engine_support_status("vllm").get("reason"),
2768
- },
2769
- {
2770
- "id": "lmstudio",
2771
- "name": "LM Studio",
2772
- "description": "LM Studio 로컬 OpenAI 호환 서버에 연결합니다.",
2773
- "requires": "LMSTUDIO_BASE_URL",
2774
- "note": (
2775
- "다운로드된 모델은 자동 감지하고, 선택 시 필요하면 다운로드 후 바로 로드합니다."
2776
- if downloaded_lmstudio else
2777
- "LM Studio 설치 후 모델을 선택하면 Local Server 시작, 다운로드, 로드를 자동으로 진행합니다."
2778
- ),
2779
- "server_ready": bool(downloaded_lmstudio),
2780
- },
2781
- {
2782
- "id": "llamacpp",
2783
- "name": "llama.cpp",
2784
- "description": "llama.cpp 서버(OpenAI 호환 /v1)에 연결합니다.",
2785
- "requires": "LLAMACPP_BASE_URL",
2786
- },
2787
- ]
2788
-
2789
- engines = [
2790
- {
2791
- "id": "local_mlx",
2792
- "name": "MLX",
2793
- "kind": "local",
2794
- "description": "Apple Silicon GPU에서 MLX/MLX-VLM 모델을 직접 실행합니다.",
2795
- "installed": engine_installed("local_mlx"),
2796
- "installable": True,
2797
- "install_label": ENGINE_INSTALLERS["local_mlx"]["label"],
2798
- "models": mlx_models,
2799
- },
2800
- {
2801
- "id": "ollama",
2802
- "name": "Ollama",
2803
- "kind": "local-server",
2804
- "description": "Ollama 로컬 서버를 OpenAI 호환 엔진처럼 사용합니다.",
2805
- "installed": ollama_installed,
2806
- "installable": True,
2807
- "install_label": ENGINE_INSTALLERS["ollama"]["label"],
2808
- "models": ollama_models,
2809
- },
2810
- ]
2811
- for spec in local_server_specs:
2812
- support = engine_support_status(spec["id"])
2813
- engines.append({
2814
- "id": spec["id"],
2815
- "name": spec["name"],
2816
- "kind": "local-server",
2817
- "description": spec["description"],
2818
- "installed": engine_installed(spec["id"]),
2819
- "supported": support["supported"],
2820
- "support_reason": support["reason"],
2821
- "installable": support["supported"] and spec["id"] in ENGINE_INSTALLERS,
2822
- "install_label": ENGINE_INSTALLERS.get(spec["id"], {}).get("label"),
2823
- "requires": spec["requires"],
2824
- "models": (
2825
- vllm_models if spec["id"] == "vllm"
2826
- else lmstudio_models if spec["id"] == "lmstudio"
2827
- else llamacpp_models if spec["id"] == "llamacpp"
2828
- else ENGINE_MODEL_CATALOG.get(spec["id"], [])
2829
- ),
2830
- "note": spec.get("note") or support["reason"] or f"{spec['requires']} 설정 시 활성화됩니다.",
2831
- "server_ready": spec.get("server_ready"),
2832
- })
2833
- for provider in ["openai", "openrouter", "groq", "together", "xai"]:
2834
- env_key = next((item.get("requires") for item in cloud_by_provider.get(provider, []) if item.get("requires")), None)
2835
- provider_models = []
2836
- for model in cloud_by_provider.get(provider, []):
2837
- cache = CLOUD_VERIFY_CACHE.get(model.get("id"))
2838
- provider_models.append({
2839
- **model,
2840
- "verified": cache.get("ok") if cache else None,
2841
- "verify_reason": cache.get("reason") if cache else None,
2842
- })
2843
- engines.append({
2844
- "id": provider,
2845
- "name": provider.title(),
2846
- "kind": "cloud",
2847
- "description": "OpenAI 호환 Chat Completions API로 cloud LLM을 실행합니다.",
2848
- "installed": engine_installed(provider),
2849
- "installable": True,
2850
- "install_label": ENGINE_INSTALLERS[provider]["label"],
2851
- "requires": env_key,
2852
- "models": provider_models,
2853
- })
2854
- return engines
2855
-
2856
- def runtime_features() -> Dict:
2857
- return {
2858
- "mode": APP_MODE,
2859
- "public": IS_PUBLIC_MODE,
2860
- "host": DEFAULT_HOST,
2861
- "port": DEFAULT_PORT,
2862
- "data_dir": str(DATA_DIR),
2863
- "telegram_enabled": ENABLE_TELEGRAM,
2864
- "graph_enabled": ENABLE_GRAPH,
2865
- "autoload_models": AUTOLOAD_MODELS,
2866
- "model_idle_unload_seconds": MODEL_IDLE_UNLOAD_SECONDS,
2867
- "model_memory_policy": router.model_memory_policy(),
2868
- "allow_local_models": ALLOW_LOCAL_MODELS,
2869
- "security": {
2870
- "host": DEFAULT_HOST,
2871
- "require_auth": REQUIRE_AUTH,
2872
- "invite_gate_enabled": INVITE_GATE_ENABLED,
2873
- "keyring_available": keyring is not None,
2874
- "plaintext_api_keys_allowed": ALLOW_PLAINTEXT_API_KEYS,
2875
- "cors_allow_network": CORS_ALLOW_NETWORK,
2876
- },
2877
- "default_model": PUBLIC_MODEL if IS_PUBLIC_MODE else LOCAL_MODEL,
2878
- "local_only_features": {
2879
- "mlx": ALLOW_LOCAL_MODELS and not IS_PUBLIC_MODE,
2880
- "telegram_bridge": ENABLE_TELEGRAM,
2881
- "desktop_chrome_bridge": not IS_PUBLIC_MODE,
2882
- "computer_use_bridge": not IS_PUBLIC_MODE,
2883
- },
2884
- "public_features": {
2885
- "web_ui": True,
2886
- "openai_compatible_models": True,
2887
- "persistent_data_dir": str(DATA_DIR),
2888
- },
2889
- }
2890
-
2891
- def install_engine(engine: str) -> Dict:
2892
- if engine not in ENGINE_INSTALLERS:
2893
- raise HTTPException(status_code=400, detail="지원하지 않는 엔진입니다.")
2894
- installer = ENGINE_INSTALLERS[engine]
2895
- required_binary = installer.get("requires_binary")
2896
- if required_binary and shutil.which(required_binary) is None:
2897
- raise HTTPException(status_code=400, detail=f"{required_binary}가 설치되어 있지 않아 자동 설치할 수 없습니다.")
2898
- command = installer["command"]
2899
- run_kwargs = {
2900
- "cwd": str(BASE_DIR),
2901
- "capture_output": True,
2902
- "text": True,
2903
- "timeout": 900,
2904
- "check": False,
2905
- }
2906
-
2907
- if engine == "vllm" and sys.platform == "darwin" and platform.machine() == "arm64":
2908
- command = [
2909
- "/bin/bash",
2910
- "-lc",
2911
- "set -euo pipefail; "
2912
- "if [ ! -x /opt/homebrew/bin/python3.12 ]; then brew install python@3.12; fi; "
2913
- "/opt/homebrew/bin/python3.12 -m venv ~/.venv-vllm-metal; "
2914
- "~/.venv-vllm-metal/bin/pip install -U pip setuptools wheel; "
2915
- "~/.venv-vllm-metal/bin/pip install vllm-metal",
2916
- ]
2917
- try:
2918
- completed = subprocess.run(command, **run_kwargs)
2919
- except subprocess.TimeoutExpired:
2920
- raise HTTPException(status_code=408, detail="엔진 설치 시간이 초과되었습니다.")
2921
- result = {
2922
- "engine": engine,
2923
- "command": " ".join(command),
2924
- "returncode": completed.returncode,
2925
- "stdout": completed.stdout[-12000:],
2926
- "stderr": completed.stderr[-12000:],
2927
- "installed": engine_installed(engine),
2928
- }
2929
- ollama = local_binary("ollama")
2930
- if engine == "ollama" and completed.returncode == 0 and ollama:
2931
- # Skip if already running to avoid orphan daemons.
2932
- already_up = False
2933
- try:
2934
- probe = subprocess.run([ollama, "list"], capture_output=True, timeout=2, check=False)
2935
- already_up = probe.returncode == 0
2936
- except Exception:
2937
- already_up = False
2938
- if already_up:
2939
- result["daemon_started"] = "already_running"
2940
- else:
2941
- try:
2942
- # Detach so the daemon survives this request but doesn't become our zombie.
2943
- subprocess.Popen(
2944
- [ollama, "serve"],
2945
- stdout=subprocess.DEVNULL,
2946
- stderr=subprocess.DEVNULL,
2947
- start_new_session=True,
2948
- )
2949
- result["daemon_started"] = True
2950
- except Exception as e:
2951
- logging.warning("ollama serve spawn failed: %s", e)
2952
- result["daemon_started"] = False
2953
- return result
2954
-
2955
-
2956
- def _resolve_model_alias(model_id: str, engine: Optional[str] = None) -> str:
2957
- raw = model_id.strip()
2958
- engine_hint = (engine or "").strip().lower()
2959
- provider: Optional[str] = None
2960
- model_name = raw
2961
- if ":" in raw:
2962
- prefix, rest = raw.split(":", 1)
2963
- prefix = prefix.strip().lower()
2964
- if prefix in {"ollama", "vllm", "lmstudio", "llamacpp", "local_mlx", "mlx"}:
2965
- provider = "local_mlx" if prefix in {"local_mlx", "mlx"} else prefix
2966
- model_name = rest.strip()
2967
- provider = provider or ("local_mlx" if engine_hint in {"", "local_mlx", "mlx"} else engine_hint)
2968
- aliases = MODEL_ENGINE_ALIASES.get(model_name.lower())
2969
- if not aliases:
2970
- return raw
2971
- mapped = aliases.get(provider)
2972
- if not mapped:
2973
- return raw
2974
- return mapped if provider == "local_mlx" else f"{provider}:{mapped}"
2975
-
2976
-
2977
- def normalize_local_model_request(model_id: str, engine: Optional[str] = None) -> str:
2978
- model_id = _resolve_model_alias(model_id, engine)
2979
- engine = (engine or "").strip().lower()
2980
- if engine in {"local_mlx", "mlx"} and model_id.startswith(("local_mlx:", "mlx:")):
2981
- return model_id.split(":", 1)[1].strip()
2982
- if engine and engine not in {"local_mlx", "mlx"} and ":" not in model_id:
2983
- return f"{engine}:{model_id}"
2984
- return model_id
2985
-
2986
-
2987
- def ensure_engine_ready(engine: str) -> Dict[str, object]:
2988
- engine = "local_mlx" if engine == "mlx" else engine
2989
- if engine not in ENGINE_INSTALLERS and engine not in OPENAI_COMPATIBLE_PROVIDERS:
2990
- raise HTTPException(status_code=400, detail=f"지원하지 않는 엔진입니다: {engine}")
2991
- support = engine_support_status(engine)
2992
- if not support["supported"]:
2993
- raise HTTPException(status_code=400, detail=str(support["reason"]))
2994
-
2995
- if engine_installed(engine):
2996
- if engine == "local_mlx":
2997
- ensure_mlx_runtime()
2998
- return {"engine": engine, "installed": True, "installed_now": False}
2999
-
3000
- if engine not in ENGINE_INSTALLERS:
3001
- raise HTTPException(status_code=400, detail=f"{engine} 엔진 설치 방법이 등록되어 있지 않습니다.")
3002
-
3003
- result = install_engine(engine)
3004
- if result.get("returncode") not in (0, None) or not engine_installed(engine):
3005
- detail = result.get("stderr") or result.get("stdout") or f"{engine} 설치에 실패했습니다."
3006
- raise HTTPException(status_code=500, detail=str(detail)[-2000:])
3007
-
3008
- if engine == "local_mlx":
3009
- ensure_mlx_runtime()
3010
- return {"engine": engine, "installed": True, "installed_now": True, "install": result}
3011
-
3012
-
3013
- def build_model_resolution(
3014
- input_id: str,
3015
- engine: Optional[str],
3016
- *,
3017
- user_email: Optional[str] = None,
3018
- display_name: Optional[str] = None,
3019
- ) -> _ModelResolution:
3020
- """피드백 #1/#2 공용 ModelResolution 생성기.
3021
-
3022
- 사용자가 클릭한 input_id + engine 힌트를 받아 모든 단계가 공유할
3023
- canonical identity를 만든다.
3024
- """
3025
- normalized = normalize_local_model_request(input_id, engine)
3026
- return _ModelResolution.from_request(
3027
- normalized,
3028
- engine=engine,
3029
- user_email=user_email,
3030
- display_name=display_name or input_id,
3031
- engine_aliases=MODEL_ENGINE_ALIASES,
3032
- )
3033
-
3034
-
3035
- _LOCAL_SMOKE_ENGINES = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
3036
-
3037
-
3038
- async def _smoke_test_loaded_model(
3039
- resolution: _ModelResolution,
3040
- *,
3041
- api_key_override: Optional[str] = None,
3042
- ) -> Dict[str, object]:
3043
- """로드 직후 짧은 채팅 테스트를 돌려 ready_to_chat 여부를 판정한다.
3044
-
3045
- Cloud(OpenAI/Anthropic/OpenRouter 등) 모델은 사용자 비용 발생 가능성 때문에 skip.
3046
- 실패해도 예외를 던지지 않는다. 결과는 compat_cache에도 기록된다.
3047
- """
3048
- if (resolution.engine or "").lower() not in _LOCAL_SMOKE_ENGINES:
3049
- profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
3050
- return {
3051
- "ok": True,
3052
- "reason": "skipped (cloud model — smoke test would incur cost)",
3053
- "answer": None,
3054
- "profile": profile.to_dict(),
3055
- "skipped": True,
3056
- }
3057
- try:
3058
- text = await asyncio.wait_for(
3059
- router.generate(
3060
- _SMOKE_PROMPT,
3061
- context=None,
3062
- max_tokens=128,
3063
- temperature=0.1,
3064
- ),
3065
- timeout=30,
3066
- )
3067
- except Exception as exc: # pragma: no cover - generator may not exist on all engines
3068
- reason = str(exc)[:200] or "generation_failed"
3069
- profile = _record_smoke_result(
3070
- resolution.load_id, resolution.engine, False, reason, status="failed"
3071
- )
3072
- return {
3073
- "ok": False,
3074
- "status": "failed",
3075
- "reason": reason,
3076
- "answer": None,
3077
- "profile": profile.to_dict(),
3078
- }
3079
-
3080
- profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
3081
- cleaned = _compat_fast_postprocess(str(text or ""), profile.to_dict())
3082
- # item 3-3: ok / degraded / failed 3분류. degraded는 채팅은 가능하다.
3083
- status, reason = _classify_smoke_response(cleaned)
3084
- ok = status != "failed"
3085
- profile = _record_smoke_result(
3086
- resolution.load_id, resolution.engine, ok, reason, status=status
3087
- )
3088
- return {
3089
- "ok": ok,
3090
- "status": status,
3091
- "reason": reason,
3092
- "answer": cleaned,
3093
- "profile": profile.to_dict(),
3094
- }
3095
-
3096
-
3097
- async def prepare_and_load_model(
3098
- model_id: str,
3099
- request: Request,
3100
- engine: Optional[str] = None,
3101
- user_email: Optional[str] = None,
3102
- adapter_path: Optional[str] = None,
3103
- draft_model_id: Optional[str] = None,
3104
- ) -> Dict[str, object]:
3105
- model_id = normalize_local_model_request(model_id, engine)
3106
- if not model_id:
3107
- raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")
3108
-
3109
- # 피드백 #1: ModelResolution을 모든 단계가 공유한다.
3110
- resolution = _ModelResolution.from_request(
3111
- model_id,
3112
- engine=engine,
3113
- user_email=user_email or get_current_user(request),
3114
- engine_aliases=MODEL_ENGINE_ALIASES,
3115
- )
3116
-
3117
- parsed_provider, parsed_model = parse_model_ref(model_id)
3118
- if parsed_provider == "mlx":
3119
- parsed_provider = "local_mlx"
3120
-
3121
- local_engines = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
3122
- install_result: Dict[str, object] = {}
3123
- download_result: Optional[Dict[str, object]] = None
3124
-
3125
- if parsed_provider in local_engines:
3126
- install_result = ensure_engine_ready(parsed_provider)
3127
-
3128
- if parsed_provider == "local_mlx":
3129
- explicit_path = Path(parsed_model).expanduser()
3130
- if not explicit_path.exists() and not hf_model_ready(parsed_model, "local_mlx"):
3131
- download_result = download_hf_model(parsed_model, "local_mlx")
3132
- elif parsed_provider == "ollama":
3133
- ensure_ollama_server()
3134
- ollama = local_binary("ollama")
3135
- if not ollama:
3136
- raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
3137
- if parsed_model not in get_ollama_pulled_models():
3138
- completed = subprocess.run(
3139
- [ollama, "pull", parsed_model],
3140
- capture_output=True,
3141
- text=True,
3142
- timeout=900,
3143
- check=False,
3144
- )
3145
- if completed.returncode != 0:
3146
- raise HTTPException(status_code=500, detail=completed.stderr[-2000:] or "Ollama 모델 다운로드 실패")
3147
- download_result = {"provider": "ollama", "model": parsed_model, "returncode": completed.returncode}
3148
- elif parsed_provider == "vllm":
3149
- ensure_vllm_server(parsed_model)
3150
- download_result = {"provider": "vllm", "model": parsed_model, "server_ready": True}
3151
- elif parsed_provider == "llamacpp":
3152
- ensure_llamacpp_server(parsed_model)
3153
- download_result = {"provider": "llamacpp", "model": parsed_model, "server_ready": True}
3154
- elif parsed_provider == "lmstudio":
3155
- ensured = ensure_lmstudio_model(parsed_model)
3156
- resolved_model = str(
3157
- ensured.get("instance_id")
3158
- or ensured.get("resolved_model")
3159
- or parsed_model
3160
- ).strip()
3161
- parsed_model = resolved_model
3162
- model_id = f"lmstudio:{resolved_model}"
3163
- download_result = ensured
3164
-
3165
- effective_email = (user_email or get_current_user(request) or "").strip()
3166
- user_api_key = get_user_api_key(effective_email, parsed_provider) if parsed_provider != "local_mlx" else None
3167
- msg = await router.load_model(
3168
- model_id,
3169
- adapter_path,
3170
- draft_model_id=draft_model_id,
3171
- api_key_override=user_api_key,
3172
- owner=effective_email or None,
3173
- )
3174
- # 피드백 #1/#2: 로드 직후 ModelResolution을 실제 current로 동기화하고 smoke test 수행.
3175
- resolution.update_after_load(actual_current=router.current_model_id)
3176
- smoke_result: Dict[str, object] = {}
3177
- ready_to_chat = True
3178
- compat_status = "ok"
3179
- try:
3180
- smoke_result = await _smoke_test_loaded_model(resolution, api_key_override=user_api_key)
3181
- ready_to_chat = bool(smoke_result.get("ok"))
3182
- # item 3-3: smoke 결과의 3분류(ok/degraded/failed)를 그대로 노출한다.
3183
- compat_status = str(smoke_result.get("status") or ("ok" if ready_to_chat else "degraded"))
3184
- except Exception as exc: # never break load on smoke test failures
3185
- logging.warning("smoke test failed for %s: %s", resolution.load_id, exc)
3186
- compat_status = "unknown"
3187
- return {
3188
- "status": "ok",
3189
- "message": msg,
3190
- "model": model_id,
3191
- "current": router.current_model_id,
3192
- "engine": parsed_provider,
3193
- "installed_now": bool(install_result.get("installed_now")),
3194
- "download": download_result,
3195
- "resolution": resolution.to_dict(),
3196
- "downloaded": True,
3197
- "loaded": True,
3198
- "ready_to_chat": ready_to_chat,
3199
- "compatibility_status": compat_status,
3200
- "smoke_test": smoke_result,
3201
- }
3202
-
3203
-
3204
- def sse_event(event: str, data: Dict[str, object]) -> str:
3205
- return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
3206
-
3207
-
3208
- async def prepare_and_load_model_stream(
3209
- model_id: str,
3210
- request: Request,
3211
- engine: Optional[str] = None,
3212
- user_email: Optional[str] = None,
3213
- ) -> AsyncIterator[str]:
3214
- model_id = normalize_local_model_request(model_id, engine)
3215
- if not model_id:
3216
- raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")
3217
-
3218
- parsed_provider, parsed_model = parse_model_ref(model_id)
3219
- if parsed_provider == "mlx":
3220
- parsed_provider = "local_mlx"
3221
-
3222
- work_queue: "queue.Queue[Dict[str, object]]" = queue.Queue()
3223
- work_result: Dict[str, object] = {}
3224
-
3225
- def emit_progress(payload: Dict[str, object]) -> None:
3226
- work_queue.put({"kind": "progress", "data": payload})
3227
-
3228
- def blocking_prepare() -> None:
3229
- try:
3230
- local_engines = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
3231
- install_result: Dict[str, object] = {}
3232
- download_result: Optional[Dict[str, object]] = None
3233
- prepared_model_id = model_id
3234
- prepared_model_name = parsed_model
3235
-
3236
- if parsed_provider in local_engines:
3237
- emit_progress(model_download_progress_payload(
3238
- "engine",
3239
- "실행 엔진을 확인하는 중입니다.",
3240
- percent=2,
3241
- indeterminate=True,
3242
- ))
3243
- install_result = ensure_engine_ready(parsed_provider)
3244
- emit_progress(model_download_progress_payload(
3245
- "engine",
3246
- "실행 엔진 준비가 완료되었습니다.",
3247
- percent=10,
3248
- indeterminate=False,
3249
- ))
3250
-
3251
- if parsed_provider == "local_mlx":
3252
- explicit_path = Path(parsed_model).expanduser()
3253
- if explicit_path.exists():
3254
- download_result = {"model": parsed_model, "path": str(explicit_path), "cached": True}
3255
- emit_progress(model_download_progress_payload(
3256
- "download",
3257
- "로컬 모델 경로를 확인했습니다.",
3258
- percent=100,
3259
- detail=str(explicit_path),
3260
- eta_seconds=0,
3261
- ))
3262
- elif not hf_model_ready(parsed_model, "local_mlx"):
3263
- download_result = download_hf_model(parsed_model, "local_mlx", progress_emit=emit_progress)
3264
- else:
3265
- download_result = {"model": parsed_model, "path": str(hf_model_dir(parsed_model)), "cached": True}
3266
- emit_progress(model_download_progress_payload(
3267
- "download",
3268
- "이미 다운로드된 모델을 확인했습니다.",
3269
- percent=100,
3270
- eta_seconds=0,
3271
- ))
3272
- elif parsed_provider == "ollama":
3273
- emit_progress(model_download_progress_payload(
3274
- "engine",
3275
- "Ollama 서버를 확인하는 중입니다.",
3276
- percent=12,
3277
- indeterminate=True,
3278
- ))
3279
- ensure_ollama_server()
3280
- if parsed_model not in get_ollama_pulled_models():
3281
- download_result = pull_ollama_model_with_progress(parsed_model, progress_emit=emit_progress)
3282
- else:
3283
- download_result = {"provider": "ollama", "model": parsed_model, "cached": True}
3284
- emit_progress(model_download_progress_payload(
3285
- "download",
3286
- "이미 다운로드된 Ollama 모델을 확인했습니다.",
3287
- percent=100,
3288
- detail=parsed_model,
3289
- eta_seconds=0,
3290
- ))
3291
- elif parsed_provider == "vllm":
3292
- if not hf_model_ready(parsed_model, "vllm"):
3293
- download_result = download_hf_model(parsed_model, "vllm", progress_emit=emit_progress)
3294
- else:
3295
- download_result = {"provider": "vllm", "model": parsed_model, "cached": True}
3296
- emit_progress(model_download_progress_payload(
3297
- "download",
3298
- "이미 다운로드된 모델을 확인했습니다.",
3299
- percent=100,
3300
- detail=parsed_model,
3301
- eta_seconds=0,
3302
- ))
3303
- emit_progress(model_download_progress_payload(
3304
- "server",
3305
- "vLLM 서버를 시작하는 중입니다.",
3306
- percent=92,
3307
- indeterminate=True,
3308
- ))
3309
- ensure_vllm_server(parsed_model)
3310
- download_result = {**(download_result or {}), "provider": "vllm", "model": parsed_model, "server_ready": True}
3311
- elif parsed_provider == "llamacpp":
3312
- if not hf_model_ready(parsed_model, "llamacpp"):
3313
- download_result = download_hf_model(parsed_model, "llamacpp", progress_emit=emit_progress)
3314
- else:
3315
- download_result = {"provider": "llamacpp", "model": parsed_model, "cached": True}
3316
- emit_progress(model_download_progress_payload(
3317
- "download",
3318
- "이미 다운로드된 GGUF 모델을 확인했습니다.",
3319
- percent=100,
3320
- detail=parsed_model,
3321
- eta_seconds=0,
3322
- ))
3323
- emit_progress(model_download_progress_payload(
3324
- "server",
3325
- "llama.cpp 서버를 시작하는 중입니다.",
3326
- percent=92,
3327
- indeterminate=True,
3328
- ))
3329
- ensure_llamacpp_server(parsed_model)
3330
- download_result = {**(download_result or {}), "provider": "llamacpp", "model": parsed_model, "server_ready": True}
3331
- elif parsed_provider == "lmstudio":
3332
- emit_progress(model_download_progress_payload(
3333
- "download",
3334
- "LM Studio 모델을 확인하는 중입니다.",
3335
- percent=35,
3336
- indeterminate=True,
3337
- ))
3338
- ensured = ensure_lmstudio_model(parsed_model)
3339
- resolved_model = str(
3340
- ensured.get("instance_id")
3341
- or ensured.get("resolved_model")
3342
- or parsed_model
3343
- ).strip()
3344
- prepared_model_name = resolved_model
3345
- prepared_model_id = f"lmstudio:{resolved_model}"
3346
- download_result = ensured
3347
- else:
3348
- emit_progress(model_download_progress_payload(
3349
- "engine",
3350
- "모델 연결을 준비하는 중입니다.",
3351
- percent=30,
3352
- indeterminate=True,
3353
- ))
3354
-
3355
- work_result.update({
3356
- "model_id": prepared_model_id,
3357
- "parsed_provider": parsed_provider,
3358
- "parsed_model": prepared_model_name,
3359
- "install_result": install_result,
3360
- "download_result": download_result,
3361
- })
3362
- work_queue.put({"kind": "done"})
3363
- except HTTPException as exc:
3364
- work_queue.put({"kind": "error", "status_code": exc.status_code, "detail": exc.detail})
3365
- except Exception as exc:
3366
- logging.exception("model prepare stream worker failed")
3367
- work_queue.put({"kind": "error", "status_code": 500, "detail": str(exc)[-2000:]})
3368
-
3369
- worker = threading.Thread(target=blocking_prepare, daemon=True)
3370
- worker.start()
3371
-
3372
- while True:
3373
- item = await asyncio.to_thread(work_queue.get)
3374
- kind = item.get("kind")
3375
- if kind == "progress":
3376
- yield sse_event("progress", item["data"])
3377
- elif kind == "error":
3378
- raise HTTPException(
3379
- status_code=int(item.get("status_code") or 500),
3380
- detail=item.get("detail") or "모델 준비에 실패했습니다.",
3381
- )
3382
- elif kind == "done":
3383
- break
3384
-
3385
- prepared_model_id = str(work_result.get("model_id") or model_id)
3386
- prepared_provider = str(work_result.get("parsed_provider") or parsed_provider)
3387
- install_result = work_result.get("install_result") or {}
3388
- download_result = work_result.get("download_result")
3389
-
3390
- yield sse_event("progress", model_download_progress_payload(
3391
- "load",
3392
- "모델을 메모리에 로드하는 중입니다.",
3393
- percent=96,
3394
- indeterminate=True,
3395
- ))
3396
-
3397
- effective_email = (user_email or get_current_user(request) or "").strip()
3398
- user_api_key = get_user_api_key(effective_email, prepared_provider) if prepared_provider != "local_mlx" else None
3399
- msg = await router.load_model(
3400
- prepared_model_id,
3401
- None,
3402
- draft_model_id=None,
3403
- api_key_override=user_api_key,
3404
- owner=effective_email or None,
3405
- )
3406
- # 피드백 #1/#2: SSE에도 ModelResolution과 smoke test 결과를 같이 내려준다.
3407
- resolution_stream = _ModelResolution.from_request(
3408
- prepared_model_id,
3409
- engine=prepared_provider,
3410
- user_email=effective_email or None,
3411
- engine_aliases=MODEL_ENGINE_ALIASES,
3412
- )
3413
- resolution_stream.update_after_load(actual_current=router.current_model_id)
3414
- yield sse_event("progress", model_download_progress_payload(
3415
- "smoke_test",
3416
- "채팅 호환성 테스트 중입니다.",
3417
- percent=98,
3418
- indeterminate=True,
3419
- ))
3420
- smoke_result: Dict[str, object] = {}
3421
- ready_to_chat = True
3422
- compat_status = "ok"
3423
- try:
3424
- smoke_result = await _smoke_test_loaded_model(resolution_stream, api_key_override=user_api_key)
3425
- ready_to_chat = bool(smoke_result.get("ok"))
3426
- # item 3-3: smoke 결과의 3분류(ok/degraded/failed)를 그대로 노출한다.
3427
- compat_status = str(smoke_result.get("status") or ("ok" if ready_to_chat else "degraded"))
3428
- except Exception as exc:
3429
- logging.warning("smoke test (stream) failed for %s: %s", resolution_stream.load_id, exc)
3430
- compat_status = "unknown"
3431
- result = {
3432
- "status": "ok",
3433
- "message": msg,
3434
- "model": prepared_model_id,
3435
- "current": router.current_model_id,
3436
- "engine": prepared_provider,
3437
- "installed_now": bool(isinstance(install_result, dict) and install_result.get("installed_now")),
3438
- "download": download_result,
3439
- "resolution": resolution_stream.to_dict(),
3440
- "downloaded": True,
3441
- "loaded": True,
3442
- "ready_to_chat": ready_to_chat,
3443
- "compatibility_status": compat_status,
3444
- "smoke_test": smoke_result,
3445
- }
3446
- yield sse_event("progress", model_download_progress_payload(
3447
- "done",
3448
- "모델 준비가 완료되었습니다.",
3449
- percent=100,
3450
- eta_seconds=0,
3451
- ))
3452
- yield sse_event("done", result)
3453
-
3454
-
3455
- CLOUD_VERIFY_CACHE: Dict[str, Dict] = {}
3456
- CLOUD_VERIFY_TTL_SECONDS = 600
3457
-
3458
- async def _probe_cloud_model(model_ref: str) -> Dict[str, object]:
3459
- provider, model_name = parse_model_ref(model_ref)
3460
- config = OPENAI_COMPATIBLE_PROVIDERS.get(provider)
3461
- if not config:
3462
- return {"ok": False, "reason": f"Unsupported provider: {provider}"}
3463
-
3464
- api_key = os.getenv(config["env_key"]) or config.get("api_key_fallback")
3465
- if not api_key:
3466
- return {"ok": False, "reason": f"Missing API key: {config['env_key']}"}
3467
-
3468
- base_url = os.getenv(config.get("base_url_env", "")) if config.get("base_url_env") else None
3469
- base_url = base_url or config.get("base_url")
3470
- client_kwargs = {"api_key": api_key}
3471
- if base_url:
3472
- client_kwargs["base_url"] = base_url
3473
-
3474
- try:
3475
- client = AsyncOpenAI(**client_kwargs)
3476
- await asyncio.wait_for(
3477
- client.chat.completions.create(
3478
- model=model_name,
3479
- messages=[{"role": "user", "content": "ping"}],
3480
- max_tokens=1,
3481
- temperature=0,
3482
- ),
3483
- timeout=15,
3484
- )
3485
- return {"ok": True, "reason": "ok"}
3486
- except Exception as e:
3487
- return {"ok": False, "reason": str(e)[:220]}
3488
-
3489
-
3490
- async def verify_cloud_models(force: bool = False, provider_filter: Optional[str] = None) -> Dict[str, Dict]:
3491
- now = time.time()
3492
- cloud_items = [item for item in router.detected_cloud_models() if item.get("tag") == "cloud"]
3493
- if provider_filter:
3494
- cloud_items = [item for item in cloud_items if item.get("provider") == provider_filter]
3495
-
3496
- results: Dict[str, Dict] = {}
3497
- for item in cloud_items:
3498
- model_ref = item["id"]
3499
- cached = CLOUD_VERIFY_CACHE.get(model_ref)
3500
- if not force and cached and (now - cached.get("ts", 0) <= CLOUD_VERIFY_TTL_SECONDS):
3501
- results[model_ref] = cached
3502
- continue
3503
- if item.get("available") is False:
3504
- record = {"ok": False, "reason": item.get("requires") or "API key missing", "ts": now}
3505
- CLOUD_VERIFY_CACHE[model_ref] = record
3506
- results[model_ref] = record
3507
- continue
3508
- probe = await _probe_cloud_model(model_ref)
3509
- record = {"ok": bool(probe.get("ok")), "reason": probe.get("reason", ""), "ts": now}
3510
- CLOUD_VERIFY_CACHE[model_ref] = record
3511
- results[model_ref] = record
3512
- return results
3513
-
3514
- # ── Health / status / engine-summary router (latticeai.api.health, v1.2.0) ───
3515
- # /health, /mode, /runtime_features, /engines(GET) now live in the health router.
3516
- # Heavier engine mutation endpoints remain below in server_app.
3517
- MODEL_SERVICE = ModelService(
3518
- model_router=router,
3519
- runtime_features=runtime_features,
3520
- is_public=IS_PUBLIC_MODE,
3521
- )
3522
- app.include_router(create_health_router(
3523
- model_service=MODEL_SERVICE,
3524
- engine_status=engine_status,
3525
- get_current_user=get_current_user,
3526
- require_auth=REQUIRE_AUTH,
3527
- app_version=APP_VERSION,
3528
- app_mode=APP_MODE,
3529
- ))
3530
-
3531
-
3532
- # ── Model / Engine router (latticeai.api.models, v1.3.0) ─────────────────────
3533
- app.include_router(create_models_router(
3534
- model_router=router,
3535
- require_user=require_user,
3536
- get_current_user=get_current_user,
3537
- load_users=load_users,
3538
- get_user_role=get_user_role,
3539
- install_engine=install_engine,
3540
- verify_cloud_models=verify_cloud_models,
3541
- normalize_local_model_request=normalize_local_model_request,
3542
- download_hf_model=download_hf_model,
3543
- prepare_and_load_model=prepare_and_load_model,
3544
- prepare_and_load_model_stream=prepare_and_load_model_stream,
3545
- sse_event=sse_event,
3546
- ensure_ollama_server=ensure_ollama_server,
3547
- local_binary=local_binary,
3548
- engine_status=engine_status,
3549
- filter_lower_family_versions=filter_lower_family_versions,
3550
- list_compat_profiles=_list_compat_profiles,
3551
- set_user_api_key=set_user_api_key,
3552
- engine_model_catalog=ENGINE_MODEL_CATALOG,
3553
- model_engine_aliases=MODEL_ENGINE_ALIASES,
3554
- cloud_verify_ttl_seconds=CLOUD_VERIFY_TTL_SECONDS,
3555
- is_public_mode=IS_PUBLIC_MODE,
3556
- allow_local_models=ALLOW_LOCAL_MODELS,
3557
- require_auth=REQUIRE_AUTH,
3558
- ))
3559
-
3560
-
3561
- # ── Chat / Completion ──────────────────────────────────────────────────────────
3562
-
3563
- @app.post("/chat")
3564
- async def chat(req: ChatRequest, request: Request):
3565
- current_user = require_user(request)
3566
- enforce_rate_limit(current_user, "chat")
3567
- img_len = len(req.image_data) if req.image_data else 0
3568
- print(
3569
- f"🧪 /chat request: stream={req.stream} image_data_len={img_len} "
3570
- f"message_len={len(req.message or '')}"
3571
- )
3572
- effective_email = req.user_email or current_user or None
3573
- history_user = get_history_user(effective_email, req.user_nickname)
3574
-
3575
- if is_network_status_request(req.message):
3576
- history_message = f"{req.message}\n[Image attached]" if req.image_data else req.message
3577
- save_to_history("user", history_message, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3578
- try:
3579
- answer = format_network_status(network_status())
3580
- except ToolError as exc:
3581
- answer = f"네트워크 정보를 확인하지 못했습니다: {exc}"
3582
- save_to_history("assistant", answer, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3583
- if req.source != "telegram":
3584
- asyncio.create_task(broadcast_web_chat("user", req.message))
3585
- asyncio.create_task(broadcast_web_chat("assistant", answer))
3586
- if req.stream:
3587
- return StreamingResponse(
3588
- single_text_stream(answer),
3589
- media_type="text/event-stream",
3590
- headers={"X-Model": "network_status"},
3591
- )
3592
- return JSONResponse(content={"response": answer})
3593
-
3594
- if is_clear_command(req.message):
3595
- command = req.message.strip().lower()
3596
- clear_scope = "all" if command == "/clear_all" else "conversation"
3597
- if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3598
- try:
3599
- KNOWLEDGE_GRAPH.ingest_event(
3600
- "ClearEvent",
3601
- f"{command} requested",
3602
- user_email=effective_email,
3603
- user_nickname=req.user_nickname,
3604
- source=req.source or "web",
3605
- conversation_id=req.conversation_id,
3606
- metadata={"command": command, "scope": clear_scope},
3607
- )
3608
- except Exception as e:
3609
- logging.warning("knowledge graph clear event ingest failed: %s", e)
3610
- if command == "/clear_all":
3611
- result = clear_history(0)
3612
- answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
3613
- else:
3614
- if req.conversation_id:
3615
- result = clear_conversation(req.conversation_id)
3616
- answer = f"현재 대화방 채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
3617
- else:
3618
- result = clear_history(0)
3619
- answer = f"채팅창을 정리했습니다. 화면에서 제거 {result.get('removed', 0)}개. 감사 로그와 지식 그래프/RAG 데이터는 유지됩니다."
3620
- append_audit_event(
3621
- "clear_command",
3622
- user_email=effective_email,
3623
- user_nickname=req.user_nickname,
3624
- source=req.source or "web",
3625
- conversation_id=req.conversation_id,
3626
- command=command,
3627
- scope=clear_scope,
3628
- removed=result.get("removed", 0),
3629
- kept=result.get("kept", 0),
3630
- )
3631
- if req.stream:
3632
- return StreamingResponse(
3633
- single_text_stream(answer),
3634
- media_type="text/event-stream",
3635
- headers={"X-Model": "history"},
3636
- )
3637
- return JSONResponse(content={"response": answer})
3638
-
3639
- if is_current_url_request(req.message) and req.client_url:
3640
- answer = f"현재 페이지 URL: {req.client_url}"
3641
- save_to_history("user", req.message, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3642
- save_to_history("assistant", answer, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3643
- if req.source != "telegram":
3644
- asyncio.create_task(broadcast_web_chat("user", req.message))
3645
- asyncio.create_task(broadcast_web_chat("assistant", answer))
3646
- if req.stream:
3647
- return StreamingResponse(
3648
- single_text_stream(answer),
3649
- media_type="text/event-stream",
3650
- headers={"X-Model": "client_url"},
3651
- )
3652
- return JSONResponse(content={"response": answer})
3653
-
3654
- if not router.current_model_id:
3655
- detail = "No model loaded. Call /models/load first."
3656
- if IS_PUBLIC_MODE:
3657
- detail = f"No public model loaded. Set OPENAI_API_KEY and LATTICEAI_PUBLIC_MODEL={PUBLIC_MODEL}, or call /models/load with an OpenAI-compatible model."
3658
- raise HTTPException(status_code=400, detail=detail)
3659
-
3660
- if req.model and req.model != router.current_model_id:
3661
- if req.model not in router.loaded_model_ids:
3662
- raise HTTPException(status_code=404, detail=f"Model '{req.model}' not loaded.")
3663
- router.switch_model(req.model)
3664
-
3665
- lang = detect_language(req.message)
3666
- context = f"[LANGUAGE: {_LANG_HINT[lang]}]\n" + (req.context or "")
3667
- try:
3668
- knowledge_context = gardener.get_relevant_context(req.message)
3669
- if knowledge_context:
3670
- context += f"\n\n[LOCAL KNOWLEDGE BASE]\n{knowledge_context}"
3671
- print(f"📖 Context reinforced with local knowledge.")
3672
- except Exception as e:
3673
- logging.warning("Knowledge reinforcement skipped: %s", e)
3674
-
3675
- is_doc_gen = detect_document_intent(req.message)
3676
- doc_gen_context_result = None
3677
-
3678
- try:
3679
- if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3680
- if is_doc_gen:
3681
- doc_gen_context_result = retrieve_context_for_generation(
3682
- KNOWLEDGE_GRAPH, req.message, max_results=10, max_hops=2,
3683
- )
3684
- graph_md = doc_gen_context_result.get("context_markdown", "")
3685
- if graph_md:
3686
- context += f"\n\n[KNOWLEDGE GRAPH — Document Generation Context]\n{graph_md}"
3687
- print("📝 Document generation context retrieved from knowledge graph.")
3688
- else:
3689
- graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
3690
- if graph_context:
3691
- context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
3692
- print("🕸️ Context reinforced with knowledge graph.")
3693
- except Exception as e:
3694
- logging.warning("Knowledge graph reinforcement skipped: %s", e)
3695
-
3696
- if req.image_data:
3697
- screenshot_context = extract_screenshot_context(req.image_data)
3698
- if screenshot_context:
3699
- context += f"\n\n{screenshot_context}"
3700
-
3701
- if CONFIG.auto_read_chat_paths:
3702
- _file_path_re = re.compile(r'(?:^|[\s\'\"(])((~|/[\w.])[^\s\'")\]]*)', re.MULTILINE)
3703
- for _m in _file_path_re.finditer(req.message or ""):
3704
- _fpath = _m.group(1).strip()
3705
- try:
3706
- _result = local_read(_fpath)
3707
- _fcontent = _result.get("content", "")
3708
- if _fcontent:
3709
- context += f"\n\n[FILE: {_fpath}]\n```\n{_fcontent[:6000]}\n```"
3710
- print(f"📂 Auto-injected file context: {_fpath}")
3711
- except Exception:
3712
- pass
3713
-
3714
- trace_seed = CHAT_SERVICE.build_graph_trace(
3715
- req.message,
3716
- KNOWLEDGE_GRAPH if (ENABLE_GRAPH and KNOWLEDGE_GRAPH) else None,
3717
- context,
3718
- )
3719
-
3720
- history_message = f"{req.message}\n[Image attached]" if req.image_data else req.message
3721
- save_to_history("user", history_message, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3722
- if req.source != "telegram":
3723
- asyncio.create_task(broadcast_web_chat("user", req.message))
3724
-
3725
- if is_doc_gen and ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3726
- conv_key = req.conversation_id or "default"
3727
- session = _doc_gen_sessions.get(conv_key)
3728
- if session is None:
3729
- session = DocumentGenerationSession()
3730
- _doc_gen_sessions[conv_key] = session
3731
- graph_md = (doc_gen_context_result or {}).get("context_markdown", "")
3732
- system_prompt = session.get_system_prompt(graph_md)
3733
- sources = (doc_gen_context_result or {}).get("sources", [])
3734
- footnote = format_sources_footnote(sources)
3735
-
3736
- if req.stream:
3737
- async def _stream_doc_gen():
3738
- collected = []
3739
- async for chunk in router.stream_generate_document(
3740
- req.message, system_prompt,
3741
- max_tokens=req.max_tokens or 8192,
3742
- temperature=req.temperature or 0.3,
3743
- ):
3744
- collected.append(chunk)
3745
- yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
3746
- full_text = "".join(collected)
3747
- if footnote:
3748
- yield f"data: {json.dumps({'text': footnote}, ensure_ascii=False)}\n\n"
3749
- full_text += footnote
3750
- session.update(graph_md, full_text, req.conversation_id)
3751
- save_to_history("assistant", full_text, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3752
- trace_record = CHAT_SERVICE.record_trace(
3753
- question=req.message,
3754
- response=full_text,
3755
- conversation_id=req.conversation_id,
3756
- user_email=effective_email,
3757
- trace=trace_seed,
3758
- )
3759
- if req.source != "telegram":
3760
- asyncio.create_task(broadcast_web_chat("assistant", full_text))
3761
- yield f"data: {json.dumps({'text': '', 'trace_id': trace_record['id'], 'trace': trace_record}, ensure_ascii=False)}\n\n"
3762
- yield "data: [DONE]\n\n"
3763
- return StreamingResponse(
3764
- _stream_doc_gen(),
3765
- media_type="text/event-stream",
3766
- headers={"X-Model": router.current_model_id, "X-Doc-Gen": "true"},
3767
- )
3768
- else:
3769
- result = await router.generate_document(
3770
- req.message, system_prompt,
3771
- max_tokens=req.max_tokens or 8192,
3772
- temperature=req.temperature or 0.3,
3773
- )
3774
- if footnote:
3775
- result += footnote
3776
- session.update(graph_md, result, req.conversation_id)
3777
- save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3778
- trace_record = CHAT_SERVICE.record_trace(
3779
- question=req.message,
3780
- response=str(result),
3781
- conversation_id=req.conversation_id,
3782
- user_email=effective_email,
3783
- trace=trace_seed,
3784
- )
3785
- if req.source != "telegram":
3786
- asyncio.create_task(broadcast_web_chat("assistant", str(result)))
3787
- return JSONResponse(content={"response": str(result), "trace_id": trace_record["id"], "trace": trace_record})
3788
-
3789
- if req.stream:
3790
- recent_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
3791
- stream_context = context
3792
- if recent_context:
3793
- stream_context = f"[RECENT CONVERSATION]\n{recent_context}\n\n{context}".strip()
3794
- return StreamingResponse(
3795
- _stream_chat(req, stream_context, req.image_data, trace_seed=trace_seed, effective_email=effective_email),
3796
- media_type="text/event-stream",
3797
- headers={"X-Model": router.current_model_id},
3798
- )
3799
- else:
3800
- if req.image_data:
3801
- recent_context = build_recent_chat_context(
3802
- limit=6,
3803
- include_image_missing_replies=False,
3804
- user_email=effective_email,
3805
- conversation_id=req.conversation_id,
3806
- )
3807
- full_context = f"[RECENT CONVERSATION]\n{recent_context}\n\n{context}".strip() if recent_context else context
3808
- else:
3809
- history_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
3810
- full_context = f"{history_context}\n{context}" if context else history_context
3811
-
3812
- result = await router.generate(req.message, full_context, req.max_tokens, req.temperature, req.image_data)
3813
-
3814
- save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3815
- trace_record = CHAT_SERVICE.record_trace(
3816
- question=req.message,
3817
- response=str(result),
3818
- conversation_id=req.conversation_id,
3819
- user_email=effective_email,
3820
- trace=trace_seed,
3821
- )
3822
- if req.source != "telegram":
3823
- asyncio.create_task(broadcast_web_chat("assistant", str(result)))
3824
-
3825
- return JSONResponse(content={"response": str(result), "trace_id": trace_record["id"], "trace": trace_record})
3826
-
3827
-
3828
@app.get("/history")
async def fetch_history(request: Request):
    """Return the stored chat history so the web UI can reload earlier conversations.

    The caller must be an authenticated user (``require_user``); the response
    body is whatever ``get_history()`` returns.
    """
    require_user(request)
    history = get_history()
    return history
3833
-
3834
@app.get("/history/conversations")
async def fetch_history_conversations(request: Request):
    """Return the stored history grouped into conversations.

    The caller must be an authenticated user; the grouping itself is done by
    ``group_history_conversations()``.
    """
    require_user(request)
    conversations = group_history_conversations()
    return conversations
3839
-
3840
@app.get("/history/conversations/{conversation_id:path}")
async def fetch_history_conversation(conversation_id: str, request: Request):
    """Return the messages of the selected conversation.

    Raises:
        HTTPException: 404 when ``get_conversation_messages`` yields nothing
            for ``conversation_id``.
    """
    require_user(request)
    messages = get_conversation_messages(conversation_id)
    if messages:
        return {"id": conversation_id, "messages": messages}
    raise HTTPException(status_code=404, detail="대화를 찾을 수 없습니다.")
3848
-
3849
-
3850
@app.delete("/history/conversations/{conversation_id:path}")
async def delete_history_conversation(conversation_id: str, request: Request):
    """Delete only the messages of the selected conversation and audit the deletion.

    The optional ``started_at`` query parameter is forwarded to
    ``clear_conversation`` and recorded in the audit event. Returns the result
    of ``clear_conversation`` unchanged.
    """
    email = require_user(request)
    started_at = request.query_params.get("started_at")
    outcome = clear_conversation(conversation_id, started_at)
    append_audit_event(
        "conversation_delete",
        user_email=email,
        conversation_id=conversation_id,
        started_at=started_at,
        removed=outcome.get("removed", 0),
        kept=outcome.get("kept", 0),
    )
    return outcome
3864
-
3865
-
3866
@app.delete("/history")
async def delete_history(request: Request, keep_last: int = 0):
    """Clear the chat history via ``clear_history(keep_last)`` and audit the deletion.

    Returns the result of ``clear_history`` unchanged; its ``removed`` and
    ``kept`` counts (defaulting to 0) are copied into the audit event.
    """
    email = require_user(request)
    outcome = clear_history(keep_last)
    audit_fields = {
        "user_email": email,
        "keep_last": keep_last,
        "removed": outcome.get("removed", 0),
        "kept": outcome.get("kept", 0),
    }
    append_audit_event("history_delete", **audit_fields)
    return outcome
3878
-
3879
@app.get("/history/search")
async def search_history(q: str, request: Request):
    """Search the chat history for a keyword (case-insensitive substring match).

    Matching messages are grouped by ``conversation_id`` (messages without one
    go under ``"legacy"``); only the last 30 groups are returned. A blank
    query returns an empty result list.
    """
    require_user(request)
    if not (q and q.strip()):
        return {"results": [], "query": q}

    needle = q.strip().lower()
    hits = [entry for entry in get_history() if needle in (entry.get("content") or "").lower()]

    by_conversation: Dict[str, Dict] = {}
    for entry in hits:
        cid = entry.get("conversation_id") or "legacy"
        bucket = by_conversation.get(cid)
        if bucket is None:
            # The first matching message of a conversation supplies its title.
            bucket = {"conversation_id": cid, "title": conversation_title(entry), "messages": []}
            by_conversation[cid] = bucket
        bucket["messages"].append(entry)

    return {"results": list(by_conversation.values())[-30:], "query": q}
3895
-
3896
async def _stream_chat(
    req: ChatRequest,
    context: str = "",
    image_data: str = None,
    *,
    trace_seed: Optional[Dict] = None,
    effective_email: Optional[str] = None,
) -> AsyncIterator[str]:
    """Stream a chat completion as server-sent events and persist the final answer.

    Each generated chunk is emitted as a ``data: {...}`` event carrying the
    chunk text and the current model id. After the stream ends the full
    response is saved to history, a trace is recorded (using ``trace_seed``
    when given, otherwise a freshly built graph trace), the answer is
    broadcast to web clients unless the request came from Telegram, and a
    final event with the trace followed by ``[DONE]`` is emitted.
    """
    pieces = []
    async for chunk in router.stream_generate(req.message, context, req.max_tokens, req.temperature, image_data):
        if hasattr(chunk, "text"):
            text = chunk.text
        elif isinstance(chunk, str) and "text='" in chunk:
            # The chunk looks like the repr of a response object: pull out the text field.
            try:
                text = chunk.split("text='")[1].split("', token=")[0].replace('\\n', '\n').replace('\\\\n', '\n')
            except Exception:
                text = chunk
        else:
            text = chunk

        pieces.append(str(text))
        yield f"data: {json.dumps({'chunk': text, 'model': router.current_model_id}, ensure_ascii=False)}\n\n"

    full_response = "".join(pieces)
    history_user = get_history_user(req.user_email, req.user_nickname)
    save_to_history("assistant", full_response, source=req.source or "web", conversation_id=req.conversation_id, **history_user)

    trace = trace_seed
    if not trace:
        trace = CHAT_SERVICE.build_graph_trace(
            req.message,
            KNOWLEDGE_GRAPH if (ENABLE_GRAPH and KNOWLEDGE_GRAPH) else None,
            context,
        )
    trace_record = CHAT_SERVICE.record_trace(
        question=req.message,
        response=full_response,
        conversation_id=req.conversation_id,
        user_email=effective_email or req.user_email,
        trace=trace,
    )

    if req.source != "telegram":
        asyncio.create_task(broadcast_web_chat("assistant", full_response))
    yield f"data: {json.dumps({'chunk': '', 'model': router.current_model_id, 'trace_id': trace_record['id'], 'trace': trace_record}, ensure_ascii=False)}\n\n"
    yield "data: [DONE]\n\n"
3934
-
3935
-
3936
- # ── Local Computer Agent ──────────────────────────────────────────────────────
3937
-
3938
- # ── Agent Tool Registry / Governance ──────────────────────────────────────────
3939
-
3940
# Module-level views over DEFAULT_TOOL_REGISTRY, bound once at import time.

# Copies (set / dict) so this module owns its own containers.
_FILE_CREATE_ACTIONS = set(DEFAULT_TOOL_REGISTRY.file_create_actions)
TOOL_GOVERNANCE: Dict[str, ToolPolicy] = dict(DEFAULT_TOOL_REGISTRY.governance)

# Direct references to registry attributes.
_TOOL_GOVERNANCE_DEFAULT: ToolPolicy = DEFAULT_TOOL_REGISTRY.default_policy
ADMIN_ONLY_TOOLS: frozenset[str] = DEFAULT_TOOL_REGISTRY.admin_only_tools
_LOCAL_WRITE_BLOCKED_PREFIXES = DEFAULT_TOOL_REGISTRY.local_write_blocked_prefixes
_RISK_LEVEL_MAP = DEFAULT_TOOL_REGISTRY.risk_level_map
3946
-
3947
-
3948
def _agent_policy(action_name: str, args: dict) -> ToolPolicy:
    """Return the policy DEFAULT_TOOL_REGISTRY assigns to ``action_name`` called with ``args``."""
    policy = DEFAULT_TOOL_REGISTRY.policy_for(action_name, args)
    return policy
3950
-
3951
-
3952
def _agent_risk(action_name: str, args: dict) -> str:
    """Return the risk level DEFAULT_TOOL_REGISTRY reports for ``action_name`` with ``args``."""
    level = DEFAULT_TOOL_REGISTRY.risk_level(action_name, args)
    return level
3954
-
3955
-
3956
def get_tool_permission(name: str, args: Optional[dict] = None) -> ToolPermission:
    """Return the registry's permission record for tool ``name``.

    ``args`` is optional; a missing or empty value is passed to the registry
    as an empty dict.
    """
    call_args = args if args else {}
    return DEFAULT_TOOL_REGISTRY.permission(name, call_args)
3958
-
3959
-
3960
def list_tool_permissions() -> list:
    """Return the permission listing exposed by DEFAULT_TOOL_REGISTRY."""
    permissions = DEFAULT_TOOL_REGISTRY.permissions()
    return permissions
3962
-
3963
-
3964
- # Tools that require admin role -- computer control + shell execution
3965
def _check_tool_role(tool_name: str, current_user: str) -> None:
    """Reject admin-only tools for users whose role is not ``"admin"``.

    Tools outside ``ADMIN_ONLY_TOOLS`` pass without any user lookup.

    Raises:
        HTTPException: 403 when ``tool_name`` is admin-only and the role
            resolved for ``current_user`` is not ``"admin"``.
    """
    if tool_name in ADMIN_ONLY_TOOLS:
        role = get_user_role(current_user, load_users())
        if role != "admin":
            raise HTTPException(
                status_code=403,
                detail=f"'{tool_name}' 툴은 관리자 전용입니다.",
            )
3974
-
3975
-
3976
def _collect_created_files(transcript: list) -> list:
    """Collect descriptors of the files produced by file-creating transcript steps.

    Only steps whose ``action`` is in ``_FILE_CREATE_ACTIONS`` are considered.
    A step result with a ``created_files`` list yields one entry per listed
    path (``bytes`` reported as 0); otherwise a truthy ``path`` in the result
    yields a single entry with the result's ``bytes`` (default 0).

    Steps whose ``result`` is missing, ``None`` or not a dict are skipped
    instead of raising ``AttributeError``.

    Returns:
        A list of dicts with the keys ``path``, ``filename``, ``bytes`` and
        ``action``, in transcript order.
    """
    files = []
    for step in transcript:
        action = step.get("action")
        if action not in _FILE_CREATE_ACTIONS:
            continue

        result = step.get("result")
        if not isinstance(result, dict):
            # ``step.get("result", {})`` would still hand back None (or a bare
            # message) when the key exists with such a value.
            continue

        created = result.get("created_files")
        if isinstance(created, list):
            files.extend(
                {"path": rel_path, "filename": Path(rel_path).name, "bytes": 0, "action": action}
                for rel_path in created
            )
            continue

        path = result.get("path")
        if path:
            files.append({
                "path": path,
                "filename": Path(path).name,
                "bytes": result.get("bytes", 0),
                "action": action,
            })
    return files
3999
-
4000
-
4001
- # ── Agent Runtime wiring ──────────────────────────────────────────────────────
4002
- # The Discover→Plan→Implement→Verify state machine lives in
4003
- # latticeai.core.agent. server.py wires the ports (LLM, tools, governance,
4004
- # audit, prompts) into one AgentRuntime and keeps only the HTTP glue below.
4005
-
4006
def _build_agent_runtime() -> AgentRuntime:
    """Wire this module's LLM, tool, governance, audit and prompt ports into one AgentRuntime."""

    def _risk_for(policy):
        # Unknown risk labels fall back to "medium".
        return _RISK_LEVEL_MAP.get(policy["risk"], "medium")

    ports = AgentDeps(
        # LLM access
        generate_as=router.generate_as,
        generate=router.generate,
        # tool execution and governance
        execute_tool=execute_tool,
        policy_for=_agent_policy,
        risk_level=_risk_for,
        check_role=_check_tool_role,
        tool_governance=TOOL_GOVERNANCE,
        file_create_actions=frozenset(_FILE_CREATE_ACTIONS),
        # history, knowledge and audit hooks
        recent_chat_context=build_recent_chat_context,
        clear_history=clear_history,
        knowledge_save=knowledge_save,
        audit=append_audit_event,
        # role prompts
        planner_prompt=PLANNER_PROMPT,
        executor_prompt=EXECUTOR_PROMPT,
        critic_prompt=CRITIC_PROMPT,
        memory_updater_prompt=MEMORY_UPDATER_PROMPT,
        agent_root=AGENT_ROOT,
    )
    return AgentRuntime(ports)


# Single runtime instance shared by the agent endpoints, built at import time.
_AGENT_RUNTIME = _build_agent_runtime()
4030
-
4031
-
4032
- # ── Eval harness ──────────────────────────────────────────────────────────────
4033
-
4034
@app.post("/agent/eval")
async def agent_eval(req: AgentEvalRequest, request: Request):
    """Run a skill's eval cases from schema.json and return pass/fail per case.

    The skill directory is resolved under ``BASE_DIR / "skills"``. Because
    ``req.skill`` is client-supplied, names that resolve outside that
    directory are rejected with the same 404 as an unknown skill. The action
    to run is the schema's ``action`` (default: the skill name) and is subject
    to the same admin-only check as other tool invocations.

    A case passes when its ``pass_criteria`` contains ``success == true`` /
    ``success == false`` and the tool result matches; cases with any other
    criteria are reported as passed and need manual review. A case whose
    execution raises is reported as failed with the error text.

    Raises:
        HTTPException: 404 for an unknown skill or a path escaping the skills
            directory; 500 when schema.json cannot be read or parsed as a JSON
            object; 403 (from ``_check_tool_role``) for admin-only actions.
    """
    current_user = require_user(request)
    missing_detail = f"Skill '{req.skill}' not found or missing schema.json"

    skills_root = (BASE_DIR / "skills").resolve()
    skill_dir = (skills_root / req.skill).resolve()
    # Containment check: "../" segments or an absolute path must not leave skills_root.
    if skill_dir != skills_root and skills_root not in skill_dir.parents:
        raise HTTPException(404, detail=missing_detail)
    schema_path = skill_dir / "schema.json"
    if not schema_path.exists():
        raise HTTPException(404, detail=missing_detail)

    try:
        schema = json.loads(schema_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        raise HTTPException(500, detail=f"Skill '{req.skill}' has an unreadable schema.json: {exc}") from exc
    if not isinstance(schema, dict):
        raise HTTPException(500, detail=f"Skill '{req.skill}' schema.json must contain a JSON object")

    eval_cases = schema.get("evals", [])
    if req.case_id:
        eval_cases = [c for c in eval_cases if c.get("id") == req.case_id]
    if not eval_cases:
        return {"skill": req.skill, "total": 0, "passed": 0, "failed": 0, "results": [],
                "message": "No eval cases defined in schema.json"}

    action_name = schema.get("action", req.skill)
    # Eval runs go through execute_tool directly, so apply the admin-only gate here.
    _check_tool_role(action_name, current_user)

    results = []
    for case in eval_cases:
        case_id = case.get("id", "?")
        description = case.get("description", "")
        criteria = case.get("pass_criteria", "")
        try:
            result = execute_tool(action_name, case.get("input", {}))
            if "success == true" in criteria:
                passed = result.get("success") is True
            elif "success == false" in criteria:
                passed = result.get("success") is False
            else:
                passed = True  # manual review required
            results.append({"id": case_id, "description": description,
                            "passed": passed, "result": result, "pass_criteria": criteria})
        except Exception as exc:
            results.append({"id": case_id, "description": description,
                            "passed": False, "error": str(exc),
                            "pass_criteria": criteria})

    n_passed = sum(1 for r in results if r.get("passed") is True)
    return {
        "skill": req.skill, "action": action_name,
        "total": len(results), "passed": n_passed, "failed": len(results) - n_passed,
        "results": results,
    }
4077
-
4078
-
4079
@app.post("/agent")
async def agent(req: AgentRequest, request: Request):
    """Natural-language local agent.

    State machine (arrow placement was lost in the flattened original, so the
    branch targets below are a best-effort reading -- confirm against
    latticeai.core.agent):

        IDLE → PLANNING → WAITING_APPROVAL → EXECUTING → VERIFYING
                  ↓                                         ↓
                FAILED                  DONE | EXECUTING(retry) | ROLLBACK
                                                                   ↓
                                                                 FAILED

    The request is planned first. With ``req.human_in_loop`` set, the planned
    context is parked in ``_pending_agents`` under a random ``context_id`` and
    the plan is returned for approval; otherwise the plan is auto-approved and
    run to completion.

    Raises:
        HTTPException: 400 when no model is loaded.
    """
    current_user = require_user(request)
    enforce_rate_limit(current_user, "agent")
    if not router.current_model_id:
        raise HTTPException(status_code=400, detail="No model loaded. Call /models/load first.")

    ensure_agent_root()
    lang_hint = _LANG_HINT[detect_language(req.message)]
    max_steps = max(1, min(req.max_steps, 50))  # clamp the requested step budget to 1..50
    max_retry = 3

    ctx = AgentRunContext()
    ctx.executing_model = req.executing_model
    ctx.reviewing_model = req.reviewing_model

    # PLANNING phase
    ctx.state = AgentState.PLANNING
    ctx.state_history.append(ctx.state.value)
    await _AGENT_RUNTIME.plan(ctx, req, lang_hint, current_user, model_id=req.planning_model)

    if not req.human_in_loop:
        # Default behaviour: auto-approve and run to completion.
        _AGENT_RUNTIME.approve(ctx, current_user)
        return await _agent_finish(ctx, req, lang_hint, current_user, max_steps, max_retry)

    # Human-in-the-loop: pause after planning and hand the plan to the UI.
    context_id = secrets.token_urlsafe(16)
    with _pending_agents_lock:
        _pending_agents[context_id] = (ctx, req, lang_hint, current_user)
    response = {
        "status": "waiting_approval",
        "context_id": context_id,
        "plan": ctx.plan,
        "steps": ctx.transcript,
        "state_history": ctx.state_history,
    }
    # Report the model each phase uses, falling back to the active model.
    response["planning_model"] = req.planning_model or router.current_model_id
    response["executing_model"] = req.executing_model or router.current_model_id
    response["reviewing_model"] = req.reviewing_model or router.current_model_id
    return response
4129
-
4130
-
4131
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references, so an unreferenced task can be garbage-collected mid-flight.
_AGENT_BACKGROUND_TASKS: set = set()


async def _agent_finish(
    ctx: AgentRunContext, req: AgentRequest, lang_hint: str,
    current_user: str, max_steps: int, max_retry: int,
) -> dict:
    """HTTP glue: drive the runtime to a terminal state, persist, shape the response.

    After the runtime finishes, a memory update is scheduled in the
    background, the exchange is saved to history and the run is recorded in
    WORKSPACE_OS (best effort -- a failure there is only logged).

    Returns:
        A dict with ``status`` (``"ok"`` only when the final state is
        ``AgentState.DONE``), ``response``, ``workspace``, ``steps``,
        ``state_history``, ``final_state`` and ``created_files``.
    """
    await _AGENT_RUNTIME.run_to_completion(ctx, req, lang_hint, current_user, max_steps, max_retry)

    memory_task = asyncio.create_task(_AGENT_RUNTIME.memory_update(ctx, req, current_user))
    _AGENT_BACKGROUND_TASKS.add(memory_task)
    memory_task.add_done_callback(_AGENT_BACKGROUND_TASKS.discard)

    message = ctx.final_message or "작업을 완료했습니다."
    status = "ok" if ctx.state == AgentState.DONE else "failed"
    source = req.source or "web"
    save_to_history("user", req.message, source=source, conversation_id=req.conversation_id)
    save_to_history("assistant", message, source=source, conversation_id=req.conversation_id)
    try:
        WORKSPACE_OS.record_agent_run(
            agent_id="agent:executor",
            status=status,
            input_text=req.message,
            output_text=message,
            user_email=current_user or None,
            timeline=ctx.transcript,
            relationships=["agent:planner", "agent:reviewer"],
            graph=_workspace_graph(),
        )
    except Exception as exc:
        logging.warning("workspace agent run record failed: %s", exc)

    return {
        "status": status,
        "response": message,
        "workspace": str(AGENT_ROOT),
        "steps": ctx.transcript,
        "state_history": ctx.state_history,
        "final_state": ctx.state.value,
        "created_files": _collect_created_files(ctx.transcript),
    }
4165
-
4166
-
4167
@app.post("/agent/resume")
async def agent_resume(req: AgentResumeRequest, request: Request):
    """Resume a paused agent after human approval of the plan.

    The pending context is removed from ``_pending_agents`` whether the plan
    is approved or cancelled, so a ``context_id`` can be used only once. An
    approved run may carry a modified plan and model overrides, which take
    priority over the original request.

    Raises:
        HTTPException: 404 when ``context_id`` is unknown or already consumed.
    """
    current_user = require_user(request)

    with _pending_agents_lock:
        entry = _pending_agents.pop(req.context_id, None)
    if not entry:
        raise HTTPException(status_code=404, detail="Agent context not found or expired. Start a new request.")

    # NOTE(review): the user who created the plan (_orig_user) is not compared
    # with current_user, so anyone holding the context_id can approve it and
    # the run executes with the approver's role. Confirm this is intended.
    ctx, orig_req, lang_hint, _orig_user = entry

    if not req.approved:
        return {"status": "cancelled", "response": "사용자가 계획을 취소했습니다."}

    if req.modified_plan:
        ctx.plan = req.modified_plan
        # Keep the transcript in sync with the edited plan. The guard avoids an
        # IndexError/AttributeError (and a lost context, since it is already
        # popped) when the transcript is empty or its tail is not a dict.
        if ctx.transcript and isinstance(ctx.transcript[-1], dict):
            ctx.transcript[-1].update(ctx.plan)

    # Apply model overrides from resume request (takes priority over original request)
    ctx.executing_model = req.executing_model or ctx.executing_model
    ctx.reviewing_model = req.reviewing_model or ctx.reviewing_model

    _AGENT_RUNTIME.approve(ctx, current_user)

    max_steps = max(1, min(orig_req.max_steps, 50))
    max_retry = 3
    return await _agent_finish(ctx, orig_req, lang_hint, current_user, max_steps, max_retry)
4195
-
4196
-
4197
- # ── Direct Tool API ───────────────────────────────────────────────────────────
4198
-
4199
def _tool_response(fn, *args):
    """Call ``fn(*args)`` and wrap the result in the standard tool envelope.

    Returns:
        ``{"status": "ok", "workspace": <AGENT_ROOT as str>, "result": <fn result>}``.

    Raises:
        HTTPException: 400 carrying the message of a ``ToolError`` raised by ``fn``.
    """
    try:
        outcome = fn(*args)
    except ToolError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"status": "ok", "workspace": str(AGENT_ROOT), "result": outcome}
4204
-
4205
-
4206
@app.post("/tools/list_dir")
async def tools_list_dir(req: ToolPathRequest, request: Request):
    """Run ``list_dir`` on ``req.path`` for an authenticated user and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(list_dir, req.path)
    return envelope
4210
-
4211
-
4212
@app.post("/tools/workspace_tree")
async def tools_workspace_tree(req: ToolWorkspaceTreeRequest, request: Request):
    """Run ``workspace_tree`` on ``req.path`` with ``req.max_depth`` and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(workspace_tree, req.path, req.max_depth)
    return envelope
4216
-
4217
-
4218
@app.post("/tools/read_file")
async def tools_read_file(req: ToolReadFileRequest, request: Request):
    """Run ``read_file`` with the requested offset, limit and line-number options.

    A ``ToolError`` is turned into HTTP 400 by ``_tool_response``.
    """
    require_user(request)

    def _read():
        # Keyword arguments cannot go through _tool_response's *args, hence the closure.
        return read_file(req.path, offset=req.offset, limit=req.limit, line_numbers=req.line_numbers)

    return _tool_response(_read)
4226
-
4227
-
4228
@app.post("/tools/write_file")
async def tools_write_file(req: ToolWriteFileRequest, request: Request):
    """Run ``write_file`` with ``req.path`` and ``req.content`` and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(write_file, req.path, req.content)
    return envelope
4232
-
4233
-
4234
@app.post("/tools/edit_file")
async def tools_edit_file(req: ToolEditFileRequest, request: Request):
    """Run ``edit_file`` to replace ``req.old_string`` with ``req.new_string`` in ``req.path``.

    ``req.replace_all`` is forwarded as given. A ``ToolError`` is turned into
    HTTP 400 by ``_tool_response``.
    """
    require_user(request)

    def _edit():
        # Keyword arguments cannot go through _tool_response's *args, hence the closure.
        return edit_file(req.path, req.old_string, req.new_string, replace_all=req.replace_all)

    return _tool_response(_edit)
4242
-
4243
-
4244
@app.post("/tools/search_files")
async def tools_search_files(req: ToolSearchFilesRequest, request: Request):
    """Run ``search_files`` with ``req.query``, ``req.path`` and ``req.max_results``."""
    require_user(request)
    envelope = _tool_response(search_files, req.query, req.path, req.max_results)
    return envelope
4248
-
4249
-
4250
@app.post("/tools/grep")
async def tools_grep(req: ToolGrepRequest, request: Request):
    """Run ``grep`` for ``req.pattern`` with the request's path, glob and matching options.

    A ``ToolError`` is turned into HTTP 400 by ``_tool_response``.
    """
    require_user(request)
    options = {
        "path": req.path,
        "glob": req.glob,
        "max_results": req.max_results,
        "case_insensitive": req.case_insensitive,
        "context_lines": req.context_lines,
    }

    def _grep():
        return grep(req.pattern, **options)

    return _tool_response(_grep)
4265
-
4266
-
4267
@app.post("/tools/todo_read")
async def tools_todo_read(request: Request):
    """Run ``todo_read`` for an authenticated user and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(todo_read)
    return envelope
4271
-
4272
-
4273
@app.post("/tools/todo_write")
async def tools_todo_write(req: ToolTodoWriteRequest, request: Request):
    """Run ``todo_write`` with ``req.todos`` and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(todo_write, req.todos)
    return envelope
4277
-
4278
-
4279
@app.post("/tools/clear_history")
async def tools_clear_history(req: ToolClearHistoryRequest, request: Request):
    """Clear chat history via ``clear_history(req.keep_last)`` and audit it with source ``"tools"``.

    Returns the raw ``clear_history`` result, not the ``_tool_response`` envelope.
    """
    current_user = require_user(request)
    outcome = clear_history(req.keep_last)
    audit_fields = {
        "user_email": current_user,
        "source": "tools",
        "keep_last": req.keep_last,
        "removed": outcome.get("removed", 0),
        "kept": outcome.get("kept", 0),
    }
    append_audit_event("history_delete", **audit_fields)
    return outcome
4292
-
4293
-
4294
@app.post("/tools/inspect_html")
async def tools_inspect_html(req: ToolPathRequest, request: Request):
    """Run ``inspect_html`` on ``req.path`` and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(inspect_html, req.path)
    return envelope
4298
-
4299
-
4300
@app.post("/tools/preview_url")
async def tools_preview_url(req: ToolPathRequest, request: Request):
    """Run ``preview_url`` on ``req.path`` and return the tool envelope."""
    require_user(request)
    envelope = _tool_response(preview_url, req.path)
    return envelope
4304
-
4305
-
4306
@app.post("/tools/create_docx")
async def tools_create_docx(req: ToolDocxRequest, request: Request):
    """Run ``create_docx`` with the request's title, body and filename."""
    require_user(request)
    envelope = _tool_response(create_docx, req.title, req.body, req.filename)
    return envelope
4310
-
4311
-
4312
@app.post("/tools/create_xlsx")
async def tools_create_xlsx(req: ToolXlsxRequest, request: Request):
    """Run ``create_xlsx`` with the request's rows, filename and sheet name."""
    require_user(request)
    envelope = _tool_response(create_xlsx, req.rows, req.filename, req.sheet_name)
    return envelope
4316
-
4317
-
4318
@app.post("/tools/create_pptx")
async def tools_create_pptx(req: ToolPptxRequest, request: Request):
    """Run ``create_pptx`` with the request's title, slides and filename."""
    require_user(request)
    envelope = _tool_response(create_pptx, req.title, req.slides, req.filename)
    return envelope
4322
-
4323
-
4324
@app.post("/tools/create_pdf")
async def tools_create_pdf(req: ToolPdfRequest, request: Request):
    """Run ``create_pdf`` with the request's title, body and filename."""
    require_user(request)
    envelope = _tool_response(create_pdf, req.title, req.body, req.filename)
    return envelope
4328
-
4329
-
4330
@app.post("/tools/read_document")
async def tools_read_document(req: ToolPathRequest, request: Request):
    """Run ``read_document`` on ``req.path`` and return the tool envelope.

    A path that is absolute after ``~`` expansion first has to pass
    ``_require_local_approval`` for the ``"read"`` action; other paths skip
    that check.
    """
    current_user = require_user(request)
    is_absolute = Path(req.path).expanduser().is_absolute()
    if is_absolute:
        _require_local_approval(token=req.approval_token, path=req.path, action="read", user_email=current_user)
    return _tool_response(read_document, req.path)
4336
-
4337
-
4338
@app.get("/tools/pdf_pages")
async def tools_pdf_pages(path: str, request: Request, approval_token: Optional[str] = None):
    """Render PDF pages as base64 PNG images using pypdfium2 (Apache-2.0).

    Access always goes through ``_require_local_approval`` for the ``"read"``
    action. At most the first 20 pages are rendered, at scale 1.5.

    Returns:
        ``{"total": <page count>, "pages": [{"page": n, "b64": <PNG>}, ...]}``.

    Raises:
        HTTPException: 404 when the path is not an existing file; 500 when
            opening or rendering the document fails.
    """
    current_user = require_user(request)
    _require_local_approval(token=approval_token, path=path, action="read", user_email=current_user)
    target = Path(path).expanduser().resolve()
    if not (target.exists() and target.is_file()):
        raise HTTPException(status_code=404, detail="File not found")

    import io
    import pypdfium2 as pdfium

    doc = None
    try:
        doc = pdfium.PdfDocument(str(target))
        total = len(doc)
        rendered = []
        for index in range(min(total, 20)):  # at most 20 pages
            # Page, bitmap and image keep their own names so each object stays
            # referenced while the next step uses it.
            page = doc[index]
            bitmap = page.render(scale=1.5)
            image = bitmap.to_pil()
            png = io.BytesIO()
            image.save(png, format="PNG")
            encoded = base64.b64encode(png.getvalue()).decode()
            rendered.append({"page": index + 1, "b64": encoded})
        return {"total": total, "pages": rendered}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF 렌더링 실패: {e}")
    finally:
        if doc is not None:
            try:
                doc.close()
            except Exception as e:
                logging.warning("pypdfium2 doc close failed: %s", e)
4370
-
4371
-
4372
@app.get("/tools/download")
async def tools_download(path: str, request: Request):
    """Serve a generated file from agent workspace for download.

    ``path`` is URL-unquoted, stripped of leading slashes and resolved under
    ``AGENT_ROOT``.

    Raises:
        HTTPException: 403 when the resolved path is neither ``AGENT_ROOT``
            nor inside it; 404 when it is not an existing regular file.
    """
    require_user(request)
    from urllib.parse import unquote

    relative = unquote(path).lstrip("/")
    target = (AGENT_ROOT / relative).resolve()

    inside_workspace = target == AGENT_ROOT or AGENT_ROOT in target.parents
    if not inside_workspace:
        raise HTTPException(status_code=403, detail="경로가 작업 공간 밖입니다.")
    if not (target.exists() and target.is_file()):
        raise HTTPException(status_code=404, detail="파일이 없습니다.")

    return FileResponse(
        path=target,
        filename=target.name,
        media_type="application/octet-stream",
    )
4388
-
4389
-
4390
@app.post("/upload/document")
async def upload_document(request: Request, file: UploadFile = File(...)):
    """Upload a document and extract text (PDF, DOCX, XLSX, PPTX, TXT, MD, CSV).

    The upload is limited to 10 MB and its bytes must match the claimed
    extension. The extracted text is classified for sensitivity, ingested
    into the knowledge graph when that is enabled (best effort: a failure is
    reported under ``result["knowledge_graph"]["error"]``), and the upload is
    audited. The temporary file is always removed.

    Returns:
        The ``read_document`` result extended with ``knowledge_graph`` and
        ``original_filename``.

    Raises:
        HTTPException: 400 for an unsupported extension, an oversized file,
            content that does not match the extension, or a ``ToolError``
            raised while reading the document.
    """
    current_user = require_user(request)
    enforce_rate_limit(current_user, "upload")

    suffix = Path(file.filename or "upload").suffix.lower()
    allowed = {".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".csv"}
    if suffix not in allowed:
        raise HTTPException(status_code=400, detail=f"지원하지 않는 형식: {suffix}")

    max_bytes = 10 * 1024 * 1024
    # Read one byte past the limit: enough to detect an oversized upload
    # without pulling an arbitrarily large body into memory.
    contents = await file.read(max_bytes + 1)
    if len(contents) > max_bytes:
        raise HTTPException(status_code=400, detail="파일이 너무 큽니다. 최대 10MB.")
    # MIME sniff — verify the bytes actually match the claimed extension (cheap header check)
    if not _bytes_match_extension(contents, suffix):
        raise HTTPException(status_code=400, detail=f"파일 내용이 확장자({suffix})와 일치하지 않습니다.")

    conversation_id = request.query_params.get("conversation_id")
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    tmp_path = tmp.name
    try:
        # Writing inside the try block lets the finally clause remove the
        # temp file even when the write itself fails.
        with tmp:
            tmp.write(contents)

        result = read_document(tmp_path)
        sensitive = classify_sensitive_message(
            {
                "role": "document",
                "content": result.get("content") or result.get("preview") or "",
                "user_email": current_user,
                "timestamp": datetime.now().isoformat(),
            },
            -1,
        )
        try:
            if not (ENABLE_GRAPH and KNOWLEDGE_GRAPH):
                # Routed through the except below so the response reports
                # {"error": "graph disabled"}.
                raise RuntimeError("graph disabled")
            graph_result = KNOWLEDGE_GRAPH.ingest_document(
                Path(tmp_path),
                original_filename=file.filename,
                mime_type=file.content_type,
                uploader=current_user,
                conversation_id=conversation_id,
                extracted=result,
            )
            result["knowledge_graph"] = {
                "node_id": graph_result["node_id"],
                "sha256": graph_result["sha256"],
            }
        except Exception as graph_error:
            logging.warning("knowledge graph document ingest failed: %s", graph_error)
            result["knowledge_graph"] = {"error": str(graph_error)}

        append_audit_event(
            "document_upload",
            user_email=current_user,
            conversation_id=conversation_id,
            filename=file.filename,
            mime_type=file.content_type,
            ext=suffix,
            bytes=len(contents),
            extracted_chars=result.get("chars"),
            graph_node=(result.get("knowledge_graph") or {}).get("node_id"),
            content_preview=sensitive.get("preview"),
            sensitivity=sensitive.get("sensitivity"),
            sensitive_labels=sensitive.get("labels") or [],
        )
    except ToolError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    finally:
        try:
            Path(tmp_path).unlink()
        except OSError:
            pass

    result["original_filename"] = file.filename
    return result
4460
-
4461
-
4462
- _PERMISSION_ACTION_LABELS = {
4463
- "list": "폴더 목록 보기",
4464
- "read": "파일 읽기",
4465
- "write": "파일 쓰기",
4466
- }
4467
-
4468
- _LOCAL_APPROVAL_TTL_SECONDS = 5 * 60
4469
- _local_approval_lock = threading.Lock()
4470
- _local_approvals: Dict[str, Dict[str, object]] = {}
4471
-
4472
- # Discord bot / webhook settings for permission notifications (optional)
4473
- DISCORD_PERMISSION_WEBHOOK_URL = CONFIG.discord_permission_webhook
4474
- DISCORD_BOT_TOKEN = CONFIG.discord_bot_token
4475
- DISCORD_PERMISSION_CHANNEL = CONFIG.discord_permission_channel
4476
-
4477
- # Secret token that allows permission monitor script to call approve/deny endpoints
4478
- # without an admin user session (used by perm_monitor.py).
4479
- PERMISSION_MONITOR_SECRET = CONFIG.permission_monitor_secret
4480
-
4481
- # Local queue file — written by server, read by perm_monitor.py
4482
- _PERM_QUEUE_FILE = DATA_DIR / "permission_queue.json"
4483
-
4484
-
4485
- def _perm_queue_write(token: str, record: Dict[str, object]) -> None:
4486
- """Append a permission request to the local queue file for the monitor script."""
4487
- try:
4488
- queue: Dict = {}
4489
- if _PERM_QUEUE_FILE.exists():
4490
- try:
4491
- queue = json.loads(_PERM_QUEUE_FILE.read_text(encoding="utf-8"))
4492
- except Exception:
4493
- queue = {}
4494
- queue[token] = {**record, "notified": False}
4495
- _PERM_QUEUE_FILE.write_text(json.dumps(queue, ensure_ascii=False, indent=2), encoding="utf-8")
4496
- except Exception as exc:
4497
- logging.warning("perm_queue_write failed: %s", exc)
4498
-
4499
-
4500
- def _perm_queue_remove(token: str) -> None:
4501
- """Remove a token from the queue file after approval or denial."""
4502
- try:
4503
- if not _PERM_QUEUE_FILE.exists():
4504
- return
4505
- queue: Dict = json.loads(_PERM_QUEUE_FILE.read_text(encoding="utf-8"))
4506
- queue.pop(token, None)
4507
- _PERM_QUEUE_FILE.write_text(json.dumps(queue, ensure_ascii=False, indent=2), encoding="utf-8")
4508
- except Exception as exc:
4509
- logging.warning("perm_queue_remove failed: %s", exc)
4510
-
4511
-
4512
- def _normalize_local_path_for_approval(path: str) -> str:
4513
- return str(Path(path).expanduser().resolve())
4514
-
4515
-
4516
- def _content_fingerprint(content: str = "") -> str:
4517
- return hashlib.sha256(content.encode("utf-8")).hexdigest()
4518
-
4519
-
4520
- def _notify_discord_permission_sync(token: str, path: str, action: str, user_email: str) -> None:
4521
- """Fire-and-forget Discord bot/webhook notification for permission requests."""
4522
- # Try Discord bot API first (sends to a specific channel), then fall back to webhook
4523
- sent = False
4524
- if DISCORD_BOT_TOKEN and DISCORD_PERMISSION_CHANNEL:
4525
- action_label = _PERMISSION_ACTION_LABELS.get(action, action)
4526
- expires_at_iso = time.strftime(
4527
- "%Y-%m-%d %H:%M:%S UTC",
4528
- time.gmtime(time.time() + _LOCAL_APPROVAL_TTL_SECONDS),
4529
- )
4530
- msg = (
4531
- f"🔐 **파일 접근 권한 요청**\n"
4532
- f"**경로:** `{path}`\n"
4533
- f"**작업:** {action_label}\n"
4534
- f"**요청자:** {user_email}\n"
4535
- f"**토큰:** `{token}`\n"
4536
- f"**만료:** {expires_at_iso}\n\n"
4537
- f"승인하려면 `승인 {token[:8]}` / 거부하려면 `거부 {token[:8]}` 라고 답장하세요."
4538
- )
4539
- payload = json.dumps({"content": msg}, ensure_ascii=False).encode("utf-8")
4540
- try:
4541
- req = urllib.request.Request(
4542
- f"https://discord.com/api/v10/channels/{DISCORD_PERMISSION_CHANNEL}/messages",
4543
- data=payload,
4544
- headers={
4545
- "Content-Type": "application/json",
4546
- "Authorization": f"Bot {DISCORD_BOT_TOKEN}",
4547
- },
4548
- method="POST",
4549
- )
4550
- with urllib.request.urlopen(req, timeout=5):
4551
- pass
4552
- sent = True
4553
- except Exception as exc:
4554
- logging.warning("Discord bot permission notify failed: %s", exc)
4555
-
4556
- if not sent and DISCORD_PERMISSION_WEBHOOK_URL:
4557
- action_label = _PERMISSION_ACTION_LABELS.get(action, action)
4558
- expires_at_iso = time.strftime(
4559
- "%Y-%m-%d %H:%M:%S UTC",
4560
- time.gmtime(time.time() + _LOCAL_APPROVAL_TTL_SECONDS),
4561
- )
4562
- payload = json.dumps({
4563
- "embeds": [
4564
- {
4565
- "title": "🔐 파일 접근 권한 요청",
4566
- "color": 0xFF9900,
4567
- "fields": [
4568
- {"name": "경로", "value": f"`{path}`", "inline": False},
4569
- {"name": "작업", "value": action_label, "inline": True},
4570
- {"name": "요청자", "value": user_email, "inline": True},
4571
- {"name": "토큰", "value": f"`{token}`", "inline": False},
4572
- {"name": "만료", "value": expires_at_iso, "inline": True},
4573
- ],
4574
- "footer": {
4575
- "text": (
4576
- "승인: POST /permissions/approve/{token} | "
4577
- "거부: POST /permissions/deny/{token} | "
4578
- "목록: GET /permissions/pending"
4579
- )
4580
- },
4581
- }
4582
- ]
4583
- }, ensure_ascii=False).encode("utf-8")
4584
- try:
4585
- req = urllib.request.Request(
4586
- DISCORD_PERMISSION_WEBHOOK_URL,
4587
- data=payload,
4588
- headers={"Content-Type": "application/json"},
4589
- method="POST",
4590
- )
4591
- with urllib.request.urlopen(req, timeout=5):
4592
- pass
4593
- except Exception as exc: # pylint: disable=broad-except
4594
- logging.warning("Discord permission webhook failed: %s", exc)
4595
-
4596
-
4597
- def _local_permission_response(path: str, action: str, user_email: str, content: str = "") -> dict:
4598
- normalized = _normalize_local_path_for_approval(path)
4599
- token = secrets.token_urlsafe(24)
4600
- record: Dict[str, object] = {
4601
- "path": normalized,
4602
- "action": action,
4603
- "user_email": user_email,
4604
- "expires_at": time.time() + _LOCAL_APPROVAL_TTL_SECONDS,
4605
- # approved=False until user explicitly confirms (Discord, web UI, etc.)
4606
- "approved": False,
4607
- }
4608
- if action == "write":
4609
- record["content_hash"] = _content_fingerprint(content)
4610
- with _local_approval_lock:
4611
- _local_approvals[token] = record
4612
- # Write to local queue file — perm_monitor.py or Claude Code reads this
4613
- # and relays the notification to Discord via the Discord MCP plugin.
4614
- _perm_queue_write(token, record)
4615
- action_label = _PERMISSION_ACTION_LABELS.get(action, action)
4616
- return {
4617
- "permission_required": True,
4618
- "path": path,
4619
- "action": action,
4620
- "action_label": action_label,
4621
- "approval_token": token,
4622
- "expires_in": _LOCAL_APPROVAL_TTL_SECONDS,
4623
- "message": f"AI가 '{path}' 에 대한 {action_label} 권한을 요청합니다.",
4624
- "check_status_url": f"/permissions/status/{token}",
4625
- }
4626
-
4627
-
4628
- def _require_local_user(request: Request) -> str:
4629
- email = get_current_user(request)
4630
- if not email:
4631
- raise HTTPException(status_code=401, detail="로컬 파일 접근은 로그인 세션이 필요합니다.")
4632
- return email
4633
-
4634
-
4635
- def _require_local_approval(
4636
- *,
4637
- token: Optional[str],
4638
- path: str,
4639
- action: str,
4640
- user_email: str,
4641
- content: str = "",
4642
- ) -> None:
4643
- if not token:
4644
- raise HTTPException(status_code=403, detail="파일 접근 승인 토큰이 필요합니다.")
4645
- normalized = _normalize_local_path_for_approval(path)
4646
- now = time.time()
4647
- with _local_approval_lock:
4648
- expired = [key for key, value in _local_approvals.items() if float(value.get("expires_at", 0)) < now]
4649
- for key in expired:
4650
- _local_approvals.pop(key, None)
4651
- record = _local_approvals.get(token)
4652
- if not record:
4653
- raise HTTPException(status_code=403, detail="파일 접근 승인이 만료되었거나 유효하지 않습니다.")
4654
- if not record.get("approved"):
4655
- raise HTTPException(status_code=403, detail="파일 접근이 아직 승인되지 않았습니다. Discord 또는 UI에서 승인해주세요.")
4656
- if record.get("user_email") != user_email:
4657
- raise HTTPException(status_code=403, detail="다른 사용자의 파일 접근 승인은 사용할 수 없습니다.")
4658
- if record.get("path") != normalized or record.get("action") != action:
4659
- raise HTTPException(status_code=403, detail="파일 접근 승인 범위가 일치하지 않습니다.")
4660
- if action == "write" and record.get("content_hash") != _content_fingerprint(content):
4661
- raise HTTPException(status_code=403, detail="승인된 파일 내용과 요청 내용이 다릅니다.")
4662
-
4663
-
4664
- # ── Permission management endpoints ──────────────────────────────────────────
4665
-
4666
- @app.get("/permissions/pending")
4667
- async def permissions_pending(request: Request):
4668
- """List all pending (not yet approved) permission requests. Admin only."""
4669
- require_admin(request)
4670
- now = time.time()
4671
- with _local_approval_lock:
4672
- result = {}
4673
- for tok, rec in list(_local_approvals.items()):
4674
- expires_at = float(rec.get("expires_at", 0))
4675
- if expires_at < now:
4676
- continue
4677
- result[tok] = {
4678
- "path": rec.get("path"),
4679
- "action": rec.get("action"),
4680
- "action_label": _PERMISSION_ACTION_LABELS.get(str(rec.get("action", "")), str(rec.get("action", ""))),
4681
- "user_email": rec.get("user_email"),
4682
- "approved": bool(rec.get("approved")),
4683
- "expires_in": round(expires_at - now),
4684
- }
4685
- return {"pending": result, "count": len(result)}
4686
-
4687
-
4688
- def _check_permission_auth(request: Request, token: Optional[str] = None) -> None:
4689
- """Allow access if requester is admin OR presents the LATTICEAI_PERMISSION_SECRET.
4690
- Used by approve/deny endpoints so the permission monitor script can call them."""
4691
- # Check secret header first (monitor script path)
4692
- if PERMISSION_MONITOR_SECRET:
4693
- auth_header = request.headers.get("Authorization", "")
4694
- if auth_header == f"Bearer {PERMISSION_MONITOR_SECRET}":
4695
- return # Authorized via secret
4696
- if token:
4697
- current_user = get_current_user(request)
4698
- with _local_approval_lock:
4699
- record = _local_approvals.get(token)
4700
- if current_user and record and record.get("user_email") == current_user:
4701
- return
4702
- # Fall back to admin session
4703
- require_admin(request)
4704
-
4705
-
4706
- @app.post("/permissions/approve/{token}")
4707
- async def permissions_approve(token: str, request: Request):
4708
- """Approve a pending permission request. Admin or permission-monitor secret.
4709
- Called by Discord (via Claude Code) or web UI after user confirmation."""
4710
- _check_permission_auth(request, token)
4711
- with _local_approval_lock:
4712
- record = _local_approvals.get(token)
4713
- if not record:
4714
- raise HTTPException(status_code=404, detail="토큰이 없거나 만료되었습니다.")
4715
- if float(record.get("expires_at", 0)) < time.time():
4716
- _local_approvals.pop(token, None)
4717
- raise HTTPException(status_code=410, detail="토큰이 만료되었습니다.")
4718
- record["approved"] = True
4719
- _perm_queue_remove(token)
4720
- logging.info(
4721
- "Permission approved: token=%s path=%s action=%s user=%s",
4722
- token, record.get("path"), record.get("action"), record.get("user_email"),
4723
- )
4724
- return {
4725
- "ok": True,
4726
- "token": token,
4727
- "path": record.get("path"),
4728
- "action": record.get("action"),
4729
- "user_email": record.get("user_email"),
4730
- }
4731
-
4732
-
4733
- @app.post("/permissions/deny/{token}")
4734
- async def permissions_deny(token: str, request: Request):
4735
- """Deny/revoke a pending permission request. Admin or permission-monitor secret."""
4736
- _check_permission_auth(request, token)
4737
- with _local_approval_lock:
4738
- record = _local_approvals.pop(token, None)
4739
- _perm_queue_remove(token)
4740
- if not record:
4741
- raise HTTPException(status_code=404, detail="토큰이 없거나 이미 처리되었습니다.")
4742
- logging.info(
4743
- "Permission denied: token=%s path=%s action=%s user=%s",
4744
- token, record.get("path"), record.get("action"), record.get("user_email"),
4745
- )
4746
- return {
4747
- "ok": True,
4748
- "denied": True,
4749
- "token": token,
4750
- "path": record.get("path"),
4751
- "action": record.get("action"),
4752
- }
4753
-
4754
-
4755
- @app.get("/permissions/status/{token}")
4756
- async def permissions_status(token: str, request: Request):
4757
- """Check approval status of a token. Used by AI agents to poll for approval."""
4758
- require_user(request)
4759
- now = time.time()
4760
- with _local_approval_lock:
4761
- record = _local_approvals.get(token)
4762
- if not record:
4763
- return {"status": "denied_or_expired", "token": token}
4764
- if float(record.get("expires_at", 0)) < now:
4765
- return {"status": "expired", "token": token}
4766
- if record.get("approved"):
4767
- return {"status": "approved", "token": token}
4768
- return {
4769
- "status": "pending",
4770
- "token": token,
4771
- "expires_in": round(float(record.get("expires_at", 0)) - now),
4772
- }
4773
-
4774
-
4775
- @app.post("/local/list")
4776
- async def local_list_endpoint(req: LocalAccessRequest, request: Request):
4777
- current_user = _require_local_user(request)
4778
- if not req.approved:
4779
- return _local_permission_response(req.path, "list", current_user)
4780
- _require_local_approval(token=req.approval_token, path=req.path, action="list", user_email=current_user)
4781
- return _tool_response(local_list, req.path)
4782
-
4783
-
4784
- @app.get("/local/list")
4785
- async def local_list_get_endpoint(path: str, request: Request):
4786
- current_user = _require_local_user(request)
4787
- return _local_permission_response(path, "list", current_user)
4788
-
4789
-
4790
- @app.post("/local/read")
4791
- async def local_read_endpoint(req: LocalAccessRequest, request: Request):
4792
- current_user = _require_local_user(request)
4793
- if not req.approved:
4794
- return _local_permission_response(req.path, "read", current_user)
4795
- _require_local_approval(token=req.approval_token, path=req.path, action="read", user_email=current_user)
4796
- return _tool_response(local_read, req.path)
4797
-
4798
-
4799
- @app.get("/local/serve")
4800
- async def local_serve_file(path: str, request: Request, approval_token: Optional[str] = None):
4801
- """Serve a local file (images etc.) directly for browser preview."""
4802
- current_user = _require_local_user(request)
4803
- _require_local_approval(token=approval_token, path=path, action="read", user_email=current_user)
4804
- target = Path(path).expanduser().resolve()
4805
- if not target.exists() or not target.is_file():
4806
- raise HTTPException(status_code=404, detail="File not found")
4807
- return FileResponse(str(target))
4808
-
4809
-
4810
- @app.post("/local/write")
4811
- async def local_write_endpoint(req: LocalWriteRequest, request: Request):
4812
- current_user = _require_local_user(request)
4813
- if not req.approved:
4814
- return _local_permission_response(req.path, "write", current_user, req.content)
4815
- _require_local_approval(
4816
- token=req.approval_token,
4817
- path=req.path,
4818
- action="write",
4819
- user_email=current_user,
4820
- content=req.content,
4821
- )
4822
- return _tool_response(local_write, req.path, req.content)
4823
-
4824
-
4825
- app.include_router(create_knowledge_graph_router(
4826
- get_graph=lambda: KNOWLEDGE_GRAPH,
4827
- require_graph=_require_graph,
4828
- require_user=require_user,
1268
+ app.include_router(create_tools_router(
1269
+ config=CONFIG,
1270
+ data_dir=DATA_DIR,
4829
1271
  static_dir=STATIC_DIR,
4830
- ))
4831
-
4832
- app.include_router(create_local_knowledge_router(
4833
- get_graph=lambda: KNOWLEDGE_GRAPH,
4834
- require_graph=_require_graph,
4835
- require_user=require_user,
4836
- require_local_user=_require_local_user,
4837
- local_permission_response=_local_permission_response,
4838
- require_local_approval=_require_local_approval,
4839
- watcher=LOCAL_KG_WATCHER,
4840
- ))
4841
-
4842
-
4843
- @app.get("/tools/chrome_status")
4844
- async def tools_chrome_status(request: Request):
4845
- require_user(request)
4846
- return _tool_response(desktop_bridge_status)
4847
-
4848
-
4849
- @app.get("/tools/computer_use_status")
4850
- async def tools_computer_use_status(request: Request):
4851
- require_user(request)
4852
- return _tool_response(computer_status)
4853
-
4854
-
4855
- # ── 내 컴퓨터 API ──────────────────────────────────────────────────────────
4856
-
4857
- CU_SYSTEM_PROMPT = """You are Lattice AI desktop-control agent. You control the Mac desktop using tools.
4858
- Prefer non-visual direct actions when possible. Use screenshots only when you must inspect visible UI state or choose screen coordinates.
4859
-
4860
- Available actions:
4861
- - computer_screenshot: {"action":"computer_screenshot","args":{}} — capture screen, returns screenshot_b64
4862
- - computer_open_app: {"action":"computer_open_app","args":{"app":"Google Chrome"}} — open or focus a Mac app
4863
- - computer_open_url: {"action":"computer_open_url","args":{"url":"https://example.com","app":"Google Chrome"}} — open URL in app
4864
- - computer_click: {"action":"computer_click","args":{"x":500,"y":300,"button":"left","double":false}}
4865
- - computer_type: {"action":"computer_type","args":{"text":"hello world","interval":0.04}}
4866
- - computer_key: {"action":"computer_key","args":{"key":"return"}} — keys: return, escape, tab, space, command+c, etc.
4867
- - computer_scroll: {"action":"computer_scroll","args":{"x":500,"y":300,"direction":"down","clicks":3}}
4868
- - computer_move: {"action":"computer_move","args":{"x":500,"y":300}}
4869
- - computer_drag: {"action":"computer_drag","args":{"x1":100,"y1":100,"x2":500,"y2":500}}
4870
- - final: {"action":"final","message":"Korean summary of what was accomplished"}
4871
-
4872
- Rules:
4873
- - Respond with exactly ONE JSON object. No markdown, no extra text.
4874
- - Do not take screenshots for simple app launch, URL opening, keyboard shortcuts, or non-visual tasks.
4875
- - Take a screenshot before coordinate-based clicks/drags or when the task explicitly asks you to inspect the screen.
4876
- - After coordinate-based clicking or typing into an unknown focused field, take a screenshot only if verification is necessary.
4877
- - Use coordinates relative to the screen (0,0 is top-left).
4878
- - If a UI element is not visible, scroll or search for it first.
4879
- - macOS Accessibility permission required for mouse/keyboard control.
4880
- """
4881
-
4882
- class CuAgentRequest(BaseModel):
4883
- task: str
4884
- conversation_id: Optional[str] = None
4885
- max_steps: int = 15
4886
- temperature: float = 0.1
4887
-
4888
- class CuClickRequest(BaseModel):
4889
- x: int
4890
- y: int
4891
- button: str = "left"
4892
- double: bool = False
4893
-
4894
- class CuOpenAppRequest(BaseModel):
4895
- app: str = "Google Chrome"
4896
-
4897
- class CuOpenUrlRequest(BaseModel):
4898
- url: str
4899
- app: str = "Google Chrome"
4900
-
4901
- class CuTypeRequest(BaseModel):
4902
- text: str
4903
- interval: float = 0.04
4904
-
4905
- class CuKeyRequest(BaseModel):
4906
- key: str
4907
-
4908
- class CuScrollRequest(BaseModel):
4909
- x: int
4910
- y: int
4911
- direction: str = "down"
4912
- clicks: int = 3
4913
-
4914
- class CuMoveRequest(BaseModel):
4915
- x: int
4916
- y: int
4917
-
4918
- class CuDragRequest(BaseModel):
4919
- x1: int
4920
- y1: int
4921
- x2: int
4922
- y2: int
4923
-
4924
-
4925
- @app.get("/cu/status")
4926
- async def cu_status(request: Request):
4927
- require_user(request)
4928
- try:
4929
- return computer_status()
4930
- except ToolError as exc:
4931
- raise HTTPException(status_code=400, detail=str(exc))
4932
-
4933
-
4934
- @app.get("/cu/screenshot")
4935
- async def cu_screenshot(request: Request):
4936
- require_user(request)
4937
- try:
4938
- return computer_screenshot()
4939
- except ToolError as exc:
4940
- raise HTTPException(status_code=400, detail=str(exc))
4941
-
4942
-
4943
- @app.post("/cu/open_app")
4944
- async def cu_open_app(req: CuOpenAppRequest, request: Request):
4945
- require_user(request)
4946
- return _tool_response(computer_open_app, req.app)
4947
-
4948
-
4949
- @app.post("/cu/open_url")
4950
- async def cu_open_url(req: CuOpenUrlRequest, request: Request):
4951
- require_user(request)
4952
- return _tool_response(computer_open_url, req.url, req.app)
4953
-
4954
-
4955
- @app.post("/cu/click")
4956
- async def cu_click(req: CuClickRequest, request: Request):
4957
- require_user(request)
4958
- return _tool_response(computer_click, req.x, req.y, req.button, req.double)
4959
-
4960
-
4961
- @app.post("/cu/type")
4962
- async def cu_type(req: CuTypeRequest, request: Request):
4963
- require_user(request)
4964
- return _tool_response(computer_type, req.text, req.interval)
4965
-
4966
-
4967
- @app.post("/cu/key")
4968
- async def cu_key(req: CuKeyRequest, request: Request):
4969
- require_user(request)
4970
- return _tool_response(computer_key, req.key)
4971
-
4972
-
4973
- @app.post("/cu/scroll")
4974
- async def cu_scroll(req: CuScrollRequest, request: Request):
4975
- require_user(request)
4976
- return _tool_response(computer_scroll, req.x, req.y, req.direction, req.clicks)
4977
-
4978
-
4979
- @app.post("/cu/move")
4980
- async def cu_move(req: CuMoveRequest, request: Request):
4981
- require_user(request)
4982
- return _tool_response(computer_move, req.x, req.y)
4983
-
4984
-
4985
- @app.post("/cu/drag")
4986
- async def cu_drag(req: CuDragRequest, request: Request):
4987
- require_user(request)
4988
- return _tool_response(computer_drag, req.x1, req.y1, req.x2, req.y2)
4989
-
4990
-
4991
- @app.post("/cu/agent")
4992
- async def cu_agent(req: CuAgentRequest, request: Request):
4993
- """SSE streaming desktop-control agent loop."""
4994
- require_user(request)
4995
- async def _stream():
4996
- task_lower = (req.task or "").lower()
4997
- url_match = re.search(r"(https?://[^\s]+|localhost:\d+[^\s]*|127\.0\.0\.1:\d+[^\s]*)", req.task or "")
4998
-
4999
- def _send(event: str, data: dict) -> str:
5000
- return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
5001
-
5002
- if ("chrome" in task_lower or "크롬" in task_lower) and any(word in task_lower for word in ["open", "열", "켜", "실행", "띄"]):
5003
- yield _send("start", {"task": req.task, "max_steps": 1})
5004
- try:
5005
- if url_match:
5006
- url = url_match.group(1)
5007
- yield _send("action", {"step": 1, "action": "computer_open_url", "args": {"url": url, "app": "Google Chrome"}})
5008
- result = computer_open_url(url, "Google Chrome")
5009
- yield _send("result", {"step": 1, "action": "computer_open_url", "result": result})
5010
- message = f"Google Chrome에서 {url}을 열었습니다."
5011
- action_name = "computer_open_url"
5012
- else:
5013
- yield _send("action", {"step": 1, "action": "computer_open_app", "args": {"app": "Google Chrome"}})
5014
- result = computer_open_app("Google Chrome")
5015
- yield _send("result", {"step": 1, "action": "computer_open_app", "result": result})
5016
- message = "Google Chrome을 열었습니다."
5017
- action_name = "computer_open_app"
5018
- save_to_history("user", req.task, source="web", conversation_id=req.conversation_id)
5019
- save_to_history("assistant", message, source="web", conversation_id=req.conversation_id)
5020
- yield _send("final", {"message": message, "steps": [{"step": 1, "action": action_name, "result": result}]})
5021
- except ToolError as exc:
5022
- yield _send("tool_error", {"step": 1, "action": "computer_open_app", "error": str(exc)})
5023
- return
5024
-
5025
- if not router.current_model_id:
5026
- yield _send("error", {"error": "No model loaded."})
5027
- return
5028
-
5029
- transcript = []
5030
- last_screenshot_b64: Optional[str] = None
5031
- max_steps = max(1, min(req.max_steps, 20))
5032
-
5033
- yield _send("start", {"task": req.task, "max_steps": max_steps})
5034
-
5035
- for step in range(max_steps):
5036
- context = (
5037
- f"{CU_SYSTEM_PROMPT}\n\n"
5038
- f"Task: {req.task}\n\n"
5039
- f"Steps completed so far:\n{json.dumps(transcript, ensure_ascii=False, indent=2)}"
5040
- )
5041
- raw = await router.generate(
5042
- message="Choose the next computer use action.",
5043
- context=context,
5044
- image_data=last_screenshot_b64,
5045
- max_tokens=1024,
5046
- temperature=req.temperature,
5047
- )
5048
-
5049
- try:
5050
- action = _extract_agent_action(str(raw))
5051
- except ValueError as exc:
5052
- yield _send("error", {"step": step + 1, "error": str(exc), "raw": str(raw)})
5053
- break
5054
-
5055
- name = action.get("action")
5056
- args = action.get("args") or {}
5057
-
5058
- if name == "final":
5059
- message = action.get("message", "작업을 완료했습니다.")
5060
- save_to_history("user", req.task, source="web", conversation_id=req.conversation_id)
5061
- save_to_history("assistant", message, source="web", conversation_id=req.conversation_id)
5062
- yield _send("final", {"message": message, "steps": transcript})
5063
- return
5064
-
5065
- yield _send("action", {"step": step + 1, "action": name, "args": args})
5066
-
5067
- try:
5068
- result = execute_tool(name, args)
5069
- # store screenshot for next VLM call
5070
- if name == "computer_screenshot" and "screenshot_b64" in result:
5071
- last_screenshot_b64 = result["screenshot_b64"]
5072
- # strip b64 from transcript to keep it small
5073
- result_summary = {k: v for k, v in result.items() if k != "screenshot_b64"}
5074
- result_summary["screenshot_captured"] = True
5075
- transcript.append({"step": step + 1, "action": name, "args": args, "result": result_summary})
5076
- yield _send("screenshot", {"step": step + 1, "screenshot_b64": last_screenshot_b64,
5077
- "width": result.get("screen_width"), "height": result.get("screen_height")})
5078
- else:
5079
- last_screenshot_b64 = None
5080
- transcript.append({"step": step + 1, "action": name, "args": args, "result": result})
5081
- yield _send("result", {"step": step + 1, "action": name, "result": result})
5082
- except (ToolError, KeyError, TypeError) as exc:
5083
- error_str = str(exc)
5084
- transcript.append({"step": step + 1, "action": name, "args": args, "error": error_str})
5085
- yield _send("tool_error", {"step": step + 1, "action": name, "error": error_str})
5086
-
5087
- yield _send("done", {"steps": len(transcript), "transcript": transcript})
5088
-
5089
- return StreamingResponse(
5090
- _stream(),
5091
- media_type="text/event-stream",
5092
- headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
5093
- )
5094
-
5095
-
5096
- @app.post("/tools/knowledge_save")
5097
- async def tools_knowledge_save(req: ToolKnowledgeSaveRequest, request: Request):
5098
- require_user(request)
5099
- return _tool_response(knowledge_save, req.content, req.folder, req.title)
5100
-
5101
-
5102
- @app.post("/tools/knowledge_search")
5103
- async def tools_knowledge_search(req: ToolKnowledgeSearchRequest, request: Request):
5104
- require_user(request)
5105
- return _tool_response(knowledge_search, req.query, req.max_results)
5106
-
5107
-
5108
- @app.get("/tools/knowledge_tree")
5109
- async def tools_knowledge_tree(request: Request):
5110
- require_user(request)
5111
- return _tool_response(knowledge_tree)
5112
-
5113
-
5114
- @app.post("/tools/obsidian_save")
5115
- async def tools_obsidian_save(req: ToolKnowledgeSaveRequest, request: Request):
5116
- require_user(request)
5117
- return _tool_response(obsidian_save, req.content, req.folder, req.title)
5118
-
5119
-
5120
- @app.post("/tools/obsidian_search")
5121
- async def tools_obsidian_search(req: ToolKnowledgeSearchRequest, request: Request):
5122
- require_user(request)
5123
- return _tool_response(obsidian_search, req.query, req.max_results)
5124
-
5125
-
5126
- @app.get("/tools/obsidian_tree")
5127
- async def tools_obsidian_tree(request: Request):
5128
- require_user(request)
5129
- return _tool_response(obsidian_tree)
5130
-
5131
-
5132
- @app.get("/obsidian/status")
5133
- async def obsidian_status(request: Request):
5134
- require_user(request)
5135
- return {
5136
- "status": "ok",
5137
- "vault_root": str(BRAIN_DIR),
5138
- "folders": [path.name for path in BRAIN_DIR.iterdir() if path.is_dir()] if BRAIN_DIR.exists() else [],
5139
- "ocr_engine": shutil.which("tesseract") or None,
5140
- }
5141
-
5142
-
5143
- @app.get("/tools/git_status")
5144
- async def tools_git_status(request: Request):
5145
- require_user(request)
5146
- return _tool_response(git_status)
5147
-
5148
-
5149
- @app.post("/tools/git_diff")
5150
- async def tools_git_diff(req: ToolGitDiffRequest, request: Request):
5151
- require_user(request)
5152
- return _tool_response(git_diff, req.path, req.cwd)
5153
-
5154
-
5155
- @app.post("/tools/git_log")
5156
- async def tools_git_log(req: ToolGitLogRequest, request: Request):
5157
- require_user(request)
5158
- return _tool_response(git_log, req.max_count, req.cwd)
5159
-
5160
-
5161
- @app.post("/tools/git_show")
5162
- async def tools_git_show(req: ToolGitShowRequest, request: Request):
5163
- require_user(request)
5164
- return _tool_response(git_show, req.revision, req.cwd)
5165
-
5166
-
5167
- @app.post("/tools/run_command")
5168
- async def tools_run_command(req: ToolRunCommandRequest, request: Request):
5169
- require_admin(request)
5170
- return _tool_response(run_command, req.command, req.cwd)
5171
-
5172
-
5173
- @app.get("/tools/network_status")
5174
- async def tools_network_status(request: Request):
5175
- require_user(request)
5176
- return _tool_response(network_status)
5177
-
5178
-
5179
- @app.post("/tools/build_project")
5180
- async def tools_build_project(req: ToolScriptRequest, request: Request):
5181
- require_admin(request)
5182
- return _tool_response(build_project, req.cwd, req.script)
5183
-
5184
-
5185
- @app.post("/tools/deploy_project")
5186
- async def tools_deploy_project(req: ToolScriptRequest, request: Request):
5187
- require_admin(request)
5188
- return _tool_response(deploy_project, req.cwd, req.script)
5189
-
5190
-
5191
- @app.get("/tools/permissions")
5192
- async def tools_permissions(request: Request):
5193
- """Compact tool permission view (tool / risk / requires_approval / network).
5194
-
5195
- A simpler authorization-layer summary derived from TOOL_GOVERNANCE.
5196
- Use /mcp/tools for the full 7-dimensional governance object.
5197
- """
5198
- require_user(request)
5199
- return {"status": "ok", "permissions": list_tool_permissions()}
5200
-
5201
-
5202
- # ── MCP / skills / plugins router (latticeai.api.mcp, v1.3.0) ────────────────
5203
- app.include_router(create_mcp_router(
1272
+ model_router=router,
5204
1273
  require_user=require_user,
5205
1274
  require_admin=require_admin,
1275
+ get_current_user=get_current_user,
1276
+ clear_history=clear_history,
5206
1277
  append_audit_event=append_audit_event,
1278
+ enforce_rate_limit=enforce_rate_limit,
1279
+ bytes_match_extension=_bytes_match_extension,
1280
+ classify_sensitive_message=classify_sensitive_message,
1281
+ save_to_history=save_to_history,
1282
+ enable_graph=ENABLE_GRAPH,
1283
+ knowledge_graph=KNOWLEDGE_GRAPH,
1284
+ require_graph=_require_graph,
1285
+ local_kg_watcher=LOCAL_KG_WATCHER,
5207
1286
  load_mcp_installs=load_mcp_installs,
5208
1287
  recommend_mcps=recommend_mcps,
5209
1288
  install_mcp=install_mcp,
5210
1289
  mcp_public_item=mcp_public_item,
5211
- get_tool_permission=get_tool_permission,
5212
- tool_governance=TOOL_GOVERNANCE,
5213
- tool_governance_default=_TOOL_GOVERNANCE_DEFAULT,
5214
- check_tool_role=_check_tool_role,
5215
- tool_response=_tool_response,
5216
- require_graph=_require_graph,
5217
- knowledge_graph=KNOWLEDGE_GRAPH,
5218
- data_dir=DATA_DIR,
5219
1290
  ))
5220
1291
 
5221
-
5222
- # ── P-Reinforce Knowledge Gardener ────────────────────────────────────────────
5223
-
5224
- @app.post("/garden")
5225
- async def garden(req: GardenRequest, request: Request):
5226
- """Raw 데이터를 P-Reinforce 구조로 자동 분류·저장"""
5227
- require_user(request)
5228
- result = await gardener.process(req.raw_data, req.category)
5229
- return result
5230
-
5231
-
5232
- @app.get("/garden/tree")
5233
- async def garden_tree(request: Request):
5234
- """지식 정원 파일트리 반환"""
5235
- require_user(request)
5236
- return gardener.get_tree()
5237
-
5238
-
5239
- # ── Setup Wizard ─────────────────────────────────────────────────────────────
5240
-
5241
- class SetupInstallRequest(BaseModel):
5242
- items: List[Dict]
5243
-
5244
- def setup_auto_state() -> Dict[str, object]:
5245
- """Return the PPT-aligned zero-config setup state used by setup UI/API."""
5246
- profile = auto_setup_probe()
5247
- recommendation = auto_setup_recommend(profile)
5248
- install_plan = auto_setup_plan(profile, recommendation)
5249
- return {
5250
- "probe": profile.to_json(),
5251
- "recommend": recommendation.to_json(),
5252
- "plan": install_plan.to_json(),
5253
- "verify": auto_setup_verify(profile, recommendation),
5254
- "preset": auto_setup_preset(profile, recommendation),
5255
- }
5256
-
5257
-
5258
- def primary_setup_model(recs: Dict[str, object]) -> Optional[Dict[str, object]]:
5259
- models = recs.get("models") if isinstance(recs, dict) else None
5260
- if not isinstance(models, list):
5261
- return None
5262
- candidates = [
5263
- item for item in models
5264
- if isinstance(item, dict) and not item.get("disabled") and (item.get("model_id") or (item.get("action") or {}).get("model_id"))
5265
- ]
5266
- if not candidates:
5267
- return None
5268
- return next((item for item in candidates if item.get("checked")), candidates[0])
5269
-
5270
-
5271
- @app.get("/setup/scan")
5272
- async def setup_scan(request: Request):
5273
- """환경 감지 및 맞춤 추천 반환."""
5274
- require_user(request)
5275
- env = scan_environment()
5276
- recs = get_recommendations(env)
5277
- zero_config = setup_auto_state()
5278
- primary_model = primary_setup_model(recs)
5279
- if primary_model:
5280
- model_id = primary_model.get("model_id") or (primary_model.get("action") or {}).get("model_id")
5281
- model_provider, provider_model = parse_model_ref(str(model_id))
5282
- primary_runtime = "mlx" if model_provider == "local_mlx" else model_provider
5283
- zero_config.setdefault("recommend", {})["model_id"] = model_id
5284
- zero_config["recommend"]["runtime"] = primary_runtime
5285
- rationale = [
5286
- item for item in zero_config["recommend"].get("rationale", [])
5287
- if not (isinstance(item, str) and item.startswith("RAM ") and "→" in item)
5288
- ]
5289
- rationale.append(f"실제 다운로드 및 로드 가능한 {primary_runtime} 모델 → {model_id}")
5290
- zero_config["recommend"]["rationale"] = rationale
5291
- if isinstance(zero_config.get("plan"), dict):
5292
- if model_provider == "ollama":
5293
- command = ["ollama", "pull", provider_model]
5294
- elif model_provider in {"vllm", "lmstudio", "llamacpp"}:
5295
- command = ["lattice-ai", "models", "load", str(model_id)]
5296
- else:
5297
- command = ["huggingface-cli", "download", str(model_id), "--quiet"]
5298
- zero_config["plan"]["steps"] = [{
5299
- "name": f"weights:{model_id}",
5300
- "why": "추론에 사용할 모델 가중치",
5301
- "command": command,
5302
- "requires_admin": False,
5303
- }]
5304
- if isinstance(zero_config.get("preset"), dict):
5305
- zero_config["preset"].setdefault("model", {})["id"] = model_id
5306
- zero_config["preset"]["model"]["runtime"] = primary_runtime
5307
- env["zero_config"] = zero_config
5308
- recs.setdefault("summary", {})["zero_config"] = zero_config["recommend"]
5309
- recs["install_plan"] = zero_config["plan"]
5310
- recs["preset"] = zero_config["preset"]
5311
- return {"environment": env, "recommendations": recs, "zero_config": zero_config}
5312
-
5313
- @app.get("/setup/auto")
5314
- async def setup_auto(request: Request):
5315
- """PPT-aligned zero-config setup pipeline: probe → recommend → plan → verify → preset."""
5316
- require_user(request)
5317
- return setup_auto_state()
5318
-
5319
- @app.post("/setup/install")
5320
- async def setup_install(req: SetupInstallRequest, request: Request):
5321
- """선택된 항목을 순서대로 설치 · 로드하는 SSE 스트림."""
5322
- require_user(request)
5323
- async def _gen():
5324
- async for chunk in install_stream(req.items, router):
5325
- yield chunk
5326
- return StreamingResponse(_gen(), media_type="text/event-stream",
5327
- headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
5328
-
5329
- @app.post("/setup/open-auth/{mcp_id}")
5330
- async def setup_open_auth(mcp_id: str, request: Request):
5331
- require_user(request)
5332
- """MCP 인증 페이지를 브라우저에서 자동으로 엽니다."""
5333
- auth_urls: Dict[str, str] = {
5334
- "github": "https://github.com/apps",
5335
- "google-drive": "https://chatgpt.com/connectors",
5336
- "slack": "https://chatgpt.com/connectors",
5337
- "chrome": "https://chatgpt.com/connectors",
5338
- "computer-use": "https://chatgpt.com/connectors",
5339
- "figma": "https://chatgpt.com/connectors",
5340
- "notion": "https://chatgpt.com/connectors",
5341
- "linear": "https://chatgpt.com/connectors",
5342
- "gmail": "https://chatgpt.com/connectors",
5343
- "google-calendar": "https://chatgpt.com/connectors",
5344
- "outlook-email": "https://chatgpt.com/connectors",
5345
- "outlook-calendar": "https://chatgpt.com/connectors",
5346
- "teams": "https://chatgpt.com/connectors",
5347
- "sharepoint": "https://chatgpt.com/connectors",
5348
- "canva": "https://chatgpt.com/connectors",
5349
- }
5350
- url = auth_urls.get(mcp_id)
5351
- if not url:
5352
- raise HTTPException(status_code=404, detail=f"알 수 없는 MCP: {mcp_id}")
5353
- open_url(url)
5354
- return {"status": "ok", "opened": url, "mcp_id": mcp_id}
5355
-
5356
-
5357
- @app.post("/permissions/open/{permission_id}")
5358
- async def open_permission_settings(permission_id: str, request: Request):
5359
- require_user(request)
5360
- """macOS 권한 설정 화면을 엽니다."""
5361
- urls = {
5362
- "accessibility": "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility",
5363
- "automation": "x-apple.systempreferences:com.apple.preference.security?Privacy_Automation",
5364
- "screen": "x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture",
5365
- }
5366
- url = urls.get(permission_id)
5367
- if not url:
5368
- raise HTTPException(status_code=404, detail="알 수 없는 권한 설정입니다.")
5369
- open_url(url)
5370
- return {"status": "ok", "opened": url, "permission": permission_id}
5371
-
1292
+ app.include_router(create_garden_router(gardener=gardener, require_user=require_user))
1293
+ app.include_router(create_setup_router(model_router=router, require_user=require_user))
5372
1294
 
5373
1295
  # ── Entry Point ────────────────────────────────────────────────────────────────
5374
1296