ltcai 0.1.30 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.py CHANGED
@@ -47,6 +47,8 @@ from PIL import Image
47
47
 
48
48
  from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
49
49
  from knowledge_graph import KnowledgeGraphStore
50
+ from knowledge_graph_api import create_knowledge_graph_router
51
+ from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
50
52
  import mcp_registry
51
53
  from mcp_registry import (
52
54
  MCP_REGISTRY, _THIRD_PARTY_SKILL_SOURCES, _KNOWN_REPO_LICENSES,
@@ -374,6 +376,7 @@ MCP_FILE = DATA_DIR / "mcp_installs.json"
374
376
  AUDIT_FILE = DATA_DIR / "audit_log.json"
375
377
  SSO_FILE = DATA_DIR / "sso_config.json"
376
378
  KNOWLEDGE_GRAPH = KnowledgeGraphStore(DATA_DIR / "knowledge_graph.sqlite", DATA_DIR / "knowledge_graph_blobs") if ENABLE_GRAPH else None
379
+ LOCAL_KG_WATCHER = LocalKnowledgeWatcher(lambda: KNOWLEDGE_GRAPH) if ENABLE_GRAPH else None
377
380
 
378
381
  def _require_graph():
379
382
  if not ENABLE_GRAPH or KNOWLEDGE_GRAPH is None:
@@ -491,17 +494,6 @@ class SkillInstallRequest(BaseModel):
491
494
  plugin: str
492
495
  skill: str
493
496
 
494
- class KnowledgeGraphIngestRequest(BaseModel):
495
- type: str
496
- content: str = ""
497
- role: Optional[str] = None
498
- title: Optional[str] = None
499
- source: Optional[str] = None
500
- conversation_id: Optional[str] = None
501
- user_email: Optional[str] = None
502
- user_nickname: Optional[str] = None
503
- metadata: Optional[Dict] = None
504
-
505
497
  DEFAULT_VPC_CONFIG = {
506
498
  "provider": "AWS",
507
499
  "region": "ap-northeast-2",
@@ -1432,11 +1424,17 @@ async def lifespan(app: FastAPI):
1432
1424
  print("⏭️ Telegram Bot Bridge disabled for this mode.")
1433
1425
  _spawn(unload_idle_models_loop(), name="unload_idle_models")
1434
1426
  _spawn(autoload_default_model(), name="autoload_default_model")
1427
+ if LOCAL_KG_WATCHER:
1428
+ restored = LOCAL_KG_WATCHER.restore_enabled_sources()
1429
+ if restored.get("restored"):
1430
+ print(f"🕸️ Local knowledge watchers restored: {restored['restored']}")
1435
1431
  except Exception as e:
1436
1432
  print(f"⚠️ Startup sequence failed: {e}")
1437
1433
  try:
1438
1434
  yield
1439
1435
  finally:
1436
+ if LOCAL_KG_WATCHER:
1437
+ LOCAL_KG_WATCHER.stop_all()
1440
1438
  router.unload_all()
1441
1439
  for proc in LOCAL_SERVER_PROCESSES.values():
1442
1440
  try:
@@ -2224,83 +2222,114 @@ ENGINE_INSTALLERS = {
2224
2222
 
2225
2223
  ENGINE_MODEL_CATALOG = {
2226
2224
  "local_mlx": [
2225
+ {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "family": "SmolLM", "tag": "local-light", "size": "963MB", "pullable": True},
2226
+ {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "family": "Gemma 3", "tag": "local-light", "size": "733MB", "pullable": True},
2227
+ {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "family": "Llama 3.x", "tag": "local-light", "size": "1.3GB", "pullable": True},
2228
+ {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "family": "Gemma 2", "tag": "local-light", "size": "1.6GB", "pullable": True},
2227
2229
  {"id": "mlx-community/gemma-4-e2b-4bit", "name": "Gemma 4 E2B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
2228
2230
  {"id": "mlx-community/gemma-4-e2b-it-4bit", "name": "Gemma 4 E2B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
2229
2231
  {"id": "mlx-community/gemma-4-e4b-4bit", "name": "Gemma 4 E4B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
2230
2232
  {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
2231
- {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "Apple Silicon", "pullable": True},
2232
- {"id": "Jiunsong/supergemma4-26b-abliterated-multimodal-mlx-4bit", "name": "SuperGemma4 26B Abliterated Multimodal", "family": "Gemma 4", "tag": "local-vlm", "size": "Apple Silicon", "pullable": True},
2233
- {"id": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit", "name": "Qwen 2.5 Coder 3B", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "2.1GB", "pullable": True},
2234
- {"id": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", "name": "Qwen 2.5 Coder 7B", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "4.3GB", "pullable": True},
2235
- {"id": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", "name": "Qwen 2.5 Coder 14B", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "8.5GB", "pullable": True},
2236
- {"id": "mlx-community/Qwen2.5-3B-Instruct-4bit", "name": "Qwen 2.5 3B", "family": "Qwen 2.5", "tag": "local-general", "size": "2.1GB", "pullable": True},
2237
- {"id": "mlx-community/Qwen2.5-7B-Instruct-4bit", "name": "Qwen 2.5 7B", "family": "Qwen 2.5", "tag": "local-general", "size": "4.3GB", "pullable": True},
2238
- {"id": "mlx-community/Qwen2.5-14B-Instruct-4bit", "name": "Qwen 2.5 14B", "family": "Qwen 2.5", "tag": "local-general", "size": "8.5GB", "pullable": True},
2233
+ {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "2.7GB", "pullable": True},
2234
+ {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "4.8GB", "pullable": True},
2235
+ {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "4.4GB", "pullable": True},
2236
+ {"id": "mlx-community/gemma-3-4b-it-4bit", "name": "Gemma 3 4B", "family": "Gemma 3", "tag": "local-vlm", "size": "3.3GB", "pullable": True},
2239
2237
  {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "family": "Llama 3.x", "tag": "local-general", "size": "2.0GB", "pullable": True},
2240
2238
  {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "family": "Llama 3.1", "tag": "local-general", "size": "4.7GB", "pullable": True},
2239
+ {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "family": "Gemma 2", "tag": "local-general", "size": "5.4GB", "pullable": True},
2240
+ {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "family": "Gemma 3", "tag": "local-vlm", "size": "8.0GB", "pullable": True},
2241
+ {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
2242
+ {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
2243
+ {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "family": "Phi", "tag": "local-coding", "size": "8.3GB", "pullable": True},
2244
+ {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B Instruct v0.3", "family": "Mistral", "tag": "local-general", "size": "4.1GB", "pullable": True},
2245
+ {"id": "mlx-community/Ministral-8B-Instruct-2410-4bit", "name": "Ministral 8B Instruct", "family": "Mistral", "tag": "local-general", "size": "4.5GB", "pullable": True},
2246
+ {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "family": "Mistral", "tag": "local-large", "size": "13.3GB", "pullable": True},
2247
+ {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B", "family": "Qwen2.5", "tag": "local-coding", "size": "18.5GB", "pullable": True},
2248
+ {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "18GB", "pullable": True},
2249
+ {"id": "mlx-community/gemma-3-27b-it-4bit", "name": "Gemma 3 27B", "family": "Gemma 3", "tag": "local-vlm", "size": "17GB", "pullable": True},
2250
+ {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "15.6GB", "pullable": True},
2241
2251
  {"id": "mlx-community/Llama-3.3-70B-Instruct-4bit", "name": "Llama 3.3 70B", "family": "Llama 3.x", "tag": "local-general", "size": "40GB+", "pullable": True},
2242
2252
  {"id": "mlx-community/Llama-3.1-70B-Instruct-4bit", "name": "Llama 3.1 70B", "family": "Llama 3.1", "tag": "local-general", "size": "40GB+", "pullable": True},
2243
- {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-light", "size": "2.2GB", "pullable": True},
2244
- {"id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit", "name": "DeepSeek R1 Distill 7B", "family": "DeepSeek", "tag": "reasoning", "size": "4.3GB", "pullable": True},
2245
2253
  ],
2246
2254
  "ollama": [
2255
+ {"id": "ollama:qwen3-vl:4b", "name": "Qwen3-VL 4B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
2256
+ {"id": "ollama:qwen3-vl:8b", "name": "Qwen3-VL 8B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
2257
+ {"id": "ollama:qwen3-vl:30b", "name": "Qwen3-VL 30B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
2258
+ {"id": "ollama:qwen3:8b", "name": "Qwen3 8B via Ollama", "family": "Qwen", "tag": "local-server", "size": "pull required", "pullable": True},
2259
+ {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen2.5 Coder 14B via Ollama", "family": "Qwen", "tag": "local-coding", "size": "pull required", "pullable": True},
2260
+ {"id": "ollama:gemma3:1b", "name": "Gemma 3 1B via Ollama", "family": "Gemma", "tag": "local-light", "size": "pull required", "pullable": True},
2247
2261
  {"id": "ollama:gemma3:4b", "name": "Gemma 3 4B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
2248
2262
  {"id": "ollama:gemma3:4b-it-q4_K_M", "name": "Gemma 3 4B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
2249
2263
  {"id": "ollama:gemma3:12b", "name": "Gemma 3 12B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
2250
2264
  {"id": "ollama:gemma3:12b-it-q4_K_M", "name": "Gemma 3 12B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
2251
- {"id": "ollama:qwen2.5:3b", "name": "Qwen 2.5 3B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2252
- {"id": "ollama:qwen2.5:7b", "name": "Qwen 2.5 7B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2253
- {"id": "ollama:qwen2.5:14b", "name": "Qwen 2.5 14B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2254
- {"id": "ollama:qwen2.5:32b", "name": "Qwen 2.5 32B via Ollama", "family": "Qwen 2.5", "tag": "local-server", "size": "pull required", "pullable": True},
2255
- {"id": "ollama:qwen2.5-coder:7b", "name": "Qwen 2.5 Coder 7B via Ollama", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "pull required", "pullable": True},
2256
- {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen 2.5 Coder 14B via Ollama", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "pull required", "pullable": True},
2265
+ {"id": "ollama:gemma3:27b", "name": "Gemma 3 27B via Ollama", "family": "Gemma", "tag": "local-large", "size": "pull required", "pullable": True},
2266
+ {"id": "ollama:llama3.2:1b", "name": "Llama 3.2 1B via Ollama", "family": "Llama 3.x", "tag": "local-light", "size": "pull required", "pullable": True},
2257
2267
  {"id": "ollama:llama3.2:3b", "name": "Llama 3.2 3B via Ollama", "family": "Llama 3.x", "tag": "local-server", "size": "pull required", "pullable": True},
2258
2268
  {"id": "ollama:llama3.1:8b", "name": "Llama 3.1 8B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
2259
2269
  {"id": "ollama:llama3.1:8b-instruct-q4_0", "name": "Llama 3.1 8B q4_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
2260
2270
  {"id": "ollama:llama3.1:8b-instruct-q8_0", "name": "Llama 3.1 8B q8_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
2261
2271
  {"id": "ollama:llama3.1:70b", "name": "Llama 3.1 70B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
2272
+ {"id": "ollama:llama3.3:70b", "name": "Llama 3.3 70B via Ollama", "family": "Llama 3.x", "tag": "local-large", "size": "pull required", "pullable": True},
2273
+ {"id": "ollama:mistral:7b", "name": "Mistral 7B via Ollama", "family": "Mistral", "tag": "local-server", "size": "pull required", "pullable": True},
2274
+ {"id": "ollama:mixtral:8x7b", "name": "Mixtral 8x7B via Ollama", "family": "Mistral", "tag": "local-large", "size": "pull required", "pullable": True},
2275
+ {"id": "ollama:phi4-mini", "name": "Phi 4 Mini via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
2276
+ {"id": "ollama:phi4", "name": "Phi 4 via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
2277
+ {"id": "ollama:smollm2:1.7b", "name": "SmolLM2 1.7B via Ollama", "family": "SmolLM", "tag": "local-light", "size": "pull required", "pullable": True},
2262
2278
  ],
2263
2279
  "vllm": [
2264
- {"id": "vllm:Qwen/Qwen2.5-0.5B-Instruct-AWQ", "name": "Qwen 2.5 0.5B AWQ via vLLM", "family": "Qwen 2.5", "tag": "local-light", "size": "0.5B", "pullable": True},
2280
+ {"id": "vllm:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2281
+ {"id": "vllm:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2282
+ {"id": "vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2283
+ {"id": "vllm:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via vLLM", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2265
2284
  {"id": "vllm:google/gemma-2-2b", "name": "Gemma 2 2B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2266
2285
  {"id": "vllm:google/gemma-2-2b-it", "name": "Gemma 2 2B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2267
2286
  {"id": "vllm:google/gemma-2-9b", "name": "Gemma 2 9B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2268
2287
  {"id": "vllm:google/gemma-2-9b-it", "name": "Gemma 2 9B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2269
- {"id": "vllm:Qwen/Qwen2.5-3B-Instruct", "name": "Qwen 2.5 3B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2270
- {"id": "vllm:Qwen/Qwen2.5-7B-Instruct", "name": "Qwen 2.5 7B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2271
- {"id": "vllm:Qwen/Qwen2.5-14B-Instruct", "name": "Qwen 2.5 14B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2272
- {"id": "vllm:Qwen/Qwen2.5-32B-Instruct", "name": "Qwen 2.5 32B via vLLM", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2273
- {"id": "vllm:Qwen/Qwen2.5-Coder-7B-Instruct", "name": "Qwen 2.5 Coder 7B via vLLM", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
2274
- {"id": "vllm:Qwen/Qwen2.5-Coder-14B-Instruct", "name": "Qwen 2.5 Coder 14B via vLLM", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
2288
+ {"id": "vllm:google/gemma-3-4b-it", "name": "Gemma 3 4B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2289
+ {"id": "vllm:google/gemma-3-12b-it", "name": "Gemma 3 12B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2290
+ {"id": "vllm:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
2291
+ {"id": "vllm:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
2292
+ {"id": "vllm:microsoft/phi-4", "name": "Phi 4 via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
2293
+ {"id": "vllm:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
2294
+ {"id": "vllm:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
2295
+ {"id": "vllm:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via vLLM", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
2275
2296
  {"id": "vllm:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via vLLM", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
2276
2297
  {"id": "vllm:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
2298
+ {"id": "vllm:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via vLLM", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
2277
2299
  {"id": "vllm:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
2278
2300
  ],
2279
2301
  "lmstudio": [
2280
- {"id": "lmstudio:https://huggingface.co/lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF", "name": "Qwen 2.5 0.5B GGUF via LM Studio", "family": "Qwen 2.5", "tag": "local-light", "size": "0.5B", "pullable": True},
2302
+ {"id": "lmstudio:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2303
+ {"id": "lmstudio:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2304
+ {"id": "lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2305
+ {"id": "lmstudio:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via LM Studio", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
2281
2306
  {"id": "lmstudio:google/gemma-2-2b-it", "name": "Gemma 2 2B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2282
2307
  {"id": "lmstudio:google/gemma-2-9b-it", "name": "Gemma 2 9B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2283
- {"id": "lmstudio:Qwen/Qwen2.5-3B-Instruct", "name": "Qwen 2.5 3B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2284
- {"id": "lmstudio:Qwen/Qwen2.5-7B-Instruct", "name": "Qwen 2.5 7B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2285
- {"id": "lmstudio:Qwen/Qwen2.5-14B-Instruct", "name": "Qwen 2.5 14B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2286
- {"id": "lmstudio:Qwen/Qwen2.5-32B-Instruct", "name": "Qwen 2.5 32B via LM Studio", "family": "Qwen 2.5", "tag": "local-server", "size": "server model", "pullable": True},
2287
- {"id": "lmstudio:Qwen/Qwen2.5-Coder-7B-Instruct", "name": "Qwen 2.5 Coder 7B via LM Studio", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
2288
- {"id": "lmstudio:Qwen/Qwen2.5-Coder-14B-Instruct", "name": "Qwen 2.5 Coder 14B via LM Studio", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "server model", "pullable": True},
2308
+ {"id": "lmstudio:google/gemma-3-4b-it", "name": "Gemma 3 4B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2309
+ {"id": "lmstudio:google/gemma-3-12b-it", "name": "Gemma 3 12B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
2310
+ {"id": "lmstudio:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
2311
+ {"id": "lmstudio:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
2312
+ {"id": "lmstudio:microsoft/phi-4", "name": "Phi 4 via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
2313
+ {"id": "lmstudio:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
2314
+ {"id": "lmstudio:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
2315
+ {"id": "lmstudio:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via LM Studio", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
2289
2316
  {"id": "lmstudio:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via LM Studio", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
2290
2317
  {"id": "lmstudio:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
2318
+ {"id": "lmstudio:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via LM Studio", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
2291
2319
  {"id": "lmstudio:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
2292
2320
  ],
2293
2321
  "llamacpp": [
2294
- {"id": "llamacpp:lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF", "name": "Qwen 2.5 0.5B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "gguf-q4", "size": "0.5B", "pullable": True},
2322
+ {"id": "llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "name": "Qwen3-VL 4B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
2323
+ {"id": "llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "name": "Qwen3-VL 8B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
2295
2324
  {"id": "llamacpp:unsloth/gemma-2-2b-it-GGUF", "name": "Gemma 2 2B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2296
2325
  {"id": "llamacpp:unsloth/gemma-2-9b-it-GGUF", "name": "Gemma 2 9B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2297
- {"id": "llamacpp:Qwen/Qwen2.5-7B-Instruct-GGUF", "name": "Qwen 2.5 7B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "local-server", "size": "gguf", "pullable": True},
2298
- {"id": "llamacpp:Qwen/Qwen2.5-14B-Instruct-GGUF", "name": "Qwen 2.5 14B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "local-server", "size": "gguf", "pullable": True},
2299
- {"id": "llamacpp:Qwen/Qwen2.5-32B-Instruct-GGUF", "name": "Qwen 2.5 32B GGUF via llama.cpp", "family": "Qwen 2.5", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2300
- {"id": "llamacpp:Qwen/Qwen2.5-Coder-7B-Instruct-GGUF", "name": "Qwen 2.5 Coder 7B GGUF via llama.cpp", "family": "Qwen 2.5 Coder", "tag": "local-coding", "size": "gguf", "pullable": True},
2301
- {"id": "llamacpp:Qwen/Qwen2.5-Coder-14B-Instruct-GGUF", "name": "Qwen 2.5 Coder 14B GGUF via llama.cpp", "family": "Qwen 2.5 Coder", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2326
+ {"id": "llamacpp:unsloth/gemma-3-4b-it-GGUF", "name": "Gemma 3 4B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2327
+ {"id": "llamacpp:bartowski/Mistral-7B-Instruct-v0.3-GGUF", "name": "Mistral 7B GGUF via llama.cpp", "family": "Mistral", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2328
+ {"id": "llamacpp:bartowski/Phi-3.5-mini-instruct-GGUF", "name": "Phi 3.5 Mini GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2329
+ {"id": "llamacpp:bartowski/phi-4-GGUF", "name": "Phi 4 GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2302
2330
  {"id": "llamacpp:bartowski/Llama-3.2-3B-Instruct-GGUF", "name": "Llama 3.2 3B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "gguf-q4", "size": "gguf", "pullable": True},
2303
2331
  {"id": "llamacpp:bartowski/Llama-3.1-8B-Instruct-GGUF", "name": "Llama 3.1 8B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
2332
+ {"id": "llamacpp:bartowski/Llama-3.3-70B-Instruct-GGUF", "name": "Llama 3.3 70B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "local-large", "size": "gguf", "pullable": True},
2304
2333
  {"id": "llamacpp:bartowski/Llama-3.1-70B-Instruct-GGUF", "name": "Llama 3.1 70B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
2305
2334
  ],
2306
2335
  }
@@ -2326,8 +2355,40 @@ VLLM_METAL_BIN = VLLM_METAL_ENV / "bin" / "vllm"
2326
2355
  VLLM_METAL_PYTHON = VLLM_METAL_ENV / "bin" / "python"
2327
2356
  LMSTUDIO_BUNDLED_CLI = Path("/Applications/LM Studio.app/Contents/Resources/app/.webpack/lms")
2328
2357
 
2358
def windows_binary_candidates(binary: str) -> List[Path]:
    """Return well-known Windows install locations to probe for *binary*.

    Only ``"ollama"``, ``"lms"`` and ``"nvidia-smi"`` are known; any other
    name yields an empty list. The returned paths are candidates only --
    nothing is checked against the filesystem here.

    Args:
        binary: Command name without the ``.exe`` suffix.

    Returns:
        Candidate executable paths in probe order. Candidates below
        ``LOCALAPPDATA`` are omitted when that variable is unset or empty.
    """
    # A variable that is set but empty must be treated like an unset one:
    # Path("") / "Ollama" / ... would be a *relative* path and would be
    # resolved against the current working directory by the caller.
    local_appdata = os.environ.get("LOCALAPPDATA") or ""
    program_files = Path(os.environ.get("ProgramFiles") or r"C:\Program Files")
    program_files_x86 = Path(os.environ.get("ProgramFiles(x86)") or r"C:\Program Files (x86)")
    user_programs = Path(local_appdata) / "Programs" if local_appdata else None

    # Sub-paths shared by several candidates of the same tool.
    lms_relative = Path("LM Studio", "resources", "app", ".webpack", "lms.exe")
    nvsmi_relative = Path("NVIDIA Corporation", "NVSMI", "nvidia-smi.exe")

    candidates = {
        "ollama": [
            user_programs / "Ollama" / "ollama.exe" if user_programs else None,
            program_files / "Ollama" / "ollama.exe",
        ],
        "lms": [
            user_programs / lms_relative if user_programs else None,
            program_files / lms_relative,
        ],
        "nvidia-smi": [
            program_files / nvsmi_relative,
            program_files_x86 / nvsmi_relative,
        ],
    }
    return [path for path in candidates.get(binary, []) if path is not None]
2377
+
2378
+
2379
def local_binary(binary: str) -> Optional[str]:
    """Locate *binary*, falling back to known Windows install paths.

    The ``PATH`` lookup via ``shutil.which`` always wins. Only when it finds
    nothing and the platform reports ``"Windows"`` are the locations from
    ``windows_binary_candidates`` probed, in order.

    Args:
        binary: Command name to look up (for example ``"ollama"``).

    Returns:
        The executable's path as a string, or ``None`` when it cannot be
        found.
    """
    found = shutil.which(binary)
    if found:
        return found
    if platform.system() != "Windows":
        return None
    for candidate in windows_binary_candidates(binary):
        # is_file() rather than exists(): the result is handed to subprocess
        # as an executable, so a directory with the expected name must not
        # count as a hit.
        if candidate.is_file():
            return str(candidate)
    return None
2388
+
2389
+
2329
2390
  def find_lmstudio_cli() -> Optional[str]:
2330
- cli = shutil.which("lms")
2391
+ cli = local_binary("lms")
2331
2392
  if cli:
2332
2393
  return cli
2333
2394
  if LMSTUDIO_BUNDLED_CLI.exists():
@@ -2551,6 +2612,8 @@ def engine_support_status(engine: str) -> Dict[str, object]:
2551
2612
  if engine != "vllm":
2552
2613
  return {"supported": True, "reason": None}
2553
2614
  is_apple_silicon = sys.platform == "darwin" and platform.machine() == "arm64"
2615
+ if sys.platform.startswith("win"):
2616
+ return {"supported": False, "reason": "vLLM은 Windows native 자동 설치보다 WSL2/Linux 환경을 권장합니다."}
2554
2617
  if sys.platform == "darwin" and not is_apple_silicon:
2555
2618
  return {"supported": False, "reason": "vLLM Metal 자동 설치는 Apple Silicon macOS에서만 지원됩니다."}
2556
2619
  if sys.version_info >= (3, 13) and is_apple_silicon:
@@ -2807,6 +2870,9 @@ def download_hf_model(
2807
2870
 
2808
2871
 
2809
2872
  def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict[str, object]:
2873
+ ollama = local_binary("ollama")
2874
+ if not ollama:
2875
+ raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
2810
2876
  started_at = time.time()
2811
2877
  if progress_emit:
2812
2878
  progress_emit(model_download_progress_payload(
@@ -2817,7 +2883,7 @@ def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict
2817
2883
  indeterminate=True,
2818
2884
  ))
2819
2885
  process = subprocess.Popen(
2820
- ["ollama", "pull", model_name],
2886
+ [ollama, "pull", model_name],
2821
2887
  stdout=subprocess.PIPE,
2822
2888
  stderr=subprocess.STDOUT,
2823
2889
  text=True,
@@ -2876,10 +2942,11 @@ def pull_ollama_model_with_progress(model_name: str, progress_emit=None) -> Dict
2876
2942
 
2877
2943
 
2878
2944
  def get_ollama_pulled_models() -> set:
2879
- if not shutil.which("ollama"):
2945
+ ollama = local_binary("ollama")
2946
+ if not ollama:
2880
2947
  return set()
2881
2948
  try:
2882
- result = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=5, check=False)
2949
+ result = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=5, check=False)
2883
2950
  pulled = set()
2884
2951
  for line in result.stdout.splitlines()[1:]:
2885
2952
  parts = line.split()
@@ -2934,16 +3001,17 @@ def get_openai_compatible_server_models(provider: str) -> List[str]:
2934
3001
 
2935
3002
 
2936
3003
  def ensure_ollama_server() -> None:
2937
- if not shutil.which("ollama"):
3004
+ ollama = local_binary("ollama")
3005
+ if not ollama:
2938
3006
  raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
2939
3007
  try:
2940
- probe = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=3, check=False)
3008
+ probe = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=3, check=False)
2941
3009
  if probe.returncode == 0:
2942
3010
  return
2943
3011
  except Exception:
2944
3012
  pass
2945
3013
  subprocess.Popen(
2946
- ["ollama", "serve"],
3014
+ [ollama, "serve"],
2947
3015
  stdout=subprocess.DEVNULL,
2948
3016
  stderr=subprocess.DEVNULL,
2949
3017
  start_new_session=True,
@@ -2951,7 +3019,7 @@ def ensure_ollama_server() -> None:
2951
3019
  deadline = time.time() + 20
2952
3020
  while time.time() < deadline:
2953
3021
  try:
2954
- probe = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=3, check=False)
3022
+ probe = subprocess.run([ollama, "list"], capture_output=True, text=True, timeout=3, check=False)
2955
3023
  if probe.returncode == 0:
2956
3024
  return
2957
3025
  except Exception:
@@ -3062,7 +3130,7 @@ def engine_installed(engine: str) -> bool:
3062
3130
  if engine == "local_mlx":
3063
3131
  return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_lm"))
3064
3132
  if engine == "ollama":
3065
- return shutil.which("ollama") is not None
3133
+ return local_binary("ollama") is not None
3066
3134
  if engine == "vllm":
3067
3135
  return vllm_metal_python() is not None or vllm_executable() is not None or importlib.util.find_spec("vllm") is not None
3068
3136
  if engine == "lmstudio":
@@ -3301,11 +3369,12 @@ def install_engine(engine: str) -> Dict:
3301
3369
  "stderr": completed.stderr[-12000:],
3302
3370
  "installed": engine_installed(engine),
3303
3371
  }
3304
- if engine == "ollama" and completed.returncode == 0 and shutil.which("ollama"):
3372
+ ollama = local_binary("ollama")
3373
+ if engine == "ollama" and completed.returncode == 0 and ollama:
3305
3374
  # Skip if already running to avoid orphan daemons.
3306
3375
  already_up = False
3307
3376
  try:
3308
- probe = subprocess.run(["ollama", "list"], capture_output=True, timeout=2, check=False)
3377
+ probe = subprocess.run([ollama, "list"], capture_output=True, timeout=2, check=False)
3309
3378
  already_up = probe.returncode == 0
3310
3379
  except Exception:
3311
3380
  already_up = False
@@ -3315,7 +3384,7 @@ def install_engine(engine: str) -> Dict:
3315
3384
  try:
3316
3385
  # Detach so the daemon survives this request but doesn't become our zombie.
3317
3386
  subprocess.Popen(
3318
- ["ollama", "serve"],
3387
+ [ollama, "serve"],
3319
3388
  stdout=subprocess.DEVNULL,
3320
3389
  stderr=subprocess.DEVNULL,
3321
3390
  start_new_session=True,
@@ -3392,9 +3461,12 @@ async def prepare_and_load_model(
3392
3461
  download_result = download_hf_model(parsed_model, "local_mlx")
3393
3462
  elif parsed_provider == "ollama":
3394
3463
  ensure_ollama_server()
3464
+ ollama = local_binary("ollama")
3465
+ if not ollama:
3466
+ raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
3395
3467
  if parsed_model not in get_ollama_pulled_models():
3396
3468
  completed = subprocess.run(
3397
- ["ollama", "pull", parsed_model],
3469
+ [ollama, "pull", parsed_model],
3398
3470
  capture_output=True,
3399
3471
  text=True,
3400
3472
  timeout=900,
@@ -3778,9 +3850,12 @@ async def pull_ollama_model(req: PullModelRequest, request: Request):
3778
3850
 
3779
3851
  if provider == "ollama":
3780
3852
  ensure_ollama_server()
3853
+ ollama = local_binary("ollama")
3854
+ if not ollama:
3855
+ raise HTTPException(status_code=400, detail="Ollama가 설치되지 않았습니다.")
3781
3856
  try:
3782
3857
  completed = subprocess.run(
3783
- ["ollama", "pull", model_name],
3858
+ [ollama, "pull", model_name],
3784
3859
  capture_output=True, text=True, timeout=900, check=False,
3785
3860
  )
3786
3861
  except subprocess.TimeoutExpired:
@@ -3877,21 +3952,23 @@ async def set_api_key(req: SetApiKeyRequest, request: Request):
3877
3952
  async def list_models():
3878
3953
  """HuggingFace 추천 모델 목록 및 로드 상태 반환"""
3879
3954
  recommended = [
3880
- # Qwen Series
3881
- {"id": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", "name": "Qwen 2.5 Coder 7B", "tag": "coding", "size": "4.3GB"},
3882
- {"id": "mlx-community/Qwen2.5-7B-Instruct-4bit", "name": "Qwen 2.5 7B", "tag": "general", "size": "4.3GB"},
3883
-
3884
- # Llama Series
3885
- {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "tag": "light", "size": "2.0GB"},
3886
- {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "tag": "general", "size": "4.7GB"},
3887
-
3888
- # Gemma Series
3889
- {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B (4-bit)", "tag": "next-gen", "size": "5.2GB"},
3890
- {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "tag": "balanced","size": "5.4GB"},
3891
- {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "tag": "ultra-light", "size": "1.6GB"},
3892
-
3893
- # Reasoning
3894
- {"id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit","name": "DeepSeek R1 (7B)", "tag": "reasoning","size": "4.3GB"},
3955
+ {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "tag": "multimodal", "size": "2.7GB"},
3956
+ {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "tag": "multimodal", "size": "4.8GB"},
3957
+ {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B","tag": "multimodal", "size": "18GB"},
3958
+ {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "tag": "ultra-light", "size": "963MB"},
3959
+ {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "tag": "ultra-light", "size": "733MB"},
3960
+ {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "tag": "light", "size": "1.3GB"},
3961
+ {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "tag": "light", "size": "2.0GB"},
3962
+ {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "tag": "coding", "size": "2.2GB"},
3963
+ {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "tag": "multimodal", "size": "4.4GB"},
3964
+ {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B v0.3", "tag": "general", "size": "4.1GB"},
3965
+ {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "tag": "general", "size": "4.7GB"},
3966
+ {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B", "tag": "multimodal", "size": "5.2GB"},
3967
+ {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "tag": "balanced", "size": "8.0GB"},
3968
+ {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "tag": "coding", "size": "8.3GB"},
3969
+ {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "tag": "large", "size": "13.3GB"},
3970
+ {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B","tag": "coding", "size": "18.5GB"},
3971
+ {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B", "tag": "multimodal", "size": "15.6GB"},
3895
3972
  ]
3896
3973
  return {
3897
3974
  "recommended": recommended,
@@ -4204,97 +4281,6 @@ async def search_history(q: str, request: Request):
4204
4281
  grouped[cid]["messages"].append(item)
4205
4282
  return {"results": list(grouped.values())[-30:], "query": q}
4206
4283
 
4207
-
4208
- @app.get("/graph")
4209
- async def knowledge_graph_page(request: Request):
4210
- """Serve the interactive knowledge graph canvas UI."""
4211
- _require_graph()
4212
- require_user(request)
4213
- return FileResponse(STATIC_DIR / "graph.html")
4214
-
4215
-
4216
- @app.get("/knowledge-graph")
4217
- async def knowledge_graph_legacy_page(request: Request):
4218
- """Backward-compatible route for the graph page."""
4219
- _require_graph()
4220
- require_user(request)
4221
- return FileResponse(STATIC_DIR / "graph.html")
4222
-
4223
-
4224
- @app.get("/knowledge-graph/stats")
4225
- async def knowledge_graph_stats(request: Request):
4226
- _require_graph()
4227
- require_user(request)
4228
- return KNOWLEDGE_GRAPH.stats()
4229
-
4230
- @app.get("/knowledge-graph/schema")
4231
- async def knowledge_graph_schema(request: Request):
4232
- _require_graph()
4233
- require_user(request)
4234
- stats = KNOWLEDGE_GRAPH.stats()
4235
- return {
4236
- "legacy_schema_version": stats.get("schema_version"),
4237
- "v2_schema_available": stats.get("v2_schema_available"),
4238
- "v2": stats.get("v2"),
4239
- }
4240
-
4241
-
4242
- @app.get("/knowledge-graph/graph")
4243
- async def knowledge_graph_data(request: Request, limit: int = 300):
4244
- _require_graph()
4245
- require_user(request)
4246
- return KNOWLEDGE_GRAPH.graph(limit)
4247
-
4248
-
4249
- @app.get("/knowledge-graph/search")
4250
- async def knowledge_graph_search(q: str, request: Request, limit: int = 30):
4251
- _require_graph()
4252
- require_user(request)
4253
- if not q or not q.strip():
4254
- return {"query": q, "matches": []}
4255
- return KNOWLEDGE_GRAPH.search(q, limit)
4256
-
4257
-
4258
- @app.get("/knowledge-graph/context")
4259
- async def knowledge_graph_context(q: str, request: Request, limit: int = 6):
4260
- _require_graph()
4261
- require_user(request)
4262
- return {"query": q, "context": KNOWLEDGE_GRAPH.context_for_query(q, limit)}
4263
-
4264
-
4265
- @app.get("/knowledge-graph/neighbors/{node_id:path}")
4266
- async def knowledge_graph_neighbors(node_id: str, request: Request):
4267
- _require_graph()
4268
- require_user(request)
4269
- if not node_id:
4270
- raise HTTPException(status_code=400, detail="node_id required")
4271
- return KNOWLEDGE_GRAPH.neighbors(node_id)
4272
-
4273
-
4274
- @app.post("/knowledge-graph/ingest")
4275
- async def knowledge_graph_ingest(req: KnowledgeGraphIngestRequest, request: Request):
4276
- _require_graph()
4277
- current_user = require_user(request)
4278
- event_type = (req.type or "").strip().lower()
4279
- if event_type not in {"message", "ai_response", "note"}:
4280
- raise HTTPException(status_code=400, detail="지원하는 type: message, ai_response, note")
4281
- role = req.role or ("assistant" if event_type == "ai_response" else "user")
4282
- return KNOWLEDGE_GRAPH.ingest_message(
4283
- role,
4284
- req.content,
4285
- user_email=req.user_email or current_user,
4286
- user_nickname=req.user_nickname,
4287
- source=req.source or "mcp",
4288
- conversation_id=req.conversation_id,
4289
- raw={
4290
- "type": req.type,
4291
- "title": req.title,
4292
- "content": req.content,
4293
- "metadata": req.metadata or {},
4294
- },
4295
- )
4296
-
4297
-
4298
4284
  async def _stream_chat(req: ChatRequest, context: str = "", image_data: str = None) -> AsyncIterator[str]:
4299
4285
  full_response = ""
4300
4286
  async for chunk in router.stream_generate(req.message, context, req.max_tokens, req.temperature, image_data):
@@ -5435,24 +5421,26 @@ async def tools_read_document(req: ToolPathRequest, request: Request):
5435
5421
 
5436
5422
  @app.get("/tools/pdf_pages")
5437
5423
  async def tools_pdf_pages(path: str, request: Request, approval_token: Optional[str] = None):
5438
- """Render PDF pages as base64 PNG images using PyMuPDF."""
5424
+ """Render PDF pages as base64 PNG images using pypdfium2 (Apache-2.0)."""
5439
5425
  current_user = require_user(request)
5440
5426
  _require_local_approval(token=approval_token, path=path, action="read", user_email=current_user)
5441
5427
  target = Path(path).expanduser().resolve()
5442
5428
  if not target.exists() or not target.is_file():
5443
5429
  raise HTTPException(status_code=404, detail="File not found")
5444
- import fitz # PyMuPDF
5430
+ import io
5431
+ import pypdfium2 as pdfium
5445
5432
  doc = None
5446
5433
  try:
5447
- doc = fitz.open(str(target))
5434
+ doc = pdfium.PdfDocument(str(target))
5448
5435
  total = len(doc)
5449
5436
  pages = []
5450
- for i, page in enumerate(doc):
5451
- if i >= 20: # 최대 20페이지
5452
- break
5453
- mat = fitz.Matrix(1.5, 1.5)
5454
- pix = page.get_pixmap(matrix=mat)
5455
- b64 = base64.b64encode(pix.tobytes("png")).decode()
5437
+ for i in range(min(total, 20)): # 최대 20페이지
5438
+ page = doc[i]
5439
+ bitmap = page.render(scale=1.5)
5440
+ pil_image = bitmap.to_pil()
5441
+ buf = io.BytesIO()
5442
+ pil_image.save(buf, format="PNG")
5443
+ b64 = base64.b64encode(buf.getvalue()).decode()
5456
5444
  pages.append({"page": i + 1, "b64": b64})
5457
5445
  return {"total": total, "pages": pages}
5458
5446
  except Exception as e:
@@ -5462,7 +5450,7 @@ async def tools_pdf_pages(path: str, request: Request, approval_token: Optional[
5462
5450
  try:
5463
5451
  doc.close()
5464
5452
  except Exception as e:
5465
- logging.warning("fitz doc close failed: %s", e)
5453
+ logging.warning("pypdfium2 doc close failed: %s", e)
5466
5454
 
5467
5455
 
5468
5456
  @app.get("/tools/download")
@@ -5918,6 +5906,24 @@ async def local_write_endpoint(req: LocalWriteRequest, request: Request):
5918
5906
  return _tool_response(local_write, req.path, req.content)
5919
5907
 
5920
5908
 
5909
+ app.include_router(create_knowledge_graph_router(
5910
+ get_graph=lambda: KNOWLEDGE_GRAPH,
5911
+ require_graph=_require_graph,
5912
+ require_user=require_user,
5913
+ static_dir=STATIC_DIR,
5914
+ ))
5915
+
5916
+ app.include_router(create_local_knowledge_router(
5917
+ get_graph=lambda: KNOWLEDGE_GRAPH,
5918
+ require_graph=_require_graph,
5919
+ require_user=require_user,
5920
+ require_local_user=_require_local_user,
5921
+ local_permission_response=_local_permission_response,
5922
+ require_local_approval=_require_local_approval,
5923
+ watcher=LOCAL_KG_WATCHER,
5924
+ ))
5925
+
5926
+
5921
5927
  @app.get("/tools/chrome_status")
5922
5928
  async def tools_chrome_status(request: Request):
5923
5929
  require_user(request)
@@ -6707,24 +6713,32 @@ async def setup_scan(request: Request):
6707
6713
  primary_model = primary_setup_model(recs)
6708
6714
  if primary_model:
6709
6715
  model_id = primary_model.get("model_id") or (primary_model.get("action") or {}).get("model_id")
6716
+ model_provider, provider_model = parse_model_ref(str(model_id))
6717
+ primary_runtime = "mlx" if model_provider == "local_mlx" else model_provider
6710
6718
  zero_config.setdefault("recommend", {})["model_id"] = model_id
6711
- zero_config["recommend"]["runtime"] = "mlx"
6719
+ zero_config["recommend"]["runtime"] = primary_runtime
6712
6720
  rationale = [
6713
6721
  item for item in zero_config["recommend"].get("rationale", [])
6714
6722
  if not (isinstance(item, str) and item.startswith("RAM ") and "→" in item)
6715
6723
  ]
6716
- rationale.append(f"실제 다운로드 및 로드 가능한 MLX 모델 → {model_id}")
6724
+ rationale.append(f"실제 다운로드 및 로드 가능한 {primary_runtime} 모델 → {model_id}")
6717
6725
  zero_config["recommend"]["rationale"] = rationale
6718
6726
  if isinstance(zero_config.get("plan"), dict):
6727
+ if model_provider == "ollama":
6728
+ command = ["ollama", "pull", provider_model]
6729
+ elif model_provider in {"vllm", "lmstudio", "llamacpp"}:
6730
+ command = ["lattice-ai", "models", "load", str(model_id)]
6731
+ else:
6732
+ command = ["huggingface-cli", "download", str(model_id), "--quiet"]
6719
6733
  zero_config["plan"]["steps"] = [{
6720
6734
  "name": f"weights:{model_id}",
6721
6735
  "why": "추론에 사용할 모델 가중치",
6722
- "command": ["huggingface-cli", "download", model_id, "--quiet"],
6736
+ "command": command,
6723
6737
  "requires_admin": False,
6724
6738
  }]
6725
6739
  if isinstance(zero_config.get("preset"), dict):
6726
6740
  zero_config["preset"].setdefault("model", {})["id"] = model_id
6727
- zero_config["preset"]["model"]["runtime"] = "mlx"
6741
+ zero_config["preset"]["model"]["runtime"] = primary_runtime
6728
6742
  env["zero_config"] = zero_config
6729
6743
  recs.setdefault("summary", {})["zero_config"] = zero_config["recommend"]
6730
6744
  recs["install_plan"] = zero_config["plan"]