ltcai 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +153 -609
  2. package/auto_setup.py +17 -17
  3. package/docs/CHANGELOG.md +83 -0
  4. package/docs/MULTI_AGENT_RUNTIME.md +4 -4
  5. package/docs/PLUGIN_SDK.md +7 -7
  6. package/docs/REALTIME_COLLABORATION.md +6 -6
  7. package/docs/V2_ARCHITECTURE.md +45 -25
  8. package/docs/WORKFLOW_DESIGNER.md +4 -4
  9. package/docs/architecture.md +127 -135
  10. package/docs/kg-schema.md +3 -3
  11. package/docs/public-deploy.md +2 -3
  12. package/docs/spec-vs-impl.md +13 -10
  13. package/knowledge_graph.py +2 -2
  14. package/latticeai/__init__.py +1 -1
  15. package/latticeai/api/models.py +8 -0
  16. package/latticeai/core/config.py +1 -1
  17. package/latticeai/core/graph_curator.py +2 -2
  18. package/latticeai/core/marketplace.py +2 -2
  19. package/latticeai/core/model_compat.py +7 -63
  20. package/latticeai/core/model_resolution.py +1 -1
  21. package/latticeai/core/multi_agent.py +1 -1
  22. package/latticeai/core/plugins.py +1 -1
  23. package/latticeai/core/realtime.py +1 -1
  24. package/latticeai/core/workflow_engine.py +1 -1
  25. package/latticeai/core/workspace_os.py +1 -1
  26. package/latticeai/server_app.py +1 -1
  27. package/latticeai/services/model_catalog.py +105 -153
  28. package/latticeai/services/model_recommendation.py +28 -17
  29. package/latticeai/services/model_runtime.py +2 -2
  30. package/llm_router.py +80 -92
  31. package/ltcai_cli.py +2 -3
  32. package/package.json +8 -3
  33. package/static/account.html +3 -1
  34. package/static/activity.html +5 -2
  35. package/static/admin.html +5 -1
  36. package/static/agents.html +5 -2
  37. package/static/chat.html +12 -10
  38. package/static/css/responsive.css +597 -0
  39. package/static/css/tokens.css +224 -165
  40. package/static/graph.html +12 -2
  41. package/static/lattice-reference.css +366 -739
  42. package/static/platform.css +45 -16
  43. package/static/plugins.html +5 -2
  44. package/static/scripts/admin.js +33 -33
  45. package/static/scripts/chat.js +109 -42
  46. package/static/scripts/graph.js +169 -11
  47. package/static/scripts/ux.js +167 -0
  48. package/static/workflows.html +5 -2
  49. package/static/workspace.css +55 -19
  50. package/static/workspace.html +5 -2
  51. package/telegram_bot.py +1 -1
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Lattice AI MLX — Local LLM Bridge Server
3
- Apple Silicon (M1-M5) 전용 | mlx-lm 기반
3
+ Apple Silicon (M1-M5) 전용 | MLX-VLM 기반
4
4
  """
5
5
 
6
6
  import asyncio
@@ -17,8 +17,8 @@ from typing import Dict, List, Optional
17
17
 
18
18
  ENGINE_INSTALLERS = {
19
19
  "local_mlx": {
20
- "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-lm", "mlx-vlm", "huggingface_hub[cli]"],
21
- "label": "Install MLX runtime",
20
+ "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm", "huggingface_hub[cli]"],
21
+ "label": "Install MLX-VLM runtime",
22
22
  },
23
23
  "openai": {
24
24
  "command": [sys.executable, "-m", "pip", "install", "openai"],
@@ -61,170 +61,109 @@ ENGINE_INSTALLERS = {
61
61
  },
62
62
  }
63
63
 
64
+ def _model(
65
+ model_id: str,
66
+ name: str,
67
+ family: str,
68
+ tag: str,
69
+ size: str,
70
+ *,
71
+ source_country: str,
72
+ source_company: str,
73
+ execution_method: str,
74
+ internet_requirement: str = "모델을 다운로드할 때만 인터넷 필요; 실행 중에는 필요 없음",
75
+ pullable: bool = True,
76
+ ) -> Dict[str, object]:
77
+ clean_model_name = re.split(r"\s+via\s+", name, maxsplit=1)[0]
78
+ return {
79
+ "id": model_id,
80
+ "name": name,
81
+ "model_name": clean_model_name,
82
+ "family": family,
83
+ "tag": tag,
84
+ "size": size,
85
+ "pullable": pullable,
86
+ "modality": "multimodal",
87
+ "source_country": source_country,
88
+ "source_company": source_company,
89
+ "execution_method": execution_method,
90
+ "run_location": "내 컴퓨터에서만 실행",
91
+ "internet_requirement": internet_requirement,
92
+ "source_display_order": [
93
+ "source_country",
94
+ "source_company",
95
+ "execution_method",
96
+ "internet_requirement",
97
+ "model_name",
98
+ ],
99
+ }
100
+
101
+
102
+ _RUNS_ON_THIS_COMPUTER = "내 컴퓨터에서만 실행"
103
+
104
+
64
105
  ENGINE_MODEL_CATALOG = {
65
106
  "local_mlx": [
66
- {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "family": "SmolLM", "tag": "local-light", "size": "963MB", "pullable": True},
67
- {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "family": "Gemma 3", "tag": "local-light", "size": "733MB", "pullable": True},
68
- {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "family": "Llama 3.x", "tag": "local-light", "size": "1.3GB", "pullable": True},
69
- {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "family": "Gemma 2", "tag": "local-light", "size": "1.6GB", "pullable": True},
70
- {"id": "mlx-community/gemma-4-e2b-4bit", "name": "Gemma 4 E2B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
71
- {"id": "mlx-community/gemma-4-e2b-it-4bit", "name": "Gemma 4 E2B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
72
- {"id": "mlx-community/gemma-4-e4b-4bit", "name": "Gemma 4 E4B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
73
- {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
74
- {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "2.7GB", "pullable": True},
75
- {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "4.8GB", "pullable": True},
76
- {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "4.4GB", "pullable": True},
77
- {"id": "mlx-community/gemma-3-4b-it-4bit", "name": "Gemma 3 4B", "family": "Gemma 3", "tag": "local-vlm", "size": "3.3GB", "pullable": True},
78
- {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "family": "Llama 3.x", "tag": "local-general", "size": "2.0GB", "pullable": True},
79
- {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "family": "Llama 3.1", "tag": "local-general", "size": "4.7GB", "pullable": True},
80
- {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "family": "Gemma 2", "tag": "local-general", "size": "5.4GB", "pullable": True},
81
- {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "family": "Gemma 3", "tag": "local-vlm", "size": "8.0GB", "pullable": True},
82
- {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
83
- {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
84
- {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "family": "Phi", "tag": "local-coding", "size": "8.3GB", "pullable": True},
85
- {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B Instruct v0.3", "family": "Mistral", "tag": "local-general", "size": "4.1GB", "pullable": True},
86
- {"id": "mlx-community/Ministral-8B-Instruct-2410-4bit", "name": "Ministral 8B Instruct", "family": "Mistral", "tag": "local-general", "size": "4.5GB", "pullable": True},
87
- {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "family": "Mistral", "tag": "local-large", "size": "13.3GB", "pullable": True},
88
- {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B", "family": "Qwen2.5", "tag": "local-coding", "size": "18.5GB", "pullable": True},
89
- {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "18GB", "pullable": True},
90
- {"id": "mlx-community/gemma-3-27b-it-4bit", "name": "Gemma 3 27B", "family": "Gemma 3", "tag": "local-vlm", "size": "17GB", "pullable": True},
91
- {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "15.6GB", "pullable": True},
92
- {"id": "mlx-community/gemma-4-31b-it-4bit", "name": "Gemma 4 31B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "18.4GB", "pullable": True},
93
- {"id": "mlx-community/gpt-oss-20b-MXFP4-Q8", "name": "GPT-OSS 20B", "family": "GPT-OSS", "tag": "local-reasoning", "size": "12.1GB", "pullable": True},
94
- {"id": "mlx-community/gpt-oss-120b-MXFP4-Q4", "name": "GPT-OSS 120B", "family": "GPT-OSS", "tag": "local-large", "size": "62.3GB", "pullable": True},
95
- {"id": "mlx-community/Llama-3.3-70B-Instruct-4bit", "name": "Llama 3.3 70B", "family": "Llama 3.x", "tag": "local-general", "size": "40GB+", "pullable": True},
96
- {"id": "mlx-community/Llama-3.1-70B-Instruct-4bit", "name": "Llama 3.1 70B", "family": "Llama 3.1", "tag": "local-general", "size": "40GB+", "pullable": True},
107
+ _model("mlx-community/gemma-4-e2b-4bit", "Gemma 4 E2B Base", "Gemma 4", "local-vlm", "3.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
108
+ _model("mlx-community/gemma-4-e2b-it-4bit", "Gemma 4 E2B Instruct", "Gemma 4", "local-vlm", "3.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
109
+ _model("mlx-community/gemma-4-e4b-4bit", "Gemma 4 E4B Base", "Gemma 4", "local-vlm", "5.2GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
110
+ _model("mlx-community/gemma-4-e4b-it-4bit", "Gemma 4 E4B Instruct", "Gemma 4", "local-vlm", "5.2GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
111
+ _model("mlx-community/gemma-4-12b-it-4bit", "Gemma 4 12B Instruct", "Gemma 4", "local-vlm", "7.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
112
+ _model("mlx-community/gemma-4-26b-a4b-it-4bit", "Gemma 4 26B A4B Instruct", "Gemma 4", "local-vlm", "15.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
113
+ _model("mlx-community/gemma-4-31b-it-4bit", "Gemma 4 31B Instruct", "Gemma 4", "local-vlm", "18.4GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
114
+ _model("mlx-community/Qwen3-VL-4B-Instruct-4bit", "Qwen3-VL 4B", "Qwen3-VL", "local-vlm", "2.7GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
115
+ _model("mlx-community/Qwen3-VL-8B-Instruct-4bit", "Qwen3-VL 8B", "Qwen3-VL", "local-vlm", "4.8GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
116
+ _model("mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "Qwen3-VL 30B A3B", "Qwen3-VL", "local-vlm", "18GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
117
+ _model("mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit", "Llama 4 Scout 17B 16E", "Llama 4", "local-vlm", "11.8GB", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
97
118
  ],
98
119
  "ollama": [
99
- {"id": "ollama:qwen3-vl:4b", "name": "Qwen3-VL 4B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
100
- {"id": "ollama:qwen3-vl:8b", "name": "Qwen3-VL 8B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
101
- {"id": "ollama:qwen3-vl:30b", "name": "Qwen3-VL 30B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
102
- {"id": "ollama:gpt-oss:20b", "name": "GPT-OSS 20B via Ollama", "family": "GPT-OSS", "tag": "local-reasoning", "size": "pull required", "pullable": True},
103
- {"id": "ollama:gpt-oss:120b", "name": "GPT-OSS 120B via Ollama", "family": "GPT-OSS", "tag": "local-large", "size": "pull required", "pullable": True},
104
- {"id": "ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "name": "Gemma 4 31B Q4 via Ollama", "family": "Gemma 4", "tag": "local-vlm", "size": "18.7GB", "pullable": True},
105
- {"id": "ollama:qwen3:8b", "name": "Qwen3 8B via Ollama", "family": "Qwen", "tag": "local-server", "size": "pull required", "pullable": True},
106
- {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen2.5 Coder 14B via Ollama", "family": "Qwen", "tag": "local-coding", "size": "pull required", "pullable": True},
107
- {"id": "ollama:gemma3:1b", "name": "Gemma 3 1B via Ollama", "family": "Gemma", "tag": "local-light", "size": "pull required", "pullable": True},
108
- {"id": "ollama:gemma3:4b", "name": "Gemma 3 4B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
109
- {"id": "ollama:gemma3:4b-it-q4_K_M", "name": "Gemma 3 4B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
110
- {"id": "ollama:gemma3:12b", "name": "Gemma 3 12B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
111
- {"id": "ollama:gemma3:12b-it-q4_K_M", "name": "Gemma 3 12B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
112
- {"id": "ollama:gemma3:27b", "name": "Gemma 3 27B via Ollama", "family": "Gemma", "tag": "local-large", "size": "pull required", "pullable": True},
113
- {"id": "ollama:llama3.2:1b", "name": "Llama 3.2 1B via Ollama", "family": "Llama 3.x", "tag": "local-light", "size": "pull required", "pullable": True},
114
- {"id": "ollama:llama3.2:3b", "name": "Llama 3.2 3B via Ollama", "family": "Llama 3.x", "tag": "local-server", "size": "pull required", "pullable": True},
115
- {"id": "ollama:llama3.1:8b", "name": "Llama 3.1 8B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
116
- {"id": "ollama:llama3.1:8b-instruct-q4_0", "name": "Llama 3.1 8B q4_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
117
- {"id": "ollama:llama3.1:8b-instruct-q8_0", "name": "Llama 3.1 8B q8_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
118
- {"id": "ollama:llama3.1:70b", "name": "Llama 3.1 70B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
119
- {"id": "ollama:llama3.3:70b", "name": "Llama 3.3 70B via Ollama", "family": "Llama 3.x", "tag": "local-large", "size": "pull required", "pullable": True},
120
- {"id": "ollama:mistral:7b", "name": "Mistral 7B via Ollama", "family": "Mistral", "tag": "local-server", "size": "pull required", "pullable": True},
121
- {"id": "ollama:mixtral:8x7b", "name": "Mixtral 8x7B via Ollama", "family": "Mistral", "tag": "local-large", "size": "pull required", "pullable": True},
122
- {"id": "ollama:phi4-mini", "name": "Phi 4 Mini via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
123
- {"id": "ollama:phi4", "name": "Phi 4 via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
124
- {"id": "ollama:smollm2:1.7b", "name": "SmolLM2 1.7B via Ollama", "family": "SmolLM", "tag": "local-light", "size": "pull required", "pullable": True},
125
- {"id": "ollama:deepseek-r1:1.5b", "name": "DeepSeek-R1 1.5B via Ollama", "family": "DeepSeek", "tag": "local-light", "size": "pull required", "pullable": True},
126
- {"id": "ollama:deepseek-r1:7b", "name": "DeepSeek-R1 7B via Ollama", "family": "DeepSeek", "tag": "local-reasoning", "size": "pull required", "pullable": True},
127
- {"id": "ollama:deepseek-r1:8b", "name": "DeepSeek-R1 8B via Ollama", "family": "DeepSeek", "tag": "local-reasoning", "size": "pull required", "pullable": True},
128
- {"id": "ollama:deepseek-r1:14b", "name": "DeepSeek-R1 14B via Ollama", "family": "DeepSeek", "tag": "local-reasoning", "size": "pull required", "pullable": True},
129
- {"id": "ollama:deepseek-r1:32b", "name": "DeepSeek-R1 32B via Ollama", "family": "DeepSeek", "tag": "local-large", "size": "pull required", "pullable": True},
130
- {"id": "ollama:deepseek-coder-v2:16b", "name": "DeepSeek-Coder-V2 16B via Ollama", "family": "DeepSeek", "tag": "local-coding", "size": "pull required", "pullable": True},
120
+ _model("ollama:qwen3-vl:4b", "Qwen3-VL 4B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
121
+ _model("ollama:qwen3-vl:8b", "Qwen3-VL 8B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
122
+ _model("ollama:qwen3-vl:30b", "Qwen3-VL 30B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
123
+ _model("ollama:hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M", "Gemma 4 12B Q4 via Ollama", "Gemma 4", "local-vlm", "7.9GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
124
+ _model("ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "Gemma 4 31B Q4 via Ollama", "Gemma 4", "local-vlm", "18.7GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
125
+ _model("ollama:hf.co/ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_M", "Llama 4 Scout Q4 via Ollama", "Llama 4", "local-vlm", "12GB", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
131
126
  ],
132
127
  "vllm": [
133
- {"id": "vllm:openai/gpt-oss-20b", "name": "GPT-OSS 20B via vLLM", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
134
- {"id": "vllm:openai/gpt-oss-120b", "name": "GPT-OSS 120B via vLLM", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
135
- {"id": "vllm:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
136
- {"id": "vllm:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
137
- {"id": "vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
138
- {"id": "vllm:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via vLLM", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
139
- {"id": "vllm:google/gemma-2-2b", "name": "Gemma 2 2B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
140
- {"id": "vllm:google/gemma-2-2b-it", "name": "Gemma 2 2B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
141
- {"id": "vllm:google/gemma-2-9b", "name": "Gemma 2 9B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
142
- {"id": "vllm:google/gemma-2-9b-it", "name": "Gemma 2 9B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
143
- {"id": "vllm:google/gemma-3-4b-it", "name": "Gemma 3 4B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
144
- {"id": "vllm:google/gemma-3-12b-it", "name": "Gemma 3 12B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
145
- {"id": "vllm:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
146
- {"id": "vllm:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
147
- {"id": "vllm:microsoft/phi-4", "name": "Phi 4 via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
148
- {"id": "vllm:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
149
- {"id": "vllm:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
150
- {"id": "vllm:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via vLLM", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
151
- {"id": "vllm:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via vLLM", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
152
- {"id": "vllm:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
153
- {"id": "vllm:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via vLLM", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
154
- {"id": "vllm:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
128
+ _model("vllm:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
129
+ _model("vllm:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
130
+ _model("vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
131
+ _model("vllm:google/gemma-4-12b-it", "Gemma 4 12B via vLLM", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
132
+ _model("vllm:suitch/gemma-4-31B-it-4bit", "Gemma 4 31B via vLLM", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
133
+ _model("vllm:meta-llama/Llama-4-Scout-17B-16E-Instruct", "Llama 4 Scout via vLLM", "Llama 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
155
134
  ],
156
135
  "lmstudio": [
157
- {"id": "lmstudio:openai/gpt-oss-20b", "name": "GPT-OSS 20B via LM Studio", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
158
- {"id": "lmstudio:openai/gpt-oss-120b", "name": "GPT-OSS 120B via LM Studio", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
159
- {"id": "lmstudio:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B 4-bit via LM Studio", "family": "Gemma 4", "tag": "local-vlm", "size": "server model", "pullable": True},
160
- {"id": "lmstudio:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
161
- {"id": "lmstudio:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
162
- {"id": "lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
163
- {"id": "lmstudio:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via LM Studio", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
164
- {"id": "lmstudio:google/gemma-2-2b-it", "name": "Gemma 2 2B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
165
- {"id": "lmstudio:google/gemma-2-9b-it", "name": "Gemma 2 9B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
166
- {"id": "lmstudio:google/gemma-3-4b-it", "name": "Gemma 3 4B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
167
- {"id": "lmstudio:google/gemma-3-12b-it", "name": "Gemma 3 12B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
168
- {"id": "lmstudio:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
169
- {"id": "lmstudio:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
170
- {"id": "lmstudio:microsoft/phi-4", "name": "Phi 4 via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
171
- {"id": "lmstudio:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
172
- {"id": "lmstudio:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
173
- {"id": "lmstudio:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via LM Studio", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
174
- {"id": "lmstudio:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via LM Studio", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
175
- {"id": "lmstudio:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
176
- {"id": "lmstudio:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via LM Studio", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
177
- {"id": "lmstudio:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
136
+ _model("lmstudio:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
137
+ _model("lmstudio:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
138
+ _model("lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
139
+ _model("lmstudio:ggml-org/gemma-4-12B-it-GGUF", "Gemma 4 12B 4-bit via LM Studio", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
140
+ _model("lmstudio:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B 4-bit via LM Studio", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
141
+ _model("lmstudio:meta-llama/Llama-4-Scout-17B-16E-Instruct", "Llama 4 Scout via LM Studio", "Llama 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
178
142
  ],
179
143
  "llamacpp": [
180
- {"id": "llamacpp:ggml-org/gpt-oss-20b-GGUF", "name": "GPT-OSS 20B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
181
- {"id": "llamacpp:ggml-org/gpt-oss-120b-GGUF", "name": "GPT-OSS 120B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
182
- {"id": "llamacpp:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B GGUF via llama.cpp", "family": "Gemma 4", "tag": "gguf-q4", "size": "gguf", "pullable": True},
183
- {"id": "llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "name": "Qwen3-VL 4B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
184
- {"id": "llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "name": "Qwen3-VL 8B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
185
- {"id": "llamacpp:unsloth/gemma-2-2b-it-GGUF", "name": "Gemma 2 2B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
186
- {"id": "llamacpp:unsloth/gemma-2-9b-it-GGUF", "name": "Gemma 2 9B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
187
- {"id": "llamacpp:unsloth/gemma-3-4b-it-GGUF", "name": "Gemma 3 4B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
188
- {"id": "llamacpp:bartowski/Mistral-7B-Instruct-v0.3-GGUF", "name": "Mistral 7B GGUF via llama.cpp", "family": "Mistral", "tag": "gguf-q4", "size": "gguf", "pullable": True},
189
- {"id": "llamacpp:bartowski/Phi-3.5-mini-instruct-GGUF", "name": "Phi 3.5 Mini GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
190
- {"id": "llamacpp:bartowski/phi-4-GGUF", "name": "Phi 4 GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
191
- {"id": "llamacpp:bartowski/Llama-3.2-3B-Instruct-GGUF", "name": "Llama 3.2 3B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "gguf-q4", "size": "gguf", "pullable": True},
192
- {"id": "llamacpp:bartowski/Llama-3.1-8B-Instruct-GGUF", "name": "Llama 3.1 8B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
193
- {"id": "llamacpp:bartowski/Llama-3.3-70B-Instruct-GGUF", "name": "Llama 3.3 70B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "local-large", "size": "gguf", "pullable": True},
194
- {"id": "llamacpp:bartowski/Llama-3.1-70B-Instruct-GGUF", "name": "Llama 3.1 70B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
195
- {"id": "llamacpp:unsloth/DeepSeek-R1-GGUF", "name": "DeepSeek-R1 GGUF via llama.cpp", "family": "DeepSeek", "tag": "gguf-q4", "size": "gguf", "pullable": True},
196
- {"id": "llamacpp:bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", "name": "DeepSeek-Coder-V2 Lite GGUF via llama.cpp", "family": "DeepSeek", "tag": "gguf-q4", "size": "gguf", "pullable": True},
144
+ _model("llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "Qwen3-VL 4B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
145
+ _model("llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "Qwen3-VL 8B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
146
+ _model("llamacpp:Qwen/Qwen3-VL-30B-A3B-Instruct-GGUF", "Qwen3-VL 30B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
147
+ _model("llamacpp:ggml-org/gemma-4-12B-it-GGUF", "Gemma 4 12B GGUF via llama.cpp", "Gemma 4", "gguf-vlm", "gguf", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
148
+ _model("llamacpp:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B GGUF via llama.cpp", "Gemma 4", "gguf-vlm", "gguf", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
149
+ _model("llamacpp:ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF", "Llama 4 Scout GGUF via llama.cpp", "Llama 4", "gguf-vlm", "gguf", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
197
150
  ],
198
151
  }
199
152
 
200
153
  MODEL_ENGINE_ALIASES = {
201
- "gpt-oss-20b": {
202
- "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
203
- "ollama": "gpt-oss:20b",
204
- "vllm": "openai/gpt-oss-20b",
205
- "lmstudio": "openai/gpt-oss-20b",
206
- "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
207
- },
208
- "openai/gpt-oss-20b": {
209
- "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
210
- "ollama": "gpt-oss:20b",
211
- "vllm": "openai/gpt-oss-20b",
212
- "lmstudio": "openai/gpt-oss-20b",
213
- "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
214
- },
215
- "gpt-oss-120b": {
216
- "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
217
- "ollama": "gpt-oss:120b",
218
- "vllm": "openai/gpt-oss-120b",
219
- "lmstudio": "openai/gpt-oss-120b",
220
- "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
221
- },
222
- "openai/gpt-oss-120b": {
223
- "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
224
- "ollama": "gpt-oss:120b",
225
- "vllm": "openai/gpt-oss-120b",
226
- "lmstudio": "openai/gpt-oss-120b",
227
- "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
154
+ "gemma-4-12b-it-4bit": {
155
+ "local_mlx": "mlx-community/gemma-4-12b-it-4bit",
156
+ "ollama": "hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M",
157
+ "vllm": "google/gemma-4-12b-it",
158
+ "lmstudio": "ggml-org/gemma-4-12B-it-GGUF",
159
+ "llamacpp": "ggml-org/gemma-4-12B-it-GGUF",
160
+ },
161
+ "mlx-community/gemma-4-12b-it-4bit": {
162
+ "local_mlx": "mlx-community/gemma-4-12b-it-4bit",
163
+ "ollama": "hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M",
164
+ "vllm": "google/gemma-4-12b-it",
165
+ "lmstudio": "ggml-org/gemma-4-12B-it-GGUF",
166
+ "llamacpp": "ggml-org/gemma-4-12B-it-GGUF",
228
167
  },
229
168
  "gemma-4-31b-it-4bit": {
230
169
  "local_mlx": "mlx-community/gemma-4-31b-it-4bit",
@@ -247,13 +186,26 @@ MODEL_ENGINE_ALIASES = {
247
186
  "lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
248
187
  "llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
249
188
  },
189
+ "qwen3-vl-8b": {
190
+ "local_mlx": "mlx-community/Qwen3-VL-8B-Instruct-4bit",
191
+ "ollama": "qwen3-vl:8b",
192
+ "vllm": "Qwen/Qwen3-VL-8B-Instruct",
193
+ "lmstudio": "Qwen/Qwen3-VL-8B-Instruct",
194
+ "llamacpp": "Qwen/Qwen3-VL-8B-Instruct-GGUF",
195
+ },
196
+ "llama-4-scout": {
197
+ "local_mlx": "mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
198
+ "ollama": "hf.co/ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_M",
199
+ "vllm": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
200
+ "lmstudio": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
201
+ "llamacpp": "ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF",
202
+ },
250
203
  }
251
204
 
252
205
  _VERSIONED_MODEL_PATTERNS = (
253
206
  ("gemma", re.compile(r"\bgemma[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
254
207
  ("qwen", re.compile(r"\bqwen[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
255
208
  ("llama", re.compile(r"\bllama[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
256
- ("phi", re.compile(r"\bphi[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
257
209
  )
258
210
 
259
211
 
@@ -3,7 +3,8 @@
3
3
  Given a detected system profile (from :func:`auto_setup.probe`) this module
4
4
  classifies every model in :data:`model_catalog.ENGINE_MODEL_CATALOG` into one of
5
5
  three states — **recommended**, **compatible**, or **not_recommended** — and
6
- groups the result by model family (Gemma, Qwen, Llama, Phi, DeepSeek, …).
6
+ groups the result by current multimodal model family (Gemma 4, Qwen3-VL,
7
+ Llama 4).
7
8
 
8
9
  It is intentionally pure and dependency-light: the only input is a plain dict
9
10
  describing the machine, so it is fully unit-testable without touching real
@@ -28,12 +29,11 @@ NOT_RECOMMENDED = "not_recommended"
28
29
  # Apple-Silicon only. Used to decide platform availability before sizing.
29
30
  _APPLE_ONLY_ENGINES = {"local_mlx"}
30
31
 
31
- # Family display order for the grouped view (best/newest first within a brand).
32
+ # Family display order for the grouped view (newest multimodal generations first).
32
33
  _FAMILY_ORDER = [
33
- "Gemma 4", "Gemma 3", "Gemma 2", "Gemma",
34
- "Qwen3-VL", "Qwen2.5-VL", "Qwen2.5", "Qwen",
35
- "Llama 3.x", "Llama 3.1", "Llama",
36
- "Mistral", "Phi", "GPT-OSS", "DeepSeek", "SmolLM",
34
+ "Gemma 4",
35
+ "Qwen3-VL",
36
+ "Llama 4",
37
37
  ]
38
38
 
39
39
  _SIZE_RE = re.compile(r"([\d.]+)\s*(TB|GB|MB)", re.IGNORECASE)
@@ -44,7 +44,7 @@ def parse_size_gb(size: Any) -> Optional[float]:
44
44
  """Parse a catalog ``size`` string (``"4.7GB"``, ``"963MB"``, ``"40GB+"``).
45
45
 
46
46
  Returns ``None`` when the size is non-numeric (e.g. ``"pull required"`` or
47
- ``"server model"``) so callers can treat it as "size unknown".
47
+ ``"실행 도구에서 관리"``) so callers can treat it as "size unknown".
48
48
  """
49
49
  if not isinstance(size, str):
50
50
  return None
@@ -92,30 +92,38 @@ def _classify_one(
92
92
  need_gb = estimated_ram_gb(size_gb) if size_gb is not None else None
93
93
 
94
94
  if not engine_available:
95
- status, reason = NOT_RECOMMENDED, "Requires Apple Silicon (MLX runtime)"
95
+ status, reason = NOT_RECOMMENDED, "Apple Silicon MLX-VLM이 필요합니다"
96
96
  elif need_gb is None:
97
- # Server/pull models have no fixed on-disk size — treat as compatible
98
- # (the engine streams/pulls weights on demand).
99
- status, reason = COMPATIBLE, "Served/pulled on demand by the engine"
97
+ # Tool-managed/pull models have no fixed on-disk size, so treat them as
98
+ # compatible and let the execution tool validate the exact model.
99
+ status, reason = COMPATIBLE, "선택한 실행 방식에서 필요할 모델을 받습니다"
100
100
  elif ram_gb <= 0:
101
- status, reason = COMPATIBLE, "Memory unknown verify before loading"
102
- elif need_gb <= ram_gb * 0.6:
103
- status, reason = RECOMMENDED, f"Fits comfortably (~{need_gb:.0f} GB of {ram_gb:.0f} GB RAM)"
101
+ status, reason = COMPATIBLE, "메모리 정보를 확인하지 못했습니다. 불러오기 전에 검증합니다"
102
+ elif need_gb <= ram_gb * 0.75:
103
+ status, reason = RECOMMENDED, f"현재 메모리에서 안정적으로 사용할 가능성이 높습니다 (~{need_gb:.0f} GB / {ram_gb:.0f} GB)"
104
104
  elif need_gb <= ram_gb * 0.9:
105
- status, reason = COMPATIBLE, f"Runs but tight (~{need_gb:.0f} GB of {ram_gb:.0f} GB RAM)"
105
+ status, reason = COMPATIBLE, f"사용 가능하지만 여유가 적습니다 (~{need_gb:.0f} GB / {ram_gb:.0f} GB)"
106
106
  else:
107
- status, reason = NOT_RECOMMENDED, f"Needs ~{need_gb:.0f} GB RAM (have {ram_gb:.0f} GB)"
107
+ status, reason = NOT_RECOMMENDED, f"권장 메모리가 부족합니다 (~{need_gb:.0f} GB 필요, 현재 {ram_gb:.0f} GB)"
108
108
 
109
109
  return {
110
110
  "id": model.get("id"),
111
111
  "name": model.get("name"),
112
+ "model_name": model.get("model_name") or model.get("name"),
112
113
  "family": model.get("family"),
113
114
  "tag": model.get("tag"),
115
+ "modality": model.get("modality") or "multimodal",
114
116
  "size": model.get("size"),
115
117
  "size_gb": size_gb,
116
118
  "required_ram_gb": need_gb,
117
119
  "status": status,
118
120
  "reason": reason,
121
+ "source_country": model.get("source_country"),
122
+ "source_company": model.get("source_company"),
123
+ "execution_method": model.get("execution_method"),
124
+ "run_location": model.get("run_location"),
125
+ "internet_requirement": model.get("internet_requirement"),
126
+ "source_display_order": model.get("source_display_order"),
119
127
  }
120
128
 
121
129
 
@@ -132,7 +140,10 @@ def recommend_catalog(profile: Dict[str, Any], *, engine: str = "local_mlx") ->
132
140
  ``profile`` is a dict shaped like ``auto_setup.SystemProfile.to_json()``
133
141
  (``os``, ``arch``, ``ram_mb``, ``gpu={vendor,vram_mb}`` …).
134
142
  """
135
- models = ENGINE_MODEL_CATALOG.get(engine, [])
143
+ models = [
144
+ model for model in ENGINE_MODEL_CATALOG.get(engine, [])
145
+ if str(model.get("modality") or "").lower() == "multimodal"
146
+ ]
136
147
  engine_available = _engine_available(engine, profile)
137
148
  ram_gb = _ram_gb(profile)
138
149
 
@@ -64,7 +64,7 @@ INVITE_GATE_ENABLED = False
64
64
  ALLOW_PLAINTEXT_API_KEYS = False
65
65
  CORS_ALLOW_NETWORK = False
66
66
  PUBLIC_MODEL = "openai:gpt-4o-mini"
67
- LOCAL_MODEL = "mlx-community/SmolLM-1.7B-Instruct-4bit"
67
+ LOCAL_MODEL = "mlx-community/gemma-4-12b-it-4bit"
68
68
  IS_PUBLIC_MODE = False
69
69
  keyring = None
70
70
 
@@ -889,7 +889,7 @@ def ensure_llamacpp_server(model_name: str) -> None:
889
889
 
890
890
  def engine_installed(engine: str) -> bool:
891
891
  if engine == "local_mlx":
892
- return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_lm"))
892
+ return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_vlm"))
893
893
  if engine == "ollama":
894
894
  return local_binary("ollama") is not None
895
895
  if engine == "vllm":