npm - ltcai - Versions diffs - 2.0.0 → 2.2.0 - Mend

ltcai 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/README.md +140 -589
package/auto_setup.py +17 -17
package/docs/CHANGELOG.md +99 -0
package/docs/MULTI_AGENT_RUNTIME.md +23 -5
package/docs/PLUGIN_SDK.md +21 -8
package/docs/REALTIME_COLLABORATION.md +19 -6
package/docs/V2_ARCHITECTURE.md +65 -33
package/docs/WORKFLOW_DESIGNER.md +18 -8
package/docs/architecture.md +127 -135
package/docs/kg-schema.md +3 -3
package/docs/public-deploy.md +2 -3
package/knowledge_graph.py +2 -2
package/latticeai/__init__.py +1 -1
package/latticeai/api/agents.py +57 -1
package/latticeai/api/marketplace.py +81 -0
package/latticeai/api/models.py +8 -0
package/latticeai/api/plugins.py +1 -1
package/latticeai/api/realtime.py +1 -1
package/latticeai/api/workflow_designer.py +10 -1
package/latticeai/core/config.py +1 -1
package/latticeai/core/graph_curator.py +2 -2
package/latticeai/core/marketplace.py +178 -0
package/latticeai/core/model_compat.py +7 -63
package/latticeai/core/model_resolution.py +1 -1
package/latticeai/core/multi_agent.py +359 -68
package/latticeai/core/plugins.py +29 -13
package/latticeai/core/realtime.py +1 -1
package/latticeai/core/workflow_engine.py +1 -1
package/latticeai/core/workspace_os.py +257 -10
package/latticeai/server_app.py +17 -5
package/latticeai/services/model_catalog.py +105 -153
package/latticeai/services/model_recommendation.py +28 -17
package/latticeai/services/model_runtime.py +2 -2
package/latticeai/services/platform_runtime.py +9 -5
package/llm_router.py +80 -92
package/ltcai_cli.py +2 -3
package/package.json +2 -2
package/static/agents.html +47 -3
package/static/chat.html +5 -6
package/static/plugins.html +51 -0
package/static/scripts/chat.js +34 -36
package/static/workflows.html +22 -0
package/static/workspace.html +1 -1
package/telegram_bot.py +1 -1

package/latticeai/services/model_catalog.py CHANGED

@@ -17,8 +17,8 @@ from typing import Dict, List, Optional
 ENGINE_INSTALLERS = {
     "local_mlx": {
-        "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-lm", "mlx-vlm", "huggingface_hub[cli]"],
-        "label": "Install MLX runtime",
+        "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm", "huggingface_hub[cli]"],
+        "label": "Install MLX-VLM runtime",
     },
     "openai": {
         "command": [sys.executable, "-m", "pip", "install", "openai"],
@@ -61,170 +61,109 @@ ENGINE_INSTALLERS = {
     },
 }
+def _model(
+    model_id: str,
+    name: str,
+    family: str,
+    tag: str,
+    size: str,
+    *,
+    source_country: str,
+    source_company: str,
+    execution_method: str,
+    internet_requirement: str = "모델을 다운로드할 때만 인터넷 필요; 실행 중에는 필요 없음",
+    pullable: bool = True,
+) -> Dict[str, object]:
+    clean_model_name = re.split(r"\s+via\s+", name, maxsplit=1)[0]
+    return {
+        "id": model_id,
+        "name": name,
+        "model_name": clean_model_name,
+        "family": family,
+        "tag": tag,
+        "size": size,
+        "pullable": pullable,
+        "modality": "multimodal",
+        "source_country": source_country,
+        "source_company": source_company,
+        "execution_method": execution_method,
+        "run_location": "내 컴퓨터에서만 실행",
+        "internet_requirement": internet_requirement,
+        "source_display_order": [
+            "source_country",
+            "source_company",
+            "execution_method",
+            "internet_requirement",
+            "model_name",
+        ],
+    }
+_RUNS_ON_THIS_COMPUTER = "내 컴퓨터에서만 실행"
 ENGINE_MODEL_CATALOG = {
     "local_mlx": [
-        {"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "family": "SmolLM", "tag": "local-light", "size": "963MB", "pullable": True},
-        {"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "family": "Gemma 3", "tag": "local-light", "size": "733MB", "pullable": True},
-        {"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "family": "Llama 3.x", "tag": "local-light", "size": "1.3GB", "pullable": True},
-        {"id": "mlx-community/gemma-2-2b-it-4bit", "name": "Gemma 2 2B", "family": "Gemma 2", "tag": "local-light", "size": "1.6GB", "pullable": True},
-        {"id": "mlx-community/gemma-4-e2b-4bit", "name": "Gemma 4 E2B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
-        {"id": "mlx-community/gemma-4-e2b-it-4bit", "name": "Gemma 4 E2B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "3.6GB", "pullable": True},
-        {"id": "mlx-community/gemma-4-e4b-4bit", "name": "Gemma 4 E4B Base", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
-        {"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "5.2GB", "pullable": True},
-        {"id": "mlx-community/Qwen3-VL-4B-Instruct-4bit", "name": "Qwen3-VL 4B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "2.7GB", "pullable": True},
-        {"id": "mlx-community/Qwen3-VL-8B-Instruct-4bit", "name": "Qwen3-VL 8B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "4.8GB", "pullable": True},
-        {"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "4.4GB", "pullable": True},
-        {"id": "mlx-community/gemma-3-4b-it-4bit", "name": "Gemma 3 4B", "family": "Gemma 3", "tag": "local-vlm", "size": "3.3GB", "pullable": True},
-        {"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "family": "Llama 3.x", "tag": "local-general", "size": "2.0GB", "pullable": True},
-        {"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "family": "Llama 3.1", "tag": "local-general", "size": "4.7GB", "pullable": True},
-        {"id": "mlx-community/gemma-2-9b-it-4bit", "name": "Gemma 2 9B", "family": "Gemma 2", "tag": "local-general", "size": "5.4GB", "pullable": True},
-        {"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "family": "Gemma 3", "tag": "local-vlm", "size": "8.0GB", "pullable": True},
-        {"id": "mlx-community/Phi-3.5-mini-instruct-4bit", "name": "Phi 3.5 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
-        {"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "family": "Phi", "tag": "local-coding", "size": "2.2GB", "pullable": True},
-        {"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "family": "Phi", "tag": "local-coding", "size": "8.3GB", "pullable": True},
-        {"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B Instruct v0.3", "family": "Mistral", "tag": "local-general", "size": "4.1GB", "pullable": True},
-        {"id": "mlx-community/Ministral-8B-Instruct-2410-4bit", "name": "Ministral 8B Instruct", "family": "Mistral", "tag": "local-general", "size": "4.5GB", "pullable": True},
-        {"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "family": "Mistral", "tag": "local-large", "size": "13.3GB", "pullable": True},
-        {"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B", "family": "Qwen2.5", "tag": "local-coding", "size": "18.5GB", "pullable": True},
-        {"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "18GB", "pullable": True},
-        {"id": "mlx-community/gemma-3-27b-it-4bit", "name": "Gemma 3 27B", "family": "Gemma 3", "tag": "local-vlm", "size": "17GB", "pullable": True},
-        {"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "15.6GB", "pullable": True},
-        {"id": "mlx-community/gemma-4-31b-it-4bit", "name": "Gemma 4 31B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "18.4GB", "pullable": True},
-        {"id": "mlx-community/gpt-oss-20b-MXFP4-Q8", "name": "GPT-OSS 20B", "family": "GPT-OSS", "tag": "local-reasoning", "size": "12.1GB", "pullable": True},
-        {"id": "mlx-community/gpt-oss-120b-MXFP4-Q4", "name": "GPT-OSS 120B", "family": "GPT-OSS", "tag": "local-large", "size": "62.3GB", "pullable": True},
-        {"id": "mlx-community/Llama-3.3-70B-Instruct-4bit", "name": "Llama 3.3 70B", "family": "Llama 3.x", "tag": "local-general", "size": "40GB+", "pullable": True},
-        {"id": "mlx-community/Llama-3.1-70B-Instruct-4bit", "name": "Llama 3.1 70B", "family": "Llama 3.1", "tag": "local-general", "size": "40GB+", "pullable": True},
+        _model("mlx-community/gemma-4-e2b-4bit", "Gemma 4 E2B Base", "Gemma 4", "local-vlm", "3.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/gemma-4-e2b-it-4bit", "Gemma 4 E2B Instruct", "Gemma 4", "local-vlm", "3.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/gemma-4-e4b-4bit", "Gemma 4 E4B Base", "Gemma 4", "local-vlm", "5.2GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/gemma-4-e4b-it-4bit", "Gemma 4 E4B Instruct", "Gemma 4", "local-vlm", "5.2GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/gemma-4-12b-it-4bit", "Gemma 4 12B Instruct", "Gemma 4", "local-vlm", "7.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/gemma-4-26b-a4b-it-4bit", "Gemma 4 26B A4B Instruct", "Gemma 4", "local-vlm", "15.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/gemma-4-31b-it-4bit", "Gemma 4 31B Instruct", "Gemma 4", "local-vlm", "18.4GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/Qwen3-VL-4B-Instruct-4bit", "Qwen3-VL 4B", "Qwen3-VL", "local-vlm", "2.7GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/Qwen3-VL-8B-Instruct-4bit", "Qwen3-VL 8B", "Qwen3-VL", "local-vlm", "4.8GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "Qwen3-VL 30B A3B", "Qwen3-VL", "local-vlm", "18GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit", "Llama 4 Scout 17B 16E", "Llama 4", "local-vlm", "11.8GB", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
     ],
     "ollama": [
-        {"id": "ollama:qwen3-vl:4b", "name": "Qwen3-VL 4B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
-        {"id": "ollama:qwen3-vl:8b", "name": "Qwen3-VL 8B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
-        {"id": "ollama:qwen3-vl:30b", "name": "Qwen3-VL 30B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
-        {"id": "ollama:gpt-oss:20b", "name": "GPT-OSS 20B via Ollama", "family": "GPT-OSS", "tag": "local-reasoning", "size": "pull required", "pullable": True},
-        {"id": "ollama:gpt-oss:120b", "name": "GPT-OSS 120B via Ollama", "family": "GPT-OSS", "tag": "local-large", "size": "pull required", "pullable": True},
-        {"id": "ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "name": "Gemma 4 31B Q4 via Ollama", "family": "Gemma 4", "tag": "local-vlm", "size": "18.7GB", "pullable": True},
-        {"id": "ollama:qwen3:8b", "name": "Qwen3 8B via Ollama", "family": "Qwen", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:qwen2.5-coder:14b", "name": "Qwen2.5 Coder 14B via Ollama", "family": "Qwen", "tag": "local-coding", "size": "pull required", "pullable": True},
-        {"id": "ollama:gemma3:1b", "name": "Gemma 3 1B via Ollama", "family": "Gemma", "tag": "local-light", "size": "pull required", "pullable": True},
-        {"id": "ollama:gemma3:4b", "name": "Gemma 3 4B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:gemma3:4b-it-q4_K_M", "name": "Gemma 3 4B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
-        {"id": "ollama:gemma3:12b", "name": "Gemma 3 12B via Ollama", "family": "Gemma", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:gemma3:12b-it-q4_K_M", "name": "Gemma 3 12B q4_K_M via Ollama", "family": "Gemma", "tag": "quantized", "size": "pull required", "pullable": True},
-        {"id": "ollama:gemma3:27b", "name": "Gemma 3 27B via Ollama", "family": "Gemma", "tag": "local-large", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.2:1b", "name": "Llama 3.2 1B via Ollama", "family": "Llama 3.x", "tag": "local-light", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.2:3b", "name": "Llama 3.2 3B via Ollama", "family": "Llama 3.x", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.1:8b", "name": "Llama 3.1 8B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.1:8b-instruct-q4_0", "name": "Llama 3.1 8B q4_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.1:8b-instruct-q8_0", "name": "Llama 3.1 8B q8_0 via Ollama", "family": "Llama 3.1", "tag": "quantized", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.1:70b", "name": "Llama 3.1 70B via Ollama", "family": "Llama 3.1", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:llama3.3:70b", "name": "Llama 3.3 70B via Ollama", "family": "Llama 3.x", "tag": "local-large", "size": "pull required", "pullable": True},
-        {"id": "ollama:mistral:7b", "name": "Mistral 7B via Ollama", "family": "Mistral", "tag": "local-server", "size": "pull required", "pullable": True},
-        {"id": "ollama:mixtral:8x7b", "name": "Mixtral 8x7B via Ollama", "family": "Mistral", "tag": "local-large", "size": "pull required", "pullable": True},
-        {"id": "ollama:phi4-mini", "name": "Phi 4 Mini via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
-        {"id": "ollama:phi4", "name": "Phi 4 via Ollama", "family": "Phi", "tag": "local-coding", "size": "pull required", "pullable": True},
-        {"id": "ollama:smollm2:1.7b", "name": "SmolLM2 1.7B via Ollama", "family": "SmolLM", "tag": "local-light", "size": "pull required", "pullable": True},
-        {"id": "ollama:deepseek-r1:1.5b", "name": "DeepSeek-R1 1.5B via Ollama", "family": "DeepSeek", "tag": "local-light", "size": "pull required", "pullable": True},
-        {"id": "ollama:deepseek-r1:7b", "name": "DeepSeek-R1 7B via Ollama", "family": "DeepSeek", "tag": "local-reasoning", "size": "pull required", "pullable": True},
-        {"id": "ollama:deepseek-r1:8b", "name": "DeepSeek-R1 8B via Ollama", "family": "DeepSeek", "tag": "local-reasoning", "size": "pull required", "pullable": True},
-        {"id": "ollama:deepseek-r1:14b", "name": "DeepSeek-R1 14B via Ollama", "family": "DeepSeek", "tag": "local-reasoning", "size": "pull required", "pullable": True},
-        {"id": "ollama:deepseek-r1:32b", "name": "DeepSeek-R1 32B via Ollama", "family": "DeepSeek", "tag": "local-large", "size": "pull required", "pullable": True},
-        {"id": "ollama:deepseek-coder-v2:16b", "name": "DeepSeek-Coder-V2 16B via Ollama", "family": "DeepSeek", "tag": "local-coding", "size": "pull required", "pullable": True},
+        _model("ollama:qwen3-vl:4b", "Qwen3-VL 4B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("ollama:qwen3-vl:8b", "Qwen3-VL 8B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("ollama:qwen3-vl:30b", "Qwen3-VL 30B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("ollama:hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M", "Gemma 4 12B Q4 via Ollama", "Gemma 4", "local-vlm", "7.9GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "Gemma 4 31B Q4 via Ollama", "Gemma 4", "local-vlm", "18.7GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("ollama:hf.co/ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_M", "Llama 4 Scout Q4 via Ollama", "Llama 4", "local-vlm", "12GB", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
     ],
     "vllm": [
-        {"id": "vllm:openai/gpt-oss-20b", "name": "GPT-OSS 20B via vLLM", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
-        {"id": "vllm:openai/gpt-oss-120b", "name": "GPT-OSS 120B via vLLM", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
-        {"id": "vllm:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "vllm:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "vllm:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via vLLM", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "vllm:google/gemma-2-2b", "name": "Gemma 2 2B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:google/gemma-2-2b-it", "name": "Gemma 2 2B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:google/gemma-2-9b", "name": "Gemma 2 9B Base via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:google/gemma-2-9b-it", "name": "Gemma 2 9B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:google/gemma-3-4b-it", "name": "Gemma 3 4B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:google/gemma-3-12b-it", "name": "Gemma 3 12B via vLLM", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
-        {"id": "vllm:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
-        {"id": "vllm:microsoft/phi-4", "name": "Phi 4 via vLLM", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
-        {"id": "vllm:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via vLLM", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via vLLM", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
-        {"id": "vllm:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via vLLM", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "vllm:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via vLLM", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
-        {"id": "vllm:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
+        _model("vllm:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("vllm:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("vllm:google/gemma-4-12b-it", "Gemma 4 12B via vLLM", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("vllm:suitch/gemma-4-31B-it-4bit", "Gemma 4 31B via vLLM", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("vllm:meta-llama/Llama-4-Scout-17B-16E-Instruct", "Llama 4 Scout via vLLM", "Llama 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
     ],
     "lmstudio": [
-        {"id": "lmstudio:openai/gpt-oss-20b", "name": "GPT-OSS 20B via LM Studio", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
-        {"id": "lmstudio:openai/gpt-oss-120b", "name": "GPT-OSS 120B via LM Studio", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
-        {"id": "lmstudio:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B 4-bit via LM Studio", "family": "Gemma 4", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "lmstudio:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "lmstudio:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "lmstudio:Qwen/Qwen2.5-VL-7B-Instruct", "name": "Qwen2.5-VL 7B via LM Studio", "family": "Qwen2.5-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
-        {"id": "lmstudio:google/gemma-2-2b-it", "name": "Gemma 2 2B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:google/gemma-2-9b-it", "name": "Gemma 2 9B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:google/gemma-3-4b-it", "name": "Gemma 3 4B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:google/gemma-3-12b-it", "name": "Gemma 3 12B via LM Studio", "family": "Gemma", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:microsoft/Phi-3.5-mini-instruct", "name": "Phi 3.5 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
-        {"id": "lmstudio:microsoft/Phi-4-mini-instruct", "name": "Phi 4 Mini via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
-        {"id": "lmstudio:microsoft/phi-4", "name": "Phi 4 via LM Studio", "family": "Phi", "tag": "local-coding", "size": "server model", "pullable": True},
-        {"id": "lmstudio:mistralai/Mistral-7B-Instruct-v0.3", "name": "Mistral 7B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:mistralai/Ministral-8B-Instruct-2410", "name": "Ministral 8B via LM Studio", "family": "Mistral", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:mistralai/Mistral-Small-24B-Instruct-2501", "name": "Mistral Small 24B via LM Studio", "family": "Mistral", "tag": "local-large", "size": "server model", "pullable": True},
-        {"id": "lmstudio:meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B via LM Studio", "family": "Llama 3.x", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
-        {"id": "lmstudio:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B via LM Studio", "family": "Llama 3.x", "tag": "local-large", "size": "server model", "pullable": True},
-        {"id": "lmstudio:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
+        _model("lmstudio:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("lmstudio:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("lmstudio:ggml-org/gemma-4-12B-it-GGUF", "Gemma 4 12B 4-bit via LM Studio", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("lmstudio:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B 4-bit via LM Studio", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("lmstudio:meta-llama/Llama-4-Scout-17B-16E-Instruct", "Llama 4 Scout via LM Studio", "Llama 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
     ],
     "llamacpp": [
-        {"id": "llamacpp:ggml-org/gpt-oss-20b-GGUF", "name": "GPT-OSS 20B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:ggml-org/gpt-oss-120b-GGUF", "name": "GPT-OSS 120B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B GGUF via llama.cpp", "family": "Gemma 4", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "name": "Qwen3-VL 4B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "name": "Qwen3-VL 8B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:unsloth/gemma-2-2b-it-GGUF", "name": "Gemma 2 2B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:unsloth/gemma-2-9b-it-GGUF", "name": "Gemma 2 9B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:unsloth/gemma-3-4b-it-GGUF", "name": "Gemma 3 4B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/Mistral-7B-Instruct-v0.3-GGUF", "name": "Mistral 7B GGUF via llama.cpp", "family": "Mistral", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/Phi-3.5-mini-instruct-GGUF", "name": "Phi 3.5 Mini GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/phi-4-GGUF", "name": "Phi 4 GGUF via llama.cpp", "family": "Phi", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/Llama-3.2-3B-Instruct-GGUF", "name": "Llama 3.2 3B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/Llama-3.1-8B-Instruct-GGUF", "name": "Llama 3.1 8B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/Llama-3.3-70B-Instruct-GGUF", "name": "Llama 3.3 70B GGUF via llama.cpp", "family": "Llama 3.x", "tag": "local-large", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/Llama-3.1-70B-Instruct-GGUF", "name": "Llama 3.1 70B GGUF via llama.cpp", "family": "Llama 3.1", "tag": "local-server", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:unsloth/DeepSeek-R1-GGUF", "name": "DeepSeek-R1 GGUF via llama.cpp", "family": "DeepSeek", "tag": "gguf-q4", "size": "gguf", "pullable": True},
-        {"id": "llamacpp:bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", "name": "DeepSeek-Coder-V2 Lite GGUF via llama.cpp", "family": "DeepSeek", "tag": "gguf-q4", "size": "gguf", "pullable": True},
+        _model("llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "Qwen3-VL 4B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "Qwen3-VL 8B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("llamacpp:Qwen/Qwen3-VL-30B-A3B-Instruct-GGUF", "Qwen3-VL 30B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("llamacpp:ggml-org/gemma-4-12B-it-GGUF", "Gemma 4 12B GGUF via llama.cpp", "Gemma 4", "gguf-vlm", "gguf", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("llamacpp:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B GGUF via llama.cpp", "Gemma 4", "gguf-vlm", "gguf", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
+        _model("llamacpp:ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF", "Llama 4 Scout GGUF via llama.cpp", "Llama 4", "gguf-vlm", "gguf", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
     ],
 }
 MODEL_ENGINE_ALIASES = {
-    "gpt-oss-20b": {
-        "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
-        "ollama": "gpt-oss:20b",
-        "vllm": "openai/gpt-oss-20b",
-        "lmstudio": "openai/gpt-oss-20b",
-        "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
-    },
-    "openai/gpt-oss-20b": {
-        "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
-        "ollama": "gpt-oss:20b",
-        "vllm": "openai/gpt-oss-20b",
-        "lmstudio": "openai/gpt-oss-20b",
-        "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
-    },
-    "gpt-oss-120b": {
-        "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
-        "ollama": "gpt-oss:120b",
-        "vllm": "openai/gpt-oss-120b",
-        "lmstudio": "openai/gpt-oss-120b",
-        "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
-    },
-    "openai/gpt-oss-120b": {
-        "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
-        "ollama": "gpt-oss:120b",
-        "vllm": "openai/gpt-oss-120b",
-        "lmstudio": "openai/gpt-oss-120b",
-        "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
+    "gemma-4-12b-it-4bit": {
+        "local_mlx": "mlx-community/gemma-4-12b-it-4bit",
+        "ollama": "hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M",
+        "vllm": "google/gemma-4-12b-it",
+        "lmstudio": "ggml-org/gemma-4-12B-it-GGUF",
+        "llamacpp": "ggml-org/gemma-4-12B-it-GGUF",
+    },
+    "mlx-community/gemma-4-12b-it-4bit": {
+        "local_mlx": "mlx-community/gemma-4-12b-it-4bit",
+        "ollama": "hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M",
+        "vllm": "google/gemma-4-12b-it",
+        "lmstudio": "ggml-org/gemma-4-12B-it-GGUF",
+        "llamacpp": "ggml-org/gemma-4-12B-it-GGUF",
     },
     "gemma-4-31b-it-4bit": {
         "local_mlx": "mlx-community/gemma-4-31b-it-4bit",
@@ -247,13 +186,26 @@ MODEL_ENGINE_ALIASES = {
         "lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
         "llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
     },
+    "qwen3-vl-8b": {
+        "local_mlx": "mlx-community/Qwen3-VL-8B-Instruct-4bit",
+        "ollama": "qwen3-vl:8b",
+        "vllm": "Qwen/Qwen3-VL-8B-Instruct",
+        "lmstudio": "Qwen/Qwen3-VL-8B-Instruct",
+        "llamacpp": "Qwen/Qwen3-VL-8B-Instruct-GGUF",
+    },
+    "llama-4-scout": {
+        "local_mlx": "mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
+        "ollama": "hf.co/ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_M",
+        "vllm": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        "lmstudio": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        "llamacpp": "ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF",
+    },
 }
 _VERSIONED_MODEL_PATTERNS = (
     ("gemma", re.compile(r"\bgemma[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
     ("qwen", re.compile(r"\bqwen[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
     ("llama", re.compile(r"\bllama[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
-    ("phi", re.compile(r"\bphi[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
 )

package/latticeai/services/model_recommendation.py CHANGED Viewed

@@ -3,7 +3,8 @@
 Given a detected system profile (from :func:`auto_setup.probe`) this module
 classifies every model in :data:`model_catalog.ENGINE_MODEL_CATALOG` into one of
 three states — **recommended**, **compatible**, or **not_recommended** — and
-groups the result by model family (Gemma, Qwen, Llama, Phi, DeepSeek, …).
+groups the result by current multimodal model family (Gemma 4, Qwen3-VL,
+Llama 4).
 It is intentionally pure and dependency-light: the only input is a plain dict
 describing the machine, so it is fully unit-testable without touching real
@@ -28,12 +29,11 @@ NOT_RECOMMENDED = "not_recommended"
 # Apple-Silicon only.  Used to decide platform availability before sizing.
 _APPLE_ONLY_ENGINES = {"local_mlx"}
-# Family display order for the grouped view (best/newest first within a brand).
+# Family display order for the grouped view (newest multimodal generations first).
 _FAMILY_ORDER = [
-    "Gemma 4", "Gemma 3", "Gemma 2", "Gemma",
-    "Qwen3-VL", "Qwen2.5-VL", "Qwen2.5", "Qwen",
-    "Llama 3.x", "Llama 3.1", "Llama",
-    "Mistral", "Phi", "GPT-OSS", "DeepSeek", "SmolLM",
+    "Gemma 4",
+    "Qwen3-VL",
+    "Llama 4",
 ]
 _SIZE_RE = re.compile(r"([\d.]+)\s*(TB|GB|MB)", re.IGNORECASE)
@@ -44,7 +44,7 @@ def parse_size_gb(size: Any) -> Optional[float]:
     """Parse a catalog ``size`` string (``"4.7GB"``, ``"963MB"``, ``"40GB+"``).
     Returns ``None`` when the size is non-numeric (e.g. ``"pull required"`` or
-    ``"server model"``) so callers can treat it as "size unknown".
+    ``"실행 도구에서 관리"``) so callers can treat it as "size unknown".
     """
     if not isinstance(size, str):
         return None
@@ -92,30 +92,38 @@ def _classify_one(
     need_gb = estimated_ram_gb(size_gb) if size_gb is not None else None
     if not engine_available:
-        status, reason = NOT_RECOMMENDED, "Requires Apple Silicon (MLX runtime)"
+        status, reason = NOT_RECOMMENDED, "Apple Silicon과 MLX-VLM이 필요합니다"
     elif need_gb is None:
-        # Server/pull models have no fixed on-disk size — treat as compatible
-        # (the engine streams/pulls weights on demand).
-        status, reason = COMPATIBLE, "Served/pulled on demand by the engine"
+        # Tool-managed/pull models have no fixed on-disk size, so treat them as
+        # compatible and let the execution tool validate the exact model.
+        status, reason = COMPATIBLE, "선택한 실행 방식에서 필요할 때 모델을 받습니다"
     elif ram_gb <= 0:
-        status, reason = COMPATIBLE, "Memory unknown — verify before loading"
-    elif need_gb <= ram_gb * 0.6:
-        status, reason = RECOMMENDED, f"Fits comfortably (~{need_gb:.0f} GB of {ram_gb:.0f} GB RAM)"
+        status, reason = COMPATIBLE, "메모리 정보를 확인하지 못했습니다. 불러오기 전에 검증합니다"
+    elif need_gb <= ram_gb * 0.75:
+        status, reason = RECOMMENDED, f"현재 메모리에서 안정적으로 사용할 가능성이 높습니다 (~{need_gb:.0f} GB / {ram_gb:.0f} GB)"
     elif need_gb <= ram_gb * 0.9:
-        status, reason = COMPATIBLE, f"Runs but tight (~{need_gb:.0f} GB of {ram_gb:.0f} GB RAM)"
+        status, reason = COMPATIBLE, f"사용 가능하지만 여유가 적습니다 (~{need_gb:.0f} GB / {ram_gb:.0f} GB)"
     else:
-        status, reason = NOT_RECOMMENDED, f"Needs ~{need_gb:.0f} GB RAM (have {ram_gb:.0f} GB)"
+        status, reason = NOT_RECOMMENDED, f"권장 메모리가 부족합니다 (~{need_gb:.0f} GB 필요, 현재 {ram_gb:.0f} GB)"
     return {
         "id": model.get("id"),
         "name": model.get("name"),
+        "model_name": model.get("model_name") or model.get("name"),
         "family": model.get("family"),
         "tag": model.get("tag"),
+        "modality": model.get("modality") or "multimodal",
         "size": model.get("size"),
         "size_gb": size_gb,
         "required_ram_gb": need_gb,
         "status": status,
         "reason": reason,
+        "source_country": model.get("source_country"),
+        "source_company": model.get("source_company"),
+        "execution_method": model.get("execution_method"),
+        "run_location": model.get("run_location"),
+        "internet_requirement": model.get("internet_requirement"),
+        "source_display_order": model.get("source_display_order"),
     }
@@ -132,7 +140,10 @@ def recommend_catalog(profile: Dict[str, Any], *, engine: str = "local_mlx") ->
     ``profile`` is a dict shaped like ``auto_setup.SystemProfile.to_json()``
     (``os``, ``arch``, ``ram_mb``, ``gpu={vendor,vram_mb}`` …).
     """
-    models = ENGINE_MODEL_CATALOG.get(engine, [])
+    models = [
+        model for model in ENGINE_MODEL_CATALOG.get(engine, [])
+        if str(model.get("modality") or "").lower() == "multimodal"
+    ]
     engine_available = _engine_available(engine, profile)
     ram_gb = _ram_gb(profile)

package/latticeai/services/model_runtime.py CHANGED Viewed

@@ -64,7 +64,7 @@ INVITE_GATE_ENABLED = False
 ALLOW_PLAINTEXT_API_KEYS = False
 CORS_ALLOW_NETWORK = False
 PUBLIC_MODEL = "openai:gpt-4o-mini"
-LOCAL_MODEL = "mlx-community/SmolLM-1.7B-Instruct-4bit"
+LOCAL_MODEL = "mlx-community/gemma-4-12b-it-4bit"
 IS_PUBLIC_MODE = False
 keyring = None
@@ -889,7 +889,7 @@ def ensure_llamacpp_server(model_name: str) -> None:
 def engine_installed(engine: str) -> bool:
     if engine == "local_mlx":
-        return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_lm"))
+        return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_vlm"))
     if engine == "ollama":
         return local_binary("ollama") is not None
     if engine == "vllm":

package/latticeai/services/platform_runtime.py CHANGED Viewed

@@ -1,6 +1,6 @@
-"""v2.0 Agentic Workspace Platform runtime — cross-system wiring.
+"""v2 Agentic Workspace Platform runtime — cross-system wiring.
-This is the single place the four v2.0 subsystems (Plugin SDK, Workflow
+This is the single place the v2 subsystems (Plugin SDK, Workflow
 Designer, Multi-Agent Runtime, Realtime) connect to one another and to the
 workspace. Keeping it out of ``server_app`` honours the AGENTS.md preference for
 small, composable modules and keeps the wiring independently testable.
@@ -132,7 +132,7 @@ class PlatformRuntime:
             plugin_id = cfg.get("plugin_id") or cfg.get("plugin") or ""
             action = cfg.get("action") or "run_skill"
             result = self.registry.execute_action(
-                plugin_id, action, cfg.get("args") or {}, runners=self.plugin_capability_runners(user, scope)
+                plugin_id, action, cfg.get("args") or {}, runners=self.plugin_capability_runners(user, scope), workspace_id=scope
             )
             return result.as_dict()
         return runner
@@ -169,7 +169,7 @@ class PlatformRuntime:
     def run_agent(self, goal, user, scope, *, with_workflow: bool, roles=None, inputs=None) -> Dict[str, Any]:
         role_runner = default_role_runner(
             workflow_runner=(lambda wf_ref, ctx: self.run_workflow_by_id(wf_ref, user, scope, with_agent=False, inputs=ctx.inputs)) if with_workflow else None,
-            plugin_runner=lambda pid, ctx: self.registry.execute_action(pid, "run_skill", {}, runners=self.plugin_capability_runners(user, scope)).as_dict(),
+            plugin_runner=lambda pid, ctx: self.registry.execute_action(pid, "run_skill", {}, runners=self.plugin_capability_runners(user, scope), workspace_id=scope).as_dict(),
             context_provider=self._context_provider(user, scope),
         )
         result = MultiAgentOrchestrator(role_runner=role_runner).run(
@@ -178,6 +178,10 @@ class PlatformRuntime:
         run = self.store.record_agent_run(
             agent_id=result.agent_id, status=result.status, input_text=goal,
             output_text=result.output, timeline=result.timeline, relationships=[],
+            handoffs=result.handoffs, context_packets=result.context_packets,
+            plan=result.plan, plan_review=result.plan_review,
+            review_history=result.review_history, retry_history=result.retry_history,
+            memory_snapshots=result.memory_snapshots,
             user_email=user, graph=self.workspace_graph(), workspace_id=scope,
         )
         return {"agent_run_id": run["id"], "status": result.status, "output": result.output}
@@ -195,6 +199,6 @@ class PlatformRuntime:
     def build_orchestrator(self, user, scope) -> MultiAgentOrchestrator:
         return MultiAgentOrchestrator(role_runner=default_role_runner(
             workflow_runner=lambda wf_ref, ctx: self.run_workflow_by_id(wf_ref, user, scope, with_agent=False, inputs=ctx.inputs),
-            plugin_runner=lambda pid, ctx: self.registry.execute_action(pid, "run_skill", {}, runners=self.plugin_capability_runners(user, scope)).as_dict(),
+            plugin_runner=lambda pid, ctx: self.registry.execute_action(pid, "run_skill", {}, runners=self.plugin_capability_runners(user, scope), workspace_id=scope).as_dict(),
             context_provider=self._context_provider(user, scope),
         ))