ltcai 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ """Static local-model catalog, engine installers, and family-version filtering.
2
+
3
+ Extracted from :mod:`latticeai.services.model_runtime` so the runtime module
4
+ owns model lifecycle/loading logic while this module owns the behaviour-free
5
+ catalog data (engine installers, the per-engine model catalog, cross-engine
6
+ aliases) and the pure version-dedup helpers. Re-exported by ``model_runtime``
7
+ for backward compatibility, so existing imports such as
8
+ ``from latticeai.services.model_runtime import ENGINE_MODEL_CATALOG`` keep
9
+ working unchanged.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ import sys
16
+ from typing import Dict, List, Optional
17
+
18
# Install recipe per engine key. ``command`` is an argv list; pip-based recipes
# go through ``sys.executable -m pip`` so packages land in the interpreter that
# is running this module. Homebrew-based recipes additionally name the binary
# they need via ``requires_binary``.
ENGINE_INSTALLERS = {
    "local_mlx": dict(
        command=[sys.executable, "-m", "pip", "install", "--upgrade", "mlx-lm", "mlx-vlm", "huggingface_hub[cli]"],
        label="Install MLX runtime",
    ),
    # These engine keys share one recipe (the ``openai`` package). Each key
    # still gets its own dict and list object, exactly like a hand-written
    # literal would.
    **{
        engine_key: dict(
            command=[sys.executable, "-m", "pip", "install", "openai"],
            label="Install OpenAI-compatible SDK",
        )
        for engine_key in ("openai", "openrouter", "groq", "together", "xai")
    },
    "ollama": dict(
        command=["brew", "install", "ollama"],
        label="Install Ollama",
        requires_binary="brew",
    ),
    "vllm": dict(
        command=[sys.executable, "-m", "pip", "install", "vllm", "huggingface_hub[cli]"],
        label="Install vLLM runtime",
    ),
    "lmstudio": dict(
        command=["brew", "install", "--cask", "lm-studio"],
        label="Install LM Studio",
        requires_binary="brew",
    ),
    "llamacpp": dict(
        command=["brew", "install", "llama.cpp"],
        label="Install llama.cpp",
        requires_binary="brew",
    ),
}
63
+
64
def _catalog_entry(model_id, name, family, tag, size):
    """Build one catalog record; every catalog model is marked ``pullable``."""
    return {"id": model_id, "name": name, "family": family, "tag": tag, "size": size, "pullable": True}


# Per-engine model catalog. Each record carries ``id``, ``name``, ``family``,
# ``tag``, ``size`` and ``pullable``. ``size`` is a display string: a figure
# such as "4.7GB" where one is listed, otherwise a placeholder ("pull required",
# "server model", "gguf").
ENGINE_MODEL_CATALOG = {
    "local_mlx": [
        _catalog_entry("mlx-community/SmolLM-1.7B-Instruct-4bit", "SmolLM 1.7B", "SmolLM", "local-light", "963MB"),
        _catalog_entry("mlx-community/gemma-3-1b-it-4bit", "Gemma 3 1B", "Gemma 3", "local-light", "733MB"),
        _catalog_entry("mlx-community/Llama-3.2-1B-Instruct-4bit", "Llama 3.2 1B", "Llama 3.x", "local-light", "1.3GB"),
        _catalog_entry("mlx-community/gemma-2-2b-it-4bit", "Gemma 2 2B", "Gemma 2", "local-light", "1.6GB"),
        _catalog_entry("mlx-community/gemma-4-e2b-4bit", "Gemma 4 E2B Base", "Gemma 4", "local-vlm", "3.6GB"),
        _catalog_entry("mlx-community/gemma-4-e2b-it-4bit", "Gemma 4 E2B Instruct", "Gemma 4", "local-vlm", "3.6GB"),
        _catalog_entry("mlx-community/gemma-4-e4b-4bit", "Gemma 4 E4B Base", "Gemma 4", "local-vlm", "5.2GB"),
        _catalog_entry("mlx-community/gemma-4-e4b-it-4bit", "Gemma 4 E4B Instruct", "Gemma 4", "local-vlm", "5.2GB"),
        _catalog_entry("mlx-community/Qwen3-VL-4B-Instruct-4bit", "Qwen3-VL 4B", "Qwen3-VL", "local-vlm", "2.7GB"),
        _catalog_entry("mlx-community/Qwen3-VL-8B-Instruct-4bit", "Qwen3-VL 8B", "Qwen3-VL", "local-vlm", "4.8GB"),
        _catalog_entry("mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "Qwen2.5-VL 7B", "Qwen2.5-VL", "local-vlm", "4.4GB"),
        _catalog_entry("mlx-community/gemma-3-4b-it-4bit", "Gemma 3 4B", "Gemma 3", "local-vlm", "3.3GB"),
        _catalog_entry("mlx-community/Llama-3.2-3B-Instruct-4bit", "Llama 3.2 3B", "Llama 3.x", "local-general", "2.0GB"),
        _catalog_entry("mlx-community/Llama-3.1-8B-Instruct-4bit", "Llama 3.1 8B", "Llama 3.1", "local-general", "4.7GB"),
        _catalog_entry("mlx-community/gemma-2-9b-it-4bit", "Gemma 2 9B", "Gemma 2", "local-general", "5.4GB"),
        _catalog_entry("mlx-community/gemma-3-12b-it-4bit", "Gemma 3 12B", "Gemma 3", "local-vlm", "8.0GB"),
        _catalog_entry("mlx-community/Phi-3.5-mini-instruct-4bit", "Phi 3.5 Mini", "Phi", "local-coding", "2.2GB"),
        _catalog_entry("mlx-community/Phi-4-mini-instruct-4bit", "Phi 4 Mini", "Phi", "local-coding", "2.2GB"),
        _catalog_entry("mlx-community/phi-4-4bit", "Phi 4", "Phi", "local-coding", "8.3GB"),
        _catalog_entry("mlx-community/Mistral-7B-Instruct-v0.3-4bit", "Mistral 7B Instruct v0.3", "Mistral", "local-general", "4.1GB"),
        _catalog_entry("mlx-community/Ministral-8B-Instruct-2410-4bit", "Ministral 8B Instruct", "Mistral", "local-general", "4.5GB"),
        _catalog_entry("mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "Mistral Small 24B", "Mistral", "local-large", "13.3GB"),
        _catalog_entry("mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "Qwen2.5 Coder 32B", "Qwen2.5", "local-coding", "18.5GB"),
        _catalog_entry("mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "Qwen3-VL 30B A3B", "Qwen3-VL", "local-vlm", "18GB"),
        _catalog_entry("mlx-community/gemma-3-27b-it-4bit", "Gemma 3 27B", "Gemma 3", "local-vlm", "17GB"),
        _catalog_entry("mlx-community/gemma-4-26b-a4b-it-4bit", "Gemma 4 26B A4B Instruct", "Gemma 4", "local-vlm", "15.6GB"),
        _catalog_entry("mlx-community/gemma-4-31b-it-4bit", "Gemma 4 31B Instruct", "Gemma 4", "local-vlm", "18.4GB"),
        _catalog_entry("mlx-community/gpt-oss-20b-MXFP4-Q8", "GPT-OSS 20B", "GPT-OSS", "local-reasoning", "12.1GB"),
        _catalog_entry("mlx-community/gpt-oss-120b-MXFP4-Q4", "GPT-OSS 120B", "GPT-OSS", "local-large", "62.3GB"),
        _catalog_entry("mlx-community/Llama-3.3-70B-Instruct-4bit", "Llama 3.3 70B", "Llama 3.x", "local-general", "40GB+"),
        _catalog_entry("mlx-community/Llama-3.1-70B-Instruct-4bit", "Llama 3.1 70B", "Llama 3.1", "local-general", "40GB+"),
    ],
    "ollama": [
        _catalog_entry("ollama:qwen3-vl:4b", "Qwen3-VL 4B via Ollama", "Qwen3-VL", "local-vlm", "pull required"),
        _catalog_entry("ollama:qwen3-vl:8b", "Qwen3-VL 8B via Ollama", "Qwen3-VL", "local-vlm", "pull required"),
        _catalog_entry("ollama:qwen3-vl:30b", "Qwen3-VL 30B via Ollama", "Qwen3-VL", "local-vlm", "pull required"),
        _catalog_entry("ollama:gpt-oss:20b", "GPT-OSS 20B via Ollama", "GPT-OSS", "local-reasoning", "pull required"),
        _catalog_entry("ollama:gpt-oss:120b", "GPT-OSS 120B via Ollama", "GPT-OSS", "local-large", "pull required"),
        _catalog_entry("ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "Gemma 4 31B Q4 via Ollama", "Gemma 4", "local-vlm", "18.7GB"),
        _catalog_entry("ollama:qwen3:8b", "Qwen3 8B via Ollama", "Qwen", "local-server", "pull required"),
        _catalog_entry("ollama:qwen2.5-coder:14b", "Qwen2.5 Coder 14B via Ollama", "Qwen", "local-coding", "pull required"),
        _catalog_entry("ollama:gemma3:1b", "Gemma 3 1B via Ollama", "Gemma", "local-light", "pull required"),
        _catalog_entry("ollama:gemma3:4b", "Gemma 3 4B via Ollama", "Gemma", "local-server", "pull required"),
        _catalog_entry("ollama:gemma3:4b-it-q4_K_M", "Gemma 3 4B q4_K_M via Ollama", "Gemma", "quantized", "pull required"),
        _catalog_entry("ollama:gemma3:12b", "Gemma 3 12B via Ollama", "Gemma", "local-server", "pull required"),
        _catalog_entry("ollama:gemma3:12b-it-q4_K_M", "Gemma 3 12B q4_K_M via Ollama", "Gemma", "quantized", "pull required"),
        _catalog_entry("ollama:gemma3:27b", "Gemma 3 27B via Ollama", "Gemma", "local-large", "pull required"),
        _catalog_entry("ollama:llama3.2:1b", "Llama 3.2 1B via Ollama", "Llama 3.x", "local-light", "pull required"),
        _catalog_entry("ollama:llama3.2:3b", "Llama 3.2 3B via Ollama", "Llama 3.x", "local-server", "pull required"),
        _catalog_entry("ollama:llama3.1:8b", "Llama 3.1 8B via Ollama", "Llama 3.1", "local-server", "pull required"),
        _catalog_entry("ollama:llama3.1:8b-instruct-q4_0", "Llama 3.1 8B q4_0 via Ollama", "Llama 3.1", "quantized", "pull required"),
        _catalog_entry("ollama:llama3.1:8b-instruct-q8_0", "Llama 3.1 8B q8_0 via Ollama", "Llama 3.1", "quantized", "pull required"),
        _catalog_entry("ollama:llama3.1:70b", "Llama 3.1 70B via Ollama", "Llama 3.1", "local-server", "pull required"),
        _catalog_entry("ollama:llama3.3:70b", "Llama 3.3 70B via Ollama", "Llama 3.x", "local-large", "pull required"),
        _catalog_entry("ollama:mistral:7b", "Mistral 7B via Ollama", "Mistral", "local-server", "pull required"),
        _catalog_entry("ollama:mixtral:8x7b", "Mixtral 8x7B via Ollama", "Mistral", "local-large", "pull required"),
        _catalog_entry("ollama:phi4-mini", "Phi 4 Mini via Ollama", "Phi", "local-coding", "pull required"),
        _catalog_entry("ollama:phi4", "Phi 4 via Ollama", "Phi", "local-coding", "pull required"),
        _catalog_entry("ollama:smollm2:1.7b", "SmolLM2 1.7B via Ollama", "SmolLM", "local-light", "pull required"),
        _catalog_entry("ollama:deepseek-r1:1.5b", "DeepSeek-R1 1.5B via Ollama", "DeepSeek", "local-light", "pull required"),
        _catalog_entry("ollama:deepseek-r1:7b", "DeepSeek-R1 7B via Ollama", "DeepSeek", "local-reasoning", "pull required"),
        _catalog_entry("ollama:deepseek-r1:8b", "DeepSeek-R1 8B via Ollama", "DeepSeek", "local-reasoning", "pull required"),
        _catalog_entry("ollama:deepseek-r1:14b", "DeepSeek-R1 14B via Ollama", "DeepSeek", "local-reasoning", "pull required"),
        _catalog_entry("ollama:deepseek-r1:32b", "DeepSeek-R1 32B via Ollama", "DeepSeek", "local-large", "pull required"),
        _catalog_entry("ollama:deepseek-coder-v2:16b", "DeepSeek-Coder-V2 16B via Ollama", "DeepSeek", "local-coding", "pull required"),
    ],
    "vllm": [
        _catalog_entry("vllm:openai/gpt-oss-20b", "GPT-OSS 20B via vLLM", "GPT-OSS", "local-reasoning", "server model"),
        _catalog_entry("vllm:openai/gpt-oss-120b", "GPT-OSS 120B via vLLM", "GPT-OSS", "local-large", "server model"),
        _catalog_entry("vllm:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via vLLM", "Qwen3-VL", "local-vlm", "server model"),
        _catalog_entry("vllm:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via vLLM", "Qwen3-VL", "local-vlm", "server model"),
        _catalog_entry("vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via vLLM", "Qwen3-VL", "local-vlm", "server model"),
        _catalog_entry("vllm:Qwen/Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL 7B via vLLM", "Qwen2.5-VL", "local-vlm", "server model"),
        _catalog_entry("vllm:google/gemma-2-2b", "Gemma 2 2B Base via vLLM", "Gemma", "local-server", "server model"),
        _catalog_entry("vllm:google/gemma-2-2b-it", "Gemma 2 2B via vLLM", "Gemma", "local-server", "server model"),
        _catalog_entry("vllm:google/gemma-2-9b", "Gemma 2 9B Base via vLLM", "Gemma", "local-server", "server model"),
        _catalog_entry("vllm:google/gemma-2-9b-it", "Gemma 2 9B via vLLM", "Gemma", "local-server", "server model"),
        _catalog_entry("vllm:google/gemma-3-4b-it", "Gemma 3 4B via vLLM", "Gemma", "local-server", "server model"),
        _catalog_entry("vllm:google/gemma-3-12b-it", "Gemma 3 12B via vLLM", "Gemma", "local-server", "server model"),
        _catalog_entry("vllm:microsoft/Phi-3.5-mini-instruct", "Phi 3.5 Mini via vLLM", "Phi", "local-coding", "server model"),
        _catalog_entry("vllm:microsoft/Phi-4-mini-instruct", "Phi 4 Mini via vLLM", "Phi", "local-coding", "server model"),
        _catalog_entry("vllm:microsoft/phi-4", "Phi 4 via vLLM", "Phi", "local-coding", "server model"),
        _catalog_entry("vllm:mistralai/Mistral-7B-Instruct-v0.3", "Mistral 7B via vLLM", "Mistral", "local-server", "server model"),
        _catalog_entry("vllm:mistralai/Ministral-8B-Instruct-2410", "Ministral 8B via vLLM", "Mistral", "local-server", "server model"),
        _catalog_entry("vllm:mistralai/Mistral-Small-24B-Instruct-2501", "Mistral Small 24B via vLLM", "Mistral", "local-large", "server model"),
        _catalog_entry("vllm:meta-llama/Llama-3.2-3B-Instruct", "Llama 3.2 3B via vLLM", "Llama 3.x", "local-server", "server model"),
        _catalog_entry("vllm:meta-llama/Llama-3.1-8B-Instruct", "Llama 3.1 8B via vLLM", "Llama 3.1", "local-server", "server model"),
        _catalog_entry("vllm:meta-llama/Llama-3.3-70B-Instruct", "Llama 3.3 70B via vLLM", "Llama 3.x", "local-large", "server model"),
        _catalog_entry("vllm:meta-llama/Llama-3.1-70B-Instruct", "Llama 3.1 70B via vLLM", "Llama 3.1", "local-server", "server model"),
    ],
    "lmstudio": [
        _catalog_entry("lmstudio:openai/gpt-oss-20b", "GPT-OSS 20B via LM Studio", "GPT-OSS", "local-reasoning", "server model"),
        _catalog_entry("lmstudio:openai/gpt-oss-120b", "GPT-OSS 120B via LM Studio", "GPT-OSS", "local-large", "server model"),
        _catalog_entry("lmstudio:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B 4-bit via LM Studio", "Gemma 4", "local-vlm", "server model"),
        _catalog_entry("lmstudio:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via LM Studio", "Qwen3-VL", "local-vlm", "server model"),
        _catalog_entry("lmstudio:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via LM Studio", "Qwen3-VL", "local-vlm", "server model"),
        _catalog_entry("lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via LM Studio", "Qwen3-VL", "local-vlm", "server model"),
        _catalog_entry("lmstudio:Qwen/Qwen2.5-VL-7B-Instruct", "Qwen2.5-VL 7B via LM Studio", "Qwen2.5-VL", "local-vlm", "server model"),
        _catalog_entry("lmstudio:google/gemma-2-2b-it", "Gemma 2 2B via LM Studio", "Gemma", "local-server", "server model"),
        _catalog_entry("lmstudio:google/gemma-2-9b-it", "Gemma 2 9B via LM Studio", "Gemma", "local-server", "server model"),
        _catalog_entry("lmstudio:google/gemma-3-4b-it", "Gemma 3 4B via LM Studio", "Gemma", "local-server", "server model"),
        _catalog_entry("lmstudio:google/gemma-3-12b-it", "Gemma 3 12B via LM Studio", "Gemma", "local-server", "server model"),
        _catalog_entry("lmstudio:microsoft/Phi-3.5-mini-instruct", "Phi 3.5 Mini via LM Studio", "Phi", "local-coding", "server model"),
        _catalog_entry("lmstudio:microsoft/Phi-4-mini-instruct", "Phi 4 Mini via LM Studio", "Phi", "local-coding", "server model"),
        _catalog_entry("lmstudio:microsoft/phi-4", "Phi 4 via LM Studio", "Phi", "local-coding", "server model"),
        _catalog_entry("lmstudio:mistralai/Mistral-7B-Instruct-v0.3", "Mistral 7B via LM Studio", "Mistral", "local-server", "server model"),
        _catalog_entry("lmstudio:mistralai/Ministral-8B-Instruct-2410", "Ministral 8B via LM Studio", "Mistral", "local-server", "server model"),
        _catalog_entry("lmstudio:mistralai/Mistral-Small-24B-Instruct-2501", "Mistral Small 24B via LM Studio", "Mistral", "local-large", "server model"),
        _catalog_entry("lmstudio:meta-llama/Llama-3.2-3B-Instruct", "Llama 3.2 3B via LM Studio", "Llama 3.x", "local-server", "server model"),
        _catalog_entry("lmstudio:meta-llama/Llama-3.1-8B-Instruct", "Llama 3.1 8B via LM Studio", "Llama 3.1", "local-server", "server model"),
        _catalog_entry("lmstudio:meta-llama/Llama-3.3-70B-Instruct", "Llama 3.3 70B via LM Studio", "Llama 3.x", "local-large", "server model"),
        _catalog_entry("lmstudio:meta-llama/Llama-3.1-70B-Instruct", "Llama 3.1 70B via LM Studio", "Llama 3.1", "local-server", "server model"),
    ],
    "llamacpp": [
        _catalog_entry("llamacpp:ggml-org/gpt-oss-20b-GGUF", "GPT-OSS 20B GGUF via llama.cpp", "GPT-OSS", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:ggml-org/gpt-oss-120b-GGUF", "GPT-OSS 120B GGUF via llama.cpp", "GPT-OSS", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B GGUF via llama.cpp", "Gemma 4", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "Qwen3-VL 4B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf"),
        _catalog_entry("llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "Qwen3-VL 8B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf"),
        _catalog_entry("llamacpp:unsloth/gemma-2-2b-it-GGUF", "Gemma 2 2B GGUF via llama.cpp", "Gemma", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:unsloth/gemma-2-9b-it-GGUF", "Gemma 2 9B GGUF via llama.cpp", "Gemma", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:unsloth/gemma-3-4b-it-GGUF", "Gemma 3 4B GGUF via llama.cpp", "Gemma", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:bartowski/Mistral-7B-Instruct-v0.3-GGUF", "Mistral 7B GGUF via llama.cpp", "Mistral", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:bartowski/Phi-3.5-mini-instruct-GGUF", "Phi 3.5 Mini GGUF via llama.cpp", "Phi", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:bartowski/phi-4-GGUF", "Phi 4 GGUF via llama.cpp", "Phi", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama 3.2 3B GGUF via llama.cpp", "Llama 3.x", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:bartowski/Llama-3.1-8B-Instruct-GGUF", "Llama 3.1 8B GGUF via llama.cpp", "Llama 3.1", "local-server", "gguf"),
        _catalog_entry("llamacpp:bartowski/Llama-3.3-70B-Instruct-GGUF", "Llama 3.3 70B GGUF via llama.cpp", "Llama 3.x", "local-large", "gguf"),
        _catalog_entry("llamacpp:bartowski/Llama-3.1-70B-Instruct-GGUF", "Llama 3.1 70B GGUF via llama.cpp", "Llama 3.1", "local-server", "gguf"),
        _catalog_entry("llamacpp:unsloth/DeepSeek-R1-GGUF", "DeepSeek-R1 GGUF via llama.cpp", "DeepSeek", "gguf-q4", "gguf"),
        _catalog_entry("llamacpp:bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", "DeepSeek-Coder-V2 Lite GGUF via llama.cpp", "DeepSeek", "gguf-q4", "gguf"),
    ],
}
199
+
200
# Cross-engine aliases: maps a model name to the identifier each engine uses
# for that same model. Several spellings of one model share a target map; every
# alias receives its own copy so the entries stay independent objects.
MODEL_ENGINE_ALIASES = {
    alias: dict(engine_ids)
    for aliases, engine_ids in (
        (
            ("gpt-oss-20b", "openai/gpt-oss-20b"),
            {
                "local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
                "ollama": "gpt-oss:20b",
                "vllm": "openai/gpt-oss-20b",
                "lmstudio": "openai/gpt-oss-20b",
                "llamacpp": "ggml-org/gpt-oss-20b-GGUF",
            },
        ),
        (
            ("gpt-oss-120b", "openai/gpt-oss-120b"),
            {
                "local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
                "ollama": "gpt-oss:120b",
                "vllm": "openai/gpt-oss-120b",
                "lmstudio": "openai/gpt-oss-120b",
                "llamacpp": "ggml-org/gpt-oss-120b-GGUF",
            },
        ),
        (
            (
                "gemma-4-31b-it-4bit",
                "suitch/gemma-4-31b-it-4bit",
                "mlx-community/gemma-4-31b-it-4bit",
            ),
            {
                "local_mlx": "mlx-community/gemma-4-31b-it-4bit",
                "ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
                "vllm": "suitch/gemma-4-31B-it-4bit",
                "lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
                "llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
            },
        ),
    )
    for alias in aliases
}
251
+
252
# (family key, pattern) pairs. Group 1 of each pattern captures the version
# number that directly follows the family word, e.g. "gemma-3" -> "3",
# "Qwen2.5" -> "2.5". Order matters: the first family that matches wins.
_VERSIONED_MODEL_PATTERNS = (
    ("gemma", re.compile(r"\bgemma[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
    ("qwen", re.compile(r"\bqwen[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
    ("llama", re.compile(r"\bllama[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
    ("phi", re.compile(r"\bphi[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
)


def _version_tuple(raw: str) -> tuple[int, ...]:
    """Turn a dotted version string such as ``"3.1"`` into ``(3, 1)``.

    Non-numeric components are skipped, so the result may be empty.
    """
    return tuple(int(part) for part in raw.split(".") if part.isdigit())


def _model_family_version(model: Dict[str, object]) -> Optional[tuple[str, tuple[int, ...]]]:
    """Detect the versioned family of ``model`` from its family, name and id.

    Returns ``(family_key, version)`` for the first family in
    ``_VERSIONED_MODEL_PATTERNS`` that occurs in the combined text, or ``None``
    when no versioned family is found.

    The version is the highest one among all occurrences of that family in the
    text, not the first. A placeholder family label such as ``"Llama 3.x"``
    matches as a bare ``3``; taking only the first hit would rank a Llama 3.3
    model as ``(3,)`` -- below Llama 3.1's ``(3, 1)`` -- even though its name
    and id both spell out ``3.3``.
    """
    text = " ".join(str(model.get(key) or "") for key in ("family", "name", "id"))
    for family, pattern in _VERSIONED_MODEL_PATTERNS:
        versions = [
            version
            for version in (_version_tuple(hit.group(1)) for hit in pattern.finditer(text))
            if version
        ]
        if versions:
            return family, max(versions)
    return None


def filter_lower_family_versions(models: List[Dict[str, object]]) -> List[Dict[str, object]]:
    """Keep only the newest version of each versioned model family.

    Models whose family/version cannot be detected are always kept. Input order
    is preserved and the input list is not modified.

    Args:
        models: Catalog-style dicts; the ``family``, ``name`` and ``id`` keys
            are inspected (missing keys are treated as empty).

    Returns:
        A new list containing every unversioned model plus, per family, the
        models whose version equals that family's maximum.
    """
    newest: Dict[str, tuple[int, ...]] = {}
    detected: List[tuple[Dict[str, object], Optional[tuple[str, tuple[int, ...]]]]] = []
    for model in models:
        version_info = _model_family_version(model)
        detected.append((model, version_info))
        if not version_info:
            continue
        family, version = version_info
        if version > newest.get(family, (0,)):
            newest[family] = version
    return [
        model
        for model, version_info in detected
        # Falling back to the model's own version keeps it when the family has
        # no recorded maximum.
        if not version_info or version_info[1] >= newest.get(version_info[0], version_info[1])
    ]
@@ -0,0 +1,183 @@
1
+ """Hardware-aware local model recommendation.
2
+
3
+ Given a detected system profile (from :func:`auto_setup.probe`) this module
4
+ classifies every model in :data:`model_catalog.ENGINE_MODEL_CATALOG` into one of
5
+ three states — **recommended**, **compatible**, or **not_recommended** — and
6
+ groups the result by model family (Gemma, Qwen, Llama, Phi, DeepSeek, …).
7
+
8
+ It is intentionally pure and dependency-light: the only input is a plain dict
9
+ describing the machine, so it is fully unit-testable without touching real
10
+ hardware, and it does not import the FastAPI app or the runtime. The setup /
11
+ onboarding routers build the profile via ``auto_setup.probe().to_json()`` and
12
+ hand it here.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ from typing import Any, Dict, List, Optional
19
+
20
+ from latticeai.services.model_catalog import ENGINE_MODEL_CATALOG
21
+
22
# ── status vocabulary ─────────────────────────────────────────────────────────
# The three classification outcomes. These strings are the ``status`` value of
# every classified model and the keys of the ``counts`` summary.
RECOMMENDED = "recommended"
COMPATIBLE = "compatible"
NOT_RECOMMENDED = "not_recommended"

# Engines whose models load on any OS (given the engine binary) vs. MLX which is
# Apple-Silicon only. Used to decide platform availability before sizing.
_APPLE_ONLY_ENGINES = {"local_mlx"}

# Family display order for the grouped view (best/newest first within a brand).
# A family that is not listed here ranks after every listed family.
_FAMILY_ORDER = [
    "Gemma 4", "Gemma 3", "Gemma 2", "Gemma",
    "Qwen3-VL", "Qwen2.5-VL", "Qwen2.5", "Qwen",
    "Llama 3.x", "Llama 3.1", "Llama",
    "Mistral", "Phi", "GPT-OSS", "DeepSeek", "SmolLM",
]
38
+
39
# A number followed by a TB/GB/MB unit, e.g. "4.7GB", "963 MB", "40GB+".
_SIZE_RE = re.compile(r"([\d.]+)\s*(TB|GB|MB)", re.IGNORECASE)
# Multiplier that converts a value in the given unit to gigabytes.
_UNIT_GB = {"TB": 1024.0, "GB": 1.0, "MB": 1.0 / 1024.0}


def parse_size_gb(size: Any) -> Optional[float]:
    """Parse a catalog ``size`` string (``"4.7GB"``, ``"963MB"``, ``"40GB+"``).

    Returns the size in gigabytes rounded to three decimals.

    Returns ``None`` when the size is non-numeric (e.g. ``"pull required"`` or
    ``"server model"``), is not a string, or carries a malformed number such as
    ``"1.2.3GB"``, so callers can treat it as "size unknown".
    """
    if not isinstance(size, str):
        return None
    match = _SIZE_RE.search(size)
    if not match:
        return None
    try:
        value = float(match.group(1))
    except ValueError:
        # ``[\d.]+`` also accepts dot-only or multi-dot runs that are not
        # numbers; report those as unknown instead of raising.
        return None
    return round(value * _UNIT_GB[match.group(2).upper()], 3)
56
+
57
+
58
def estimated_ram_gb(size_gb: float) -> float:
    """Estimate the RAM (GB) needed to run a model of ``size_gb`` gigabytes.

    The weights are scaled by 25% and a flat 2.5 GB is added, covering KV cache
    and OS working set; the result is rounded to two decimals.
    """
    scaled_weights = size_gb * 1.25
    total = scaled_weights + 2.5
    return round(total, 2)
61
+
62
+
63
def is_apple_silicon(profile: Dict[str, Any]) -> bool:
    """Report whether ``profile`` describes an Apple Silicon Mac.

    True when ``os`` is ``darwin`` and either the GPU vendor is ``apple`` or the
    architecture is ``arm64``/``aarch64``. All comparisons are case-insensitive
    and missing fields are treated as empty.
    """

    def _lowered(value: Any) -> str:
        return str(value or "").lower()

    platform_name = _lowered(profile.get("os"))
    cpu_arch = _lowered(profile.get("arch"))
    gpu_info = profile.get("gpu") or {}
    gpu_vendor = _lowered(gpu_info.get("vendor"))

    if platform_name != "darwin":
        return False
    return gpu_vendor == "apple" or cpu_arch in {"arm64", "aarch64"}
69
+
70
+
71
def _ram_gb(profile: Dict[str, Any]) -> float:
    """Return the profile's ``ram_mb`` converted to gigabytes, never negative.

    A missing, falsy or unparsable ``ram_mb`` yields ``0.0``.
    """
    try:
        megabytes = float(profile.get("ram_mb") or 0)
        gigabytes = megabytes / 1024.0
    except (TypeError, ValueError):
        return 0.0
    return max(0.0, gigabytes)
76
+
77
+
78
def _engine_available(engine: str, profile: Dict[str, Any]) -> bool:
    """Report whether ``engine`` can run on the machine described by ``profile``.

    Engines listed in ``_APPLE_ONLY_ENGINES`` need Apple Silicon; every other
    engine is treated as available.
    """
    # ollama / llamacpp / lmstudio / vllm run cross-platform once installed.
    if engine not in _APPLE_ONLY_ENGINES:
        return True
    return is_apple_silicon(profile)
83
+
84
+
85
def _classify_one(
    model: Dict[str, Any],
    *,
    engine_available: bool,
    ram_gb: float,
) -> Dict[str, Any]:
    """Classify a single catalog ``model`` for a machine with ``ram_gb`` of RAM.

    Returns a dict with the model's ``id``/``name``/``family``/``tag``/``size``
    plus ``size_gb``, ``required_ram_gb``, ``status`` and a human-readable
    ``reason``. Checks run in order: engine availability, unknown model size,
    unknown machine memory, then the RAM thresholds (up to 60% of RAM is
    recommended, up to 90% is compatible, anything above is not recommended).
    """
    size_gb = parse_size_gb(model.get("size"))
    need_gb = None if size_gb is None else estimated_ram_gb(size_gb)

    def _verdict():
        if not engine_available:
            return NOT_RECOMMENDED, "Requires Apple Silicon (MLX runtime)"
        if need_gb is None:
            # Server/pull models have no fixed on-disk size — treat as
            # compatible (the engine streams/pulls weights on demand).
            return COMPATIBLE, "Served/pulled on demand by the engine"
        if ram_gb <= 0:
            return COMPATIBLE, "Memory unknown — verify before loading"
        if need_gb <= ram_gb * 0.6:
            return RECOMMENDED, f"Fits comfortably (~{need_gb:.0f} GB of {ram_gb:.0f} GB RAM)"
        if need_gb <= ram_gb * 0.9:
            return COMPATIBLE, f"Runs but tight (~{need_gb:.0f} GB of {ram_gb:.0f} GB RAM)"
        return NOT_RECOMMENDED, f"Needs ~{need_gb:.0f} GB RAM (have {ram_gb:.0f} GB)"

    status, reason = _verdict()

    # Copy the descriptive fields through, then append the computed ones.
    result: Dict[str, Any] = {
        field: model.get(field) for field in ("id", "name", "family", "tag", "size")
    }
    result["size_gb"] = size_gb
    result["required_ram_gb"] = need_gb
    result["status"] = status
    result["reason"] = reason
    return result
120
+
121
+
122
def _family_rank(family: str) -> int:
    """Return the sort position of ``family`` within ``_FAMILY_ORDER``.

    Families that are not listed rank after every listed family.
    """
    if family in _FAMILY_ORDER:
        return _FAMILY_ORDER.index(family)
    return len(_FAMILY_ORDER)
127
+
128
+
129
def recommend_catalog(profile: Dict[str, Any], *, engine: str = "local_mlx") -> Dict[str, Any]:
    """Classify ``engine``'s catalog for the given machine ``profile``.

    ``profile`` is a dict shaped like ``auto_setup.SystemProfile.to_json()``
    (``os``, ``arch``, ``ram_mb``, ``gpu={vendor,vram_mb}`` …).

    The result holds the engine name and availability, the Apple Silicon flag,
    RAM in GB (one decimal), per-status ``counts``, the overall ``top_pick``,
    the per-family grouping (``families``) and the flat ``models`` list. An
    unknown ``engine`` yields an empty catalog.
    """
    available = _engine_available(engine, profile)
    total_ram_gb = _ram_gb(profile)

    def _size_key(item: Dict[str, Any]) -> float:
        # Models with unknown size sort as the smallest.
        return item["size_gb"] or 0.0

    def _pick_best(candidates: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        # Prefer recommended, then compatible; within a tier prefer the largest
        # model that still fits (more capable).
        for tier in (RECOMMENDED, COMPATIBLE):
            in_tier = [c for c in candidates if c["status"] == tier]
            if in_tier:
                return max(in_tier, key=_size_key)
        return None

    # Single pass: classify, tally statuses and bucket by family.
    classified: List[Dict[str, Any]] = []
    counts = dict.fromkeys((RECOMMENDED, COMPATIBLE, NOT_RECOMMENDED), 0)
    by_family: Dict[str, Dict[str, Any]] = {}
    for entry in ENGINE_MODEL_CATALOG.get(engine, []):
        item = _classify_one(entry, engine_available=available, ram_gb=total_ram_gb)
        classified.append(item)
        counts[item["status"]] += 1
        label = item["family"] or "Other"
        if label not in by_family:
            by_family[label] = {"family": label, "models": [], "best": None}
        by_family[label]["models"].append(item)

    # Surface the best pick per family, then order the groups for display.
    for group in by_family.values():
        group["best"] = _pick_best(group["models"])
    families = sorted(by_family.values(), key=lambda group: _family_rank(group["family"]))

    # Overall top pick: the largest recommended model on this machine.
    recommended_models = [item for item in classified if item["status"] == RECOMMENDED]
    top_pick = max(recommended_models, key=_size_key) if recommended_models else None

    return {
        "engine": engine,
        "engine_available": available,
        "apple_silicon": is_apple_silicon(profile),
        "ram_gb": round(total_ram_gb, 1),
        "counts": counts,
        "top_pick": top_pick,
        "families": families,
        "models": classified,
    }