ltcai 5.0.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -55
- package/docs/CHANGELOG.md +96 -2354
- package/docs/TRUST_MODEL.md +66 -0
- package/docs/V4_1_VALIDATION_REPORT.md +1 -1
- package/docs/V4_3_PRODUCT_HARDENING_REPORT.md +2 -2
- package/docs/V4_5_1_VALIDATION_REPORT.md +2 -1
- package/docs/WHY_LATTICE.md +54 -0
- package/frontend/src/App.tsx +1 -1
- package/frontend/src/components/primitives.tsx +1 -1
- package/frontend/src/i18n.ts +6 -4
- package/frontend/src/pages/Library.tsx +29 -4
- package/frontend/src/pages/System.tsx +1 -1
- package/lattice_brain/__init__.py +1 -1
- package/lattice_brain/portability.py +11 -7
- package/lattice_brain/runtime/multi_agent.py +1 -1
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/chat.py +19 -11
- package/latticeai/api/marketplace.py +2 -2
- package/latticeai/api/models.py +26 -4
- package/latticeai/api/security_dashboard.py +3 -15
- package/latticeai/api/static_routes.py +16 -0
- package/latticeai/app_factory.py +114 -40
- package/latticeai/core/audit.py +3 -1
- package/latticeai/core/builtin_hooks.py +7 -9
- package/latticeai/core/logging_safety.py +5 -21
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/security.py +67 -9
- package/latticeai/core/workspace_os.py +18 -4
- package/latticeai/services/model_capability_registry.py +483 -0
- package/latticeai/services/model_catalog.py +99 -96
- package/latticeai/services/model_recommendation.py +12 -1
- package/package.json +2 -2
- package/scripts/clean_release_artifacts.mjs +16 -1
- package/scripts/com.pts.claudecode.discord.plist +31 -0
- package/scripts/pts-claudecode-discord-bridge.mjs +189 -0
- package/scripts/run_integration_tests.mjs +91 -0
- package/scripts/start-pts-claudecode-discord.sh +51 -0
- package/scripts/verify_hf_model_registry.py +308 -0
- package/src-tauri/Cargo.lock +1 -1
- package/src-tauri/Cargo.toml +1 -1
- package/src-tauri/tauri.conf.json +3 -2
- package/static/app/asset-manifest.json +5 -5
- package/static/app/assets/index-CQmHhk8Q.css +2 -0
- package/static/app/assets/{index-FR1UZkCD.js → index-DsnfomFs.js} +2 -2
- package/static/app/assets/index-DsnfomFs.js.map +1 -0
- package/static/app/index.html +2 -2
- package/static/app/assets/index-DuYYT2oh.css +0 -2
- package/static/app/assets/index-FR1UZkCD.js.map +0 -1
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
"""Structured Model Capability Registry for Lattice AI 5.2.0+.
|
|
2
|
+
|
|
3
|
+
User-focused, transparent model catalog with:
|
|
4
|
+
- HF repo provenance
|
|
5
|
+
- Modality / vision support
|
|
6
|
+
- Quantization, size, download/load strategies
|
|
7
|
+
- Hardware notes (RAM estimates, Apple Silicon affinity)
|
|
8
|
+
- License / safety notes
|
|
9
|
+
- Verification status (populated by scripts/verify_hf_model_registry.py or runtime light probe)
|
|
10
|
+
|
|
11
|
+
This replaces the flat ENGINE_MODEL_CATALOG construction with a richer,
|
|
12
|
+
queryable source of truth while preserving exact legacy shapes for
|
|
13
|
+
model_catalog / recommendation / API / frontend consumers.
|
|
14
|
+
|
|
15
|
+
All current entries are multimodal (VLM) models; text-only entries can be added later.
|
|
16
|
+
Verification is honest: hf_exists + light metadata/config presence; full weights
|
|
17
|
+
are never auto-fetched by the verifier. Large models (>12GB) explicitly note
|
|
18
|
+
"local load practical only on high-RAM Apple Silicon or CUDA; expect long download".
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import datetime as _dt
|
|
24
|
+
from dataclasses import dataclass, asdict, field
|
|
25
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class HardwareProfile:
    """Immutable hardware guidance attached to a single catalog entry.

    Every field is optional guidance; the defaults describe an entry with
    no stated requirements.
    """

    # RAM figures, in GB per the field names; None when no figure is given.
    min_ram_gb: Optional[float] = None
    recommended_ram_gb: Optional[float] = None
    # Platform affinity flags; both default to "no stated preference".
    apple_silicon_pref: bool = False
    cuda_pref: bool = False
    # Free-form hardware remark.
    notes: str = ""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class VerificationStatus:
    """Immutable record of what a verification pass observed for a model repo.

    All flags default to False / None, i.e. "nothing verified yet".
    """

    hf_exists: bool = False
    hf_last_checked: Optional[str] = None  # ISO
    has_config: bool = False
    has_tokenizer: bool = False
    has_weights_hint: bool = False  # safetensors or gguf siblings seen in meta
    pipeline_tag: Optional[str] = None
    likes: Optional[int] = None
    license: Optional[str] = None
    notes: str = ""
    verified_by: str = "hf-api-light"  # or "local-load-test"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class ModelCapability:
    """One curated model entry; ``id`` is the canonical ENGINE_MODEL_CATALOG key.

    The dataclass is frozen, so attributes cannot be rebound after
    construction. Note that ``provider_hints`` is still a plain list and its
    contents remain mutable.
    """

    id: str
    hf_repo_id: str  # clean HF path for download (e.g. mlx-community/xxx or Qwen/yyy)
    name: str
    family: str
    tag: str
    size: str  # display string "7.6GB", "pull required"
    modality: str = "multimodal"  # multimodal | vision | text | audio etc.
    quantization: Optional[str] = None  # "4bit", "Q4_K_M", "GGUF-Q4"
    provider_hints: List[str] = field(default_factory=lambda: ["local_mlx"])  # which engines this id primarily maps to
    download_strategy: str = "hf_hub"  # hf_hub | ollama_pull | lmstudio_app | gguf_manual
    load_strategy: str = "mlx_vlm"  # mlx_vlm | ollama | vllm | lmstudio | llamacpp_server
    hardware: HardwareProfile = field(default_factory=HardwareProfile)
    license: str = "apache-2.0"
    safety_notes: str = "Standard open weights; review license and responsible use guidelines."
    source_country: str = ""
    source_company: str = ""
    execution_method: str = "내 컴퓨터에서만 실행"
    internet_requirement: str = "모델을 다운로드할 때만 인터넷 필요; 실행 중에는 필요 없음"
    # Verification
    verification: VerificationStatus = field(default_factory=VerificationStatus)
    # UI / rec hints
    recommended_default: bool = False
    display_priority: int = 100

    def to_legacy_dict(self) -> Dict[str, Any]:
        """Flatten this entry into the legacy catalog dict plus the 5.2 fields.

        Returns:
            A new dict on every call. ``provider_hints``, ``display_priority``
            and the verification ``likes`` / ``license`` / ``verified_by``
            values are not exported. ``pullable`` is always True and
            ``run_location`` is a fixed literal rather than a copy of
            ``execution_method``.
        """
        hw = self.hardware
        check = self.verification

        # Anything from the first " via " onwards is dropped; names without
        # that separator pass through unchanged.
        short_name = self.name.partition(" via ")[0]

        hardware_view = {
            "min_ram_gb": hw.min_ram_gb,
            "recommended_ram_gb": hw.recommended_ram_gb,
            "apple_silicon_pref": hw.apple_silicon_pref,
            "cuda_pref": hw.cuda_pref,
            "notes": hw.notes,
        }
        verification_view = {
            "hf_exists": check.hf_exists,
            "hf_last_checked": check.hf_last_checked,
            "has_config": check.has_config,
            "has_tokenizer": check.has_tokenizer,
            "has_weights_hint": check.has_weights_hint,
            "pipeline_tag": check.pipeline_tag,
            # "verified" needs the repo plus both config and tokenizer.
            "verified": all((check.hf_exists, check.has_config, check.has_tokenizer)),
            "notes": check.notes,
        }

        return {
            "id": self.id,
            "name": self.name,
            "model_name": short_name,
            "family": self.family,
            "tag": self.tag,
            "size": self.size,
            "pullable": True,
            "modality": self.modality,
            "source_country": self.source_country,
            "source_company": self.source_company,
            "execution_method": self.execution_method,
            "run_location": "내 컴퓨터에서만 실행",
            "internet_requirement": self.internet_requirement,
            "source_display_order": [
                "source_country", "source_company", "execution_method",
                "internet_requirement", "model_name"
            ],
            # 5.2+ rich fields (non-breaking; frontend + backend read if present)
            "hf_repo_id": self.hf_repo_id,
            "quantization": self.quantization,
            "download_strategy": self.download_strategy,
            "load_strategy": self.load_strategy,
            "hardware": hardware_view,
            "license": self.license,
            "safety_notes": self.safety_notes,
            "verification": verification_view,
            "recommended_default": self.recommended_default,
        }
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# ── Curated 5.2.0 registry (bold user-focused: transparent, multimodal-first, verified where practical) ──
|
|
132
|
+
# Current Gemma-4 / Qwen3-VL / Llama-4 kept + modern additions (Gemma3, Qwen2.5-VL, Llama-3.2-Vision, Pixtral).
|
|
133
|
+
# Presence verified via HF API (lightweight model_info) on 2026-06-14.
|
|
134
|
+
# Full weight download is user-consent only; entries without config/tokenizer
|
|
135
|
+
# hints are shown as available-but-not-load-verified.
|
|
136
|
+
|
|
137
|
+
# Curated entries, in declaration order. Entries whose verification data
# records a non-Apache licence set ``license=`` explicitly: the dataclass
# default is "apache-2.0", and that top-level value is what the legacy dict
# exposes to API/UI consumers.
_REGISTRY: List[ModelCapability] = [
    # Gemma 4 family (mlx-community 4-bit, Apple-first, excellent local VLM)
    ModelCapability(
        id="mlx-community/gemma-4-e2b-4bit",
        hf_repo_id="mlx-community/gemma-4-e2b-4bit",
        name="Gemma 4 E2B Base",
        family="Gemma 4",
        tag="local-vlm",
        size="3.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=6.0, recommended_ram_gb=8.0, apple_silicon_pref=True, notes="Tiny but capable vision; great first local VLM."),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="any-to-any", license="apache-2.0", verified_by="hf-api-light"),
        recommended_default=True,
        display_priority=10,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e2b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-e2b-it-4bit",
        name="Gemma 4 E2B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="3.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=6.0, recommended_ram_gb=8.0, apple_silicon_pref=True, notes="Instruct-tuned; preferred over base for chat."),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="any-to-any", license="apache-2.0", verified_by="hf-api-light"),
        recommended_default=True,
        display_priority=11,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e4b-4bit",
        hf_repo_id="mlx-community/gemma-4-e4b-4bit",
        name="Gemma 4 E4B Base",
        family="Gemma 4",
        tag="local-vlm",
        size="5.2GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=10.0, apple_silicon_pref=True),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="any-to-any", license="apache-2.0", verified_by="hf-api-light"),
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e4b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-e4b-it-4bit",
        name="Gemma 4 E4B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="5.2GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=10.0, apple_silicon_pref=True),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="any-to-any", license="apache-2.0", verified_by="hf-api-light"),
    ),
    ModelCapability(
        id="mlx-community/gemma-4-12b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-12b-it-4bit",
        name="Gemma 4 12B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="7.6GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=12.0, recommended_ram_gb=16.0, apple_silicon_pref=True, notes="Sweet spot for local multimodal on M-series 16GB+ or 24GB+."),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
        recommended_default=True,
        display_priority=20,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-26b-a4b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-26b-a4b-it-4bit",
        name="Gemma 4 26B A4B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="15.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=20.0, recommended_ram_gb=28.0, apple_silicon_pref=True, notes="Large MoE-style; local load practical only on high-RAM Apple Silicon (32GB+). Long download expected."),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
        display_priority=50,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-31b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-31b-it-4bit",
        name="Gemma 4 31B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="18.4GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=24.0, recommended_ram_gb=32.0, apple_silicon_pref=True, notes="Very large; high-end local only. Consider cloud fallback for lower RAM."),
        source_country="미국", source_company="Google",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
    ),

    # Qwen3-VL (strong real-world multimodal, good small sizes)
    ModelCapability(
        id="mlx-community/Qwen3-VL-4B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-4B-Instruct-4bit",
        name="Qwen3-VL 4B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="2.7GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=5.0, recommended_ram_gb=8.0, apple_silicon_pref=True, notes="Extremely compact strong VLM. Best default for low-RAM Macs."),
        source_country="중국", source_company="Alibaba",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
        recommended_default=True,
        display_priority=5,
    ),
    ModelCapability(
        id="mlx-community/Qwen3-VL-8B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-8B-Instruct-4bit",
        name="Qwen3-VL 8B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="4.8GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=12.0, apple_silicon_pref=True),
        source_country="중국", source_company="Alibaba",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
        recommended_default=True,
        display_priority=15,
    ),
    ModelCapability(
        id="mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit",
        name="Qwen3-VL 30B A3B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="18GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=24.0, recommended_ram_gb=32.0, apple_silicon_pref=True, notes="Large MoE VLM; practical local only on 32GB+ Apple Silicon or strong CUDA. Download is multi-GB."),
        source_country="중국", source_company="Alibaba",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
    ),

    # Llama 4
    ModelCapability(
        id="mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
        hf_repo_id="mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
        name="Llama 4 Scout 17B 16E",
        family="Llama 4",
        tag="local-vlm",
        size="11.8GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=16.0, recommended_ram_gb=20.0, apple_silicon_pref=True),
        source_country="미국", source_company="Meta",
        # Meta community licence, not Apache-2.0.
        # NOTE(review): "llama4" is assumed to be the Hub licence identifier,
        # and the verification licence string below looks like a placeholder;
        # confirm both against the repo's model card.
        license="llama4",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="llama3.1-ish / meta-llama", verified_by="hf-api-light"),
        recommended_default=True,
        display_priority=25,
    ),

    # ── Modern additions for 5.2.0 (verified on HF, user choice expansion) ──
    # Gemma 3 (excellent real multimodal balance, smaller than 4 where present)
    ModelCapability(
        id="google/gemma-3-4b-it",
        hf_repo_id="google/gemma-3-4b-it",
        name="Gemma 3 4B Instruct (HF)",
        family="Gemma 3",
        tag="local-vlm",
        size="~5GB+",
        quantization="bf16 / 4bit variants",
        provider_hints=["local_mlx", "vllm", "ollama"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=12.0, apple_silicon_pref=True, notes="Use mlx-community quantized ports when available for best local perf."),
        source_country="미국", source_company="Google",
        # Mirrors the licence recorded in the verification data below.
        license="gemma-terms",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="gemma-terms", verified_by="hf-api-light"),
        display_priority=30,
    ),
    ModelCapability(
        id="google/gemma-3-12b-it",
        hf_repo_id="google/gemma-3-12b-it",
        name="Gemma 3 12B Instruct (HF)",
        family="Gemma 3",
        tag="local-vlm",
        size="~12GB+",
        quantization="bf16 / GGUF-4bit",
        provider_hints=["ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="ollama",
        hardware=HardwareProfile(min_ram_gb=16.0, recommended_ram_gb=20.0, notes="Prefer quantized GGUF for llama.cpp / ollama on non-Apple or lower RAM."),
        source_country="미국", source_company="Google",
        # Mirrors the licence recorded in the verification data below.
        license="gemma-terms",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="gemma-terms", verified_by="hf-api-light"),
    ),

    # Qwen2.5-VL (battle-tested, widely supported)
    ModelCapability(
        id="Qwen/Qwen2.5-VL-7B-Instruct",
        hf_repo_id="Qwen/Qwen2.5-VL-7B-Instruct",
        name="Qwen2.5-VL 7B Instruct",
        family="Qwen2.5-VL",
        tag="local-vlm",
        size="~8-15GB (quant dependent)",
        quantization="AWQ / GGUF / 4bit ports",
        provider_hints=["vllm", "ollama", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="vllm",
        hardware=HardwareProfile(min_ram_gb=12.0, recommended_ram_gb=16.0, cuda_pref=True, notes="Strong general VLM. mlx-community or GGUF ports recommended for local Apple."),
        source_country="중국", source_company="Alibaba",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="apache-2.0", verified_by="hf-api-light"),
        display_priority=35,
    ),

    # Llama 3.2 Vision (widely available, good ecosystem)
    ModelCapability(
        id="meta-llama/Llama-3.2-11B-Vision-Instruct",
        hf_repo_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
        name="Llama 3.2 11B Vision Instruct",
        family="Llama 3.2 Vision",
        tag="local-vlm",
        size="~11-22GB (quant)",
        quantization="Q4_K_M GGUF widely available",
        provider_hints=["ollama", "llamacpp", "lmstudio", "vllm"],
        download_strategy="hf_hub",
        load_strategy="ollama",
        hardware=HardwareProfile(min_ram_gb=14.0, recommended_ram_gb=18.0, notes="Excellent GGUF support. Ollama / llama.cpp default path for most users."),
        source_country="미국", source_company="Meta",
        # Mirrors the licence recorded in the verification data below.
        license="llama3.2",
        verification=VerificationStatus(hf_exists=True, has_config=True, has_tokenizer=True, has_weights_hint=True, pipeline_tag="image-text-to-text", license="llama3.2", verified_by="hf-api-light"),
        display_priority=40,
    ),

    # Pixtral (Mistral multimodal, strong)
    # NOTE(review): the top-level license is left at the "apache-2.0" default
    # while verification.license says "mistral-research"; the two disagree and
    # should be reconciled against the repo's model card.
    ModelCapability(
        id="mistralai/Pixtral-12B-2409",
        hf_repo_id="mistralai/Pixtral-12B-2409",
        name="Pixtral 12B (Mistral)",
        family="Pixtral",
        tag="local-vlm",
        size="~12-24GB",
        quantization="GGUF / AWQ ports",
        provider_hints=["vllm", "ollama", "lmstudio"],
        download_strategy="hf_hub",
        load_strategy="vllm",
        hardware=HardwareProfile(min_ram_gb=16.0, recommended_ram_gb=20.0, cuda_pref=True, notes="High quality vision-language. Best on CUDA / vLLM; GGUF for CPU/Apple via community ports."),
        source_country="프랑스", source_company="Mistral AI",
        verification=VerificationStatus(
            hf_exists=True,
            has_config=False,
            has_tokenizer=False,
            has_weights_hint=True,
            pipeline_tag=None,
            license="mistral-research",
            notes="HF repo and weights are present, but config/tokenizer files were not visible in the lightweight HF tree check; treat as available but not local-load verified.",
            verified_by="hf-api-light",
        ),
        display_priority=45,
    ),
]
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def get_all_capabilities() -> List[ModelCapability]:
    """Return a fresh list of every registry entry, in declaration order.

    The list is a shallow copy: callers may reorder or filter it without
    touching the module-level registry, but the entries are the same objects.
    """
    return [*_REGISTRY]
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def get_capability(model_id: str) -> Optional[ModelCapability]:
    """Look up a registry entry by its catalog id or its HF repo id.

    Returns:
        The first entry, in registry order, whose ``id`` or ``hf_repo_id``
        equals ``model_id``; None when nothing matches.
    """
    matches = (
        entry
        for entry in _REGISTRY
        if model_id in (entry.id, entry.hf_repo_id)
    )
    return next(matches, None)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def _engine_scoped_model_id(engine: str, cap: ModelCapability) -> str:
    """Return the engine-prefixed catalog id for ``cap`` on a non-MLX engine."""
    if engine == "ollama":
        quantization = (cap.quantization or "").lower()
        if "gguf" in cap.tag.lower() or "gguf" in quantization:
            # fallback, overridden by aliases
            # NOTE(review): "-12B-it-" and "Q4_K_M" are hard-coded, so any
            # GGUF-tagged entry gets a 12B id regardless of its real size.
            # Kept byte-for-byte because alias handling elsewhere may key on
            # these strings; confirm before changing.
            family_slug = cap.family.lower().replace(" ", "")
            return f"ollama:hf.co/ggml-org/{family_slug}-12B-it-GGUF:Q4_K_M"
        return f"ollama:{cap.hf_repo_id.split('/')[-1].lower()}"
    if engine == "llamacpp":
        return f"llamacpp:{cap.hf_repo_id}-GGUF"
    # vllm and lmstudio use the plain repo id behind their prefix.
    return f"{engine}:{cap.hf_repo_id}"


def build_engine_model_catalog() -> Dict[str, List[Dict[str, Any]]]:
    """Return legacy ENGINE_MODEL_CATALOG shape, enriched with 5.2 fields.

    Each registry entry is projected once per engine named in its
    ``provider_hints``. ``local_mlx`` entries keep their id; every other
    engine gets an engine-prefixed id unless the id already carries that
    prefix. Hints naming an engine outside the five known ones are ignored.

    Returns:
        A plain dict mapping engine key to a list of legacy dicts. Engines
        with no entries are absent. Key order follows first appearance and
        list order follows registry order.
    """
    known_engines = ("local_mlx", "ollama", "vllm", "lmstudio", "llamacpp")
    catalog: Dict[str, List[Dict[str, Any]]] = {}

    for cap in _REGISTRY:
        for engine in known_engines:
            if engine not in cap.provider_hints:
                continue
            # A fresh dict per engine, because the id is rewritten in place.
            legacy = cap.to_legacy_dict()
            # Adapt id for non-mlx engines (match historical patterns)
            if engine != "local_mlx" and not legacy["id"].startswith(f"{engine}:"):
                legacy["id"] = _engine_scoped_model_id(engine, cap)
            catalog.setdefault(engine, []).append(legacy)

    return catalog
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def get_verified_models() -> List[Dict[str, Any]]:
    """Return legacy dicts for entries that pass the light verification check.

    An entry qualifies when its verification record has ``hf_exists``,
    ``has_config`` and ``has_tokenizer`` all set. Registry order is kept.
    """
    verified: List[Dict[str, Any]] = []
    for cap in _REGISTRY:
        status = cap.verification
        if not (status.hf_exists and status.has_config and status.has_tokenizer):
            continue
        verified.append(cap.to_legacy_dict())
    return verified
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
# Back-compat: flat list of legacy dicts for every entry that lists
# "local_mlx" among its provider hints, built once at import time.
LOCAL_MLX_MODELS = [
    cap.to_legacy_dict()
    for cap in _REGISTRY
    if "local_mlx" in cap.provider_hints
]
|