ltcai 5.1.0 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +143 -159
  2. package/docs/CHANGELOG.md +72 -2355
  3. package/docs/DEVELOPMENT.md +99 -0
  4. package/docs/LEGACY_COMPATIBILITY.md +55 -0
  5. package/docs/V4_1_VALIDATION_REPORT.md +1 -1
  6. package/docs/V4_3_PRODUCT_HARDENING_REPORT.md +2 -2
  7. package/docs/V4_5_1_VALIDATION_REPORT.md +2 -1
  8. package/docs/WHY_LATTICE.md +4 -3
  9. package/frontend/src/components/FirstRunGuide.tsx +5 -5
  10. package/frontend/src/components/ProductFlow.tsx +1 -1
  11. package/frontend/src/i18n.ts +40 -40
  12. package/frontend/src/pages/Library.tsx +46 -9
  13. package/lattice_brain/__init__.py +1 -1
  14. package/lattice_brain/archive.py +12 -0
  15. package/lattice_brain/portability.py +14 -0
  16. package/lattice_brain/runtime/multi_agent.py +1 -1
  17. package/latticeai/__init__.py +1 -1
  18. package/latticeai/api/marketplace.py +2 -2
  19. package/latticeai/api/models.py +20 -4
  20. package/latticeai/app_factory.py +4 -78
  21. package/latticeai/core/marketplace.py +1 -1
  22. package/latticeai/core/workspace_os.py +18 -4
  23. package/latticeai/runtime/__init__.py +2 -0
  24. package/latticeai/runtime/brain_runtime.py +41 -0
  25. package/latticeai/runtime/config_runtime.py +36 -0
  26. package/latticeai/runtime/security_runtime.py +27 -0
  27. package/latticeai/services/model_capability_registry.py +482 -0
  28. package/latticeai/services/model_catalog.py +99 -96
  29. package/latticeai/services/model_recommendation.py +12 -1
  30. package/package.json +2 -2
  31. package/scripts/verify_hf_model_registry.py +306 -0
  32. package/src-tauri/Cargo.lock +1 -1
  33. package/src-tauri/Cargo.toml +1 -1
  34. package/src-tauri/tauri.conf.json +1 -1
  35. package/static/app/asset-manifest.json +5 -5
  36. package/static/app/assets/index-CQmHhk8Q.css +2 -0
  37. package/static/app/assets/{index-DONOJfMn.js → index-sOXTFUQc.js} +2 -2
  38. package/static/app/assets/index-sOXTFUQc.js.map +1 -0
  39. package/static/app/index.html +2 -2
  40. package/static/app/assets/index-DONOJfMn.js.map +0 -1
  41. package/static/app/assets/index-DuYYT2oh.css +0 -2
@@ -0,0 +1,482 @@
1
+ """Structured Model Capability Registry for Lattice AI 5.2.0+.
2
+
3
+ User-focused, transparent model catalog with:
4
+ - HF repo provenance
5
+ - Modality / vision support
6
+ - Quantization, size, download/load strategies
7
+ - Hardware notes (RAM estimates, Apple Silicon affinity)
8
+ - License / safety notes
9
+ - Verification status (populated by scripts/verify_hf_model_registry.py or runtime light probe)
10
+
11
+ This replaces the flat ENGINE_MODEL_CATALOG construction with a richer,
12
+ queryable source of truth while preserving exact legacy shapes for
13
+ model_catalog / recommendation / API / frontend consumers.
14
+
15
+ All entries are recommended multimodal (VLM) first. Text-only can be added later.
16
+ Verification is honest: hf_exists + light metadata/config presence; full weights
17
+ are never auto-fetched by the verifier. Large models (>12GB) explicitly note
18
+ "local load practical only on high-RAM Apple Silicon or CUDA; expect long download".
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from dataclasses import dataclass, field
24
+ from typing import Any, Dict, List, Optional
25
+
26
+
27
@dataclass(frozen=True)
class HardwareProfile:
    """Immutable hardware guidance attached to a single model entry.

    Every field has a default, so ``HardwareProfile()`` is a valid "nothing
    recorded" profile.
    """

    # RAM figures are in gigabytes; both default to None.
    min_ram_gb: Optional[float] = None
    recommended_ram_gb: Optional[float] = None
    # Platform affinity flags; both default to False.
    apple_silicon_pref: bool = False
    cuda_pref: bool = False
    # Free-form human-readable note.
    notes: str = ""
34
+
35
+
36
@dataclass(frozen=True)
class VerificationStatus:
    """Immutable record of what a verification pass observed for a model.

    The defaults describe an unchecked entry: every presence flag is False and
    every optional value is None.
    """

    hf_exists: bool = False
    hf_last_checked: Optional[str] = None  # ISO
    has_config: bool = False
    has_tokenizer: bool = False
    has_weights_hint: bool = False  # safetensors or gguf siblings seen in meta
    pipeline_tag: Optional[str] = None
    likes: Optional[int] = None
    license: Optional[str] = None
    notes: str = ""
    verified_by: str = "hf-api-light"  # or "local-load-test"
48
+
49
+
50
@dataclass(frozen=True)
class ModelCapability:
    """Rich capability entry for one model.

    ``id`` is the canonical key used in ENGINE_MODEL_CATALOG.
    """

    id: str
    hf_repo_id: str  # clean HF path for download (e.g. mlx-community/xxx or Qwen/yyy)
    name: str
    family: str
    tag: str
    size: str  # display string "7.6GB", "pull required"
    modality: str = "multimodal"  # multimodal | vision | text | audio etc.
    quantization: Optional[str] = None  # "4bit", "Q4_K_M", "GGUF-Q4"
    # Which engines this id primarily maps to.
    provider_hints: List[str] = field(default_factory=lambda: ["local_mlx"])
    download_strategy: str = "hf_hub"  # hf_hub | ollama_pull | lmstudio_app | gguf_manual
    load_strategy: str = "mlx_vlm"  # mlx_vlm | ollama | vllm | lmstudio | llamacpp_server
    hardware: HardwareProfile = field(default_factory=HardwareProfile)
    # Curated license label; only used in the output when the verification
    # record carries no license of its own (see to_legacy_dict).
    license: str = "apache-2.0"
    safety_notes: str = "Standard open weights; review license and responsible use guidelines."
    source_country: str = ""
    source_company: str = ""
    execution_method: str = "내 컴퓨터에서만 실행"
    internet_requirement: str = "모델을 다운로드할 때만 인터넷 필요; 실행 중에는 필요 없음"
    # Verification
    verification: VerificationStatus = field(default_factory=VerificationStatus)
    # UI / rec hints
    recommended_default: bool = False
    display_priority: int = 100

    def to_legacy_dict(self) -> Dict[str, Any]:
        """Serialize to the flat dict shape used by ENGINE_MODEL_CATALOG consumers.

        The result carries the legacy keys plus the 5.2 rich fields
        (``hf_repo_id``, ``quantization``, strategies, ``hardware``,
        ``license``, ``safety_notes``, ``verification``,
        ``recommended_default``).

        ``license`` reports ``verification.license`` when that is set and falls
        back to the curated ``license`` field otherwise. Without this, entries
        that record their license only on the verification record would all
        surface the dataclass default ``"apache-2.0"``.

        ``verification["verified"]`` is True only when the repo, its config and
        its tokenizer were all seen.

        Returns:
            A new dict on every call; nested ``hardware`` and ``verification``
            values are plain dicts.
        """
        hw = self.hardware
        check = self.verification
        # Everything before the first " via " (the whole name when absent).
        model_name = self.name.partition(" via ")[0]
        return {
            "id": self.id,
            "name": self.name,
            "model_name": model_name,
            "family": self.family,
            "tag": self.tag,
            "size": self.size,
            "pullable": True,
            "modality": self.modality,
            "source_country": self.source_country,
            "source_company": self.source_company,
            "execution_method": self.execution_method,
            # NOTE(review): fixed string, does not follow execution_method —
            # confirm that is intended if non-local entries are ever added.
            "run_location": "내 컴퓨터에서만 실행",
            "internet_requirement": self.internet_requirement,
            "source_display_order": [
                "source_country", "source_company", "execution_method",
                "internet_requirement", "model_name",
            ],
            # 5.2+ rich fields (non-breaking; frontend + backend read if present)
            "hf_repo_id": self.hf_repo_id,
            "quantization": self.quantization,
            "download_strategy": self.download_strategy,
            "load_strategy": self.load_strategy,
            "hardware": {
                "min_ram_gb": hw.min_ram_gb,
                "recommended_ram_gb": hw.recommended_ram_gb,
                "apple_silicon_pref": hw.apple_silicon_pref,
                "cuda_pref": hw.cuda_pref,
                "notes": hw.notes,
            },
            "license": check.license or self.license,
            "safety_notes": self.safety_notes,
            "verification": {
                "hf_exists": check.hf_exists,
                "hf_last_checked": check.hf_last_checked,
                "has_config": check.has_config,
                "has_tokenizer": check.has_tokenizer,
                "has_weights_hint": check.has_weights_hint,
                "pipeline_tag": check.pipeline_tag,
                "verified": bool(
                    check.hf_exists and check.has_config and check.has_tokenizer
                ),
                "notes": check.notes,
            },
            "recommended_default": self.recommended_default,
        }
128
+
129
+
130
+ # ── Curated 5.2.0 registry (bold user-focused: transparent, multimodal-first, verified where practical) ──
131
+ # Current Gemma-4 / Qwen3-VL / Llama-4 kept + modern additions (Gemma3, Qwen2.5-VL, Llama-3.2-Vision, Pixtral).
132
+ # Presence verified via HF API (lightweight model_info) on 2026-06-14.
133
+ # Full weight download is user-consent only; entries without config/tokenizer
134
+ # hints are shown as available-but-not-load-verified.
135
+
136
def _hf_light_ok(pipeline_tag: str, license_id: str) -> VerificationStatus:
    """Build the status shared by entries whose light HF check saw everything.

    Repo, config, tokenizer and a weights hint are all marked present; only the
    pipeline tag and license label vary between entries.
    """
    return VerificationStatus(
        hf_exists=True,
        has_config=True,
        has_tokenizer=True,
        has_weights_hint=True,
        pipeline_tag=pipeline_tag,
        license=license_id,
        verified_by="hf-api-light",
    )


_REGISTRY: List[ModelCapability] = [
    # Gemma 4 family (mlx-community 4-bit, Apple-first, excellent local VLM)
    ModelCapability(
        id="mlx-community/gemma-4-e2b-4bit",
        hf_repo_id="mlx-community/gemma-4-e2b-4bit",
        name="Gemma 4 E2B Base",
        family="Gemma 4",
        tag="local-vlm",
        size="3.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=6.0,
            recommended_ram_gb=8.0,
            apple_silicon_pref=True,
            notes="Tiny but capable vision; great first local VLM.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("any-to-any", "apache-2.0"),
        recommended_default=True,
        display_priority=10,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e2b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-e2b-it-4bit",
        name="Gemma 4 E2B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="3.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=6.0,
            recommended_ram_gb=8.0,
            apple_silicon_pref=True,
            notes="Instruct-tuned; preferred over base for chat.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("any-to-any", "apache-2.0"),
        recommended_default=True,
        display_priority=11,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e4b-4bit",
        hf_repo_id="mlx-community/gemma-4-e4b-4bit",
        name="Gemma 4 E4B Base",
        family="Gemma 4",
        tag="local-vlm",
        size="5.2GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=8.0,
            recommended_ram_gb=10.0,
            apple_silicon_pref=True,
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("any-to-any", "apache-2.0"),
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e4b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-e4b-it-4bit",
        name="Gemma 4 E4B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="5.2GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=8.0,
            recommended_ram_gb=10.0,
            apple_silicon_pref=True,
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("any-to-any", "apache-2.0"),
    ),
    ModelCapability(
        id="mlx-community/gemma-4-12b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-12b-it-4bit",
        name="Gemma 4 12B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="7.6GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=12.0,
            recommended_ram_gb=16.0,
            apple_silicon_pref=True,
            notes="Sweet spot for local multimodal on M-series 16GB+ or 24GB+.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
        recommended_default=True,
        display_priority=20,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-26b-a4b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-26b-a4b-it-4bit",
        name="Gemma 4 26B A4B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="15.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=20.0,
            recommended_ram_gb=28.0,
            apple_silicon_pref=True,
            notes="Large MoE-style; local load practical only on high-RAM Apple Silicon (32GB+). Long download expected.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
        display_priority=50,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-31b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-31b-it-4bit",
        name="Gemma 4 31B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="18.4GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=24.0,
            recommended_ram_gb=32.0,
            apple_silicon_pref=True,
            notes="Very large; high-end local only. Consider cloud fallback for lower RAM.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
    ),

    # Qwen3-VL (strong real-world multimodal, good small sizes)
    ModelCapability(
        id="mlx-community/Qwen3-VL-4B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-4B-Instruct-4bit",
        name="Qwen3-VL 4B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="2.7GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=5.0,
            recommended_ram_gb=8.0,
            apple_silicon_pref=True,
            notes="Extremely compact strong VLM. Best default for low-RAM Macs.",
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
        recommended_default=True,
        display_priority=5,
    ),
    ModelCapability(
        id="mlx-community/Qwen3-VL-8B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-8B-Instruct-4bit",
        name="Qwen3-VL 8B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="4.8GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=8.0,
            recommended_ram_gb=12.0,
            apple_silicon_pref=True,
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
        recommended_default=True,
        display_priority=15,
    ),
    ModelCapability(
        id="mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit",
        name="Qwen3-VL 30B A3B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="18GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=24.0,
            recommended_ram_gb=32.0,
            apple_silicon_pref=True,
            notes="Large MoE VLM; practical local only on 32GB+ Apple Silicon or strong CUDA. Download is multi-GB.",
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
    ),

    # Llama 4
    ModelCapability(
        id="mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
        hf_repo_id="mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
        name="Llama 4 Scout 17B 16E",
        family="Llama 4",
        tag="local-vlm",
        size="11.8GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=16.0,
            recommended_ram_gb=20.0,
            apple_silicon_pref=True,
        ),
        source_country="미국",
        source_company="Meta",
        verification=_hf_light_ok("image-text-to-text", "llama3.1-ish / meta-llama"),
        recommended_default=True,
        display_priority=25,
    ),

    # ── Modern additions for 5.2.0 (verified on HF, user choice expansion) ──
    # Gemma 3 (excellent real multimodal balance, smaller than 4 where present)
    ModelCapability(
        id="google/gemma-3-4b-it",
        hf_repo_id="google/gemma-3-4b-it",
        name="Gemma 3 4B Instruct (HF)",
        family="Gemma 3",
        tag="local-vlm",
        size="~5GB+",
        quantization="bf16 / 4bit variants",
        provider_hints=["local_mlx", "vllm", "ollama"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=8.0,
            recommended_ram_gb=12.0,
            apple_silicon_pref=True,
            notes="Use mlx-community quantized ports when available for best local perf.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("image-text-to-text", "gemma-terms"),
        display_priority=30,
    ),
    ModelCapability(
        id="google/gemma-3-12b-it",
        hf_repo_id="google/gemma-3-12b-it",
        name="Gemma 3 12B Instruct (HF)",
        family="Gemma 3",
        tag="local-vlm",
        size="~12GB+",
        quantization="bf16 / GGUF-4bit",
        provider_hints=["ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="ollama",
        hardware=HardwareProfile(
            min_ram_gb=16.0,
            recommended_ram_gb=20.0,
            notes="Prefer quantized GGUF for llama.cpp / ollama on non-Apple or lower RAM.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_ok("image-text-to-text", "gemma-terms"),
    ),

    # Qwen2.5-VL (battle-tested, widely supported)
    ModelCapability(
        id="Qwen/Qwen2.5-VL-7B-Instruct",
        hf_repo_id="Qwen/Qwen2.5-VL-7B-Instruct",
        name="Qwen2.5-VL 7B Instruct",
        family="Qwen2.5-VL",
        tag="local-vlm",
        size="~8-15GB (quant dependent)",
        quantization="AWQ / GGUF / 4bit ports",
        provider_hints=["vllm", "ollama", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="vllm",
        hardware=HardwareProfile(
            min_ram_gb=12.0,
            recommended_ram_gb=16.0,
            cuda_pref=True,
            notes="Strong general VLM. mlx-community or GGUF ports recommended for local Apple.",
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_ok("image-text-to-text", "apache-2.0"),
        display_priority=35,
    ),

    # Llama 3.2 Vision (widely available, good ecosystem)
    ModelCapability(
        id="meta-llama/Llama-3.2-11B-Vision-Instruct",
        hf_repo_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
        name="Llama 3.2 11B Vision Instruct",
        family="Llama 3.2 Vision",
        tag="local-vlm",
        size="~11-22GB (quant)",
        quantization="Q4_K_M GGUF widely available",
        provider_hints=["ollama", "llamacpp", "lmstudio", "vllm"],
        download_strategy="hf_hub",
        load_strategy="ollama",
        hardware=HardwareProfile(
            min_ram_gb=14.0,
            recommended_ram_gb=18.0,
            notes="Excellent GGUF support. Ollama / llama.cpp default path for most users.",
        ),
        source_country="미국",
        source_company="Meta",
        verification=_hf_light_ok("image-text-to-text", "llama3.2"),
        display_priority=40,
    ),

    # Pixtral (Mistral multimodal, strong)
    ModelCapability(
        id="mistralai/Pixtral-12B-2409",
        hf_repo_id="mistralai/Pixtral-12B-2409",
        name="Pixtral 12B (Mistral)",
        family="Pixtral",
        tag="local-vlm",
        size="~12-24GB",
        quantization="GGUF / AWQ ports",
        provider_hints=["vllm", "ollama", "lmstudio"],
        download_strategy="hf_hub",
        load_strategy="vllm",
        hardware=HardwareProfile(
            min_ram_gb=16.0,
            recommended_ram_gb=20.0,
            cuda_pref=True,
            notes="High quality vision-language. Best on CUDA / vLLM; GGUF for CPU/Apple via community ports.",
        ),
        source_country="프랑스",
        source_company="Mistral AI",
        # Spelled out in full: this is the one entry whose light check did not
        # see config/tokenizer files.
        verification=VerificationStatus(
            hf_exists=True,
            has_config=False,
            has_tokenizer=False,
            has_weights_hint=True,
            pipeline_tag=None,
            license="mistral-research",
            notes="HF repo and weights are present, but config/tokenizer files were not visible in the lightweight HF tree check; treat as available but not local-load verified.",
            verified_by="hf-api-light",
        ),
        display_priority=45,
    ),
]
418
+
419
+
420
def get_all_capabilities() -> List[ModelCapability]:
    """Return a new list holding every registry entry, in registry order.

    The list is a shallow copy, so adding or removing items on the result does
    not touch the module-level registry.
    """
    return [*_REGISTRY]
422
+
423
+
424
def get_capability(model_id: str) -> Optional[ModelCapability]:
    """Look up a registry entry by its ``id`` or its ``hf_repo_id``.

    Entries are scanned in registry order and the first one whose ``id`` or
    ``hf_repo_id`` equals ``model_id`` is returned; ``None`` when nothing
    matches.
    """
    hits = (
        entry
        for entry in _REGISTRY
        if entry.id == model_id or entry.hf_repo_id == model_id
    )
    return next(hits, None)
429
+
430
+
431
# Engines that get a catalog bucket, in the order they are tried per entry.
_CATALOG_ENGINES = ("local_mlx", "ollama", "vllm", "lmstudio", "llamacpp")


def _engine_scoped_id(engine: str, cap: "ModelCapability", current_id: str) -> str:
    """Return the id under which *cap* is listed for *engine*.

    ``local_mlx`` keeps the id untouched, as does any id that already starts
    with ``"<engine>:"``. Every other engine gets an engine-prefixed id.
    """
    if engine == "local_mlx" or current_id.startswith(f"{engine}:"):
        return current_id
    if engine == "ollama":
        repo_name = cap.hf_repo_id.split("/")[-1]
        quantization = (cap.quantization or "").lower()
        if "gguf" in cap.tag.lower() or "gguf" in quantization:
            # Best-effort guess at a ggml-org GGUF port. It is derived from the
            # entry's own repo name so the id names the right model and size,
            # rather than a fixed "-12B-it" suffix.
            return f"ollama:hf.co/ggml-org/{repo_name}-GGUF:Q4_K_M"
        return f"ollama:{repo_name.lower()}"
    if engine == "llamacpp":
        return f"llamacpp:{cap.hf_repo_id}-GGUF"
    # vllm and lmstudio address the HF repo directly.
    return f"{engine}:{cap.hf_repo_id}"


def build_engine_model_catalog() -> Dict[str, List[Dict[str, Any]]]:
    """Return the legacy ENGINE_MODEL_CATALOG shape, enriched with 5.2 fields.

    Each registry entry is listed under every engine named in its
    ``provider_hints`` (restricted to the known engines). Each listing is a
    fresh ``to_legacy_dict()`` result whose ``id`` is rewritten to that
    engine's id form.

    Returns:
        A plain dict mapping engine key to a list of model dicts. Engines with
        no entries are absent. Keys appear in the order they were first
        needed, and entries keep registry order within an engine.
    """
    by_engine: Dict[str, List[Dict[str, Any]]] = {}
    for cap in _REGISTRY:
        for engine in _CATALOG_ENGINES:
            if engine not in cap.provider_hints:
                continue
            legacy = cap.to_legacy_dict()
            legacy["id"] = _engine_scoped_id(engine, cap, legacy["id"])
            by_engine.setdefault(engine, []).append(legacy)
    return by_engine
471
+
472
+
473
def get_verified_models() -> List[Dict[str, Any]]:
    """Return legacy dicts for the entries that passed the light HF check.

    An entry qualifies when its verification record has ``hf_exists``,
    ``has_config`` and ``has_tokenizer`` all truthy. Results keep registry
    order and carry the rich fields (for API/UI).
    """
    verified: List[Dict[str, Any]] = []
    for entry in _REGISTRY:
        status = entry.verification
        if not (status.hf_exists and status.has_config and status.has_tokenizer):
            continue
        verified.append(entry.to_legacy_dict())
    return verified
479
+
480
+
481
# Back-compat: flat list of legacy dicts for every registry entry that lists
# "local_mlx" among its provider hints, mirroring the old top-level mlx list.
LOCAL_MLX_MODELS = [
    entry.to_legacy_dict()
    for entry in _REGISTRY
    if "local_mlx" in entry.provider_hints
]