ltcai 5.1.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,483 @@
1
+ """Structured Model Capability Registry for Lattice AI 5.2.0+.
2
+
3
+ User-focused, transparent model catalog with:
4
+ - HF repo provenance
5
+ - Modality / vision support
6
+ - Quantization, size, download/load strategies
7
+ - Hardware notes (RAM estimates, Apple Silicon affinity)
8
+ - License / safety notes
9
+ - Verification status (populated by scripts/verify_hf_model_registry.py or runtime light probe)
10
+
11
+ This replaces the flat ENGINE_MODEL_CATALOG construction with a richer,
12
+ queryable source of truth while preserving exact legacy shapes for
13
+ model_catalog / recommendation / API / frontend consumers.
14
+
15
+ All entries are recommended models and multimodal (VLM) first; text-only models can be added later.
16
+ Verification is honest: hf_exists + light metadata/config presence; full weights
17
+ are never auto-fetched by the verifier. Large models (>12GB) explicitly note
18
+ "local load practical only on high-RAM Apple Silicon or CUDA; expect long download".
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import datetime as _dt
24
+ from dataclasses import dataclass, asdict, field
25
+ from typing import Any, Dict, List, Optional, Tuple
26
+
27
+
28
@dataclass(frozen=True)
class HardwareProfile:
    """Immutable hardware guidance attached to one catalog entry.

    Every field is optional; a default-constructed profile carries no
    RAM estimates, no platform preference and an empty note.
    """

    # RAM estimates in GB; left as None when no figure is recorded.
    min_ram_gb: Optional[float] = None
    recommended_ram_gb: Optional[float] = None

    # Platform affinity flags; both default to "no stated preference".
    apple_silicon_pref: bool = False
    cuda_pref: bool = False

    # Free-form, user-facing remark about running the model.
    notes: str = ""
35
+
36
+
37
@dataclass(frozen=True)
class VerificationStatus:
    """Immutable record of what a verification pass observed for a model repo.

    A default-constructed status means "nothing confirmed": every boolean
    is False and every optional value is None.
    """

    # Whether the repository was found on Hugging Face.
    hf_exists: bool = False
    # Timestamp of the last check as an ISO string, or None if never checked.
    hf_last_checked: Optional[str] = None

    # File-presence hints gathered from repo metadata.
    has_config: bool = False
    has_tokenizer: bool = False
    has_weights_hint: bool = False  # safetensors or gguf siblings seen in meta

    # Metadata reported for the repo, when available.
    pipeline_tag: Optional[str] = None
    likes: Optional[int] = None
    license: Optional[str] = None

    # Free-form remark about the verification outcome.
    notes: str = ""
    # Which kind of check produced this record: "hf-api-light" or "local-load-test".
    verified_by: str = "hf-api-light"
49
+
50
+
51
@dataclass(frozen=True)
class ModelCapability:
    """Rich capability entry for one catalog model.

    ``id`` is the canonical key used in ENGINE_MODEL_CATALOG.

    NOTE: the dataclass is frozen, but ``provider_hints`` is a list, so
    instances cannot be hashed (used in sets or as dict keys).
    """

    id: str
    hf_repo_id: str  # clean HF path for download (e.g. mlx-community/xxx or Qwen/yyy)
    name: str
    family: str
    tag: str
    size: str  # display string such as "7.6GB" or "pull required"
    modality: str = "multimodal"  # multimodal | vision | text | audio etc.
    quantization: Optional[str] = None  # e.g. "4bit", "Q4_K_M", "GGUF-Q4"
    # Which engines this id primarily maps to.
    provider_hints: List[str] = field(default_factory=lambda: ["local_mlx"])
    download_strategy: str = "hf_hub"  # hf_hub | ollama_pull | lmstudio_app | gguf_manual
    load_strategy: str = "mlx_vlm"  # mlx_vlm | ollama | vllm | lmstudio | llamacpp_server
    hardware: HardwareProfile = field(default_factory=HardwareProfile)
    license: str = "apache-2.0"
    safety_notes: str = "Standard open weights; review license and responsible use guidelines."
    source_country: str = ""
    source_company: str = ""
    execution_method: str = "내 컴퓨터에서만 실행"
    internet_requirement: str = "모델을 다운로드할 때만 인터넷 필요; 실행 중에는 필요 없음"
    # Verification
    verification: VerificationStatus = field(default_factory=VerificationStatus)
    # UI / recommendation hints
    recommended_default: bool = False
    display_priority: int = 100

    def to_legacy_dict(self) -> Dict[str, Any]:
        """Return the legacy ENGINE_MODEL_CATALOG entry shape plus 5.2 fields.

        Every key older consumers read is kept unchanged. The 5.2 additions
        are purely additive and include ``display_priority`` and the full
        verification record (``license``, ``likes``, ``verified_by``), so
        the licence observed during verification is visible next to the
        declared top-level ``license``.

        Returns:
            A fresh dict on every call; callers may mutate it freely.
        """
        hw = self.hardware
        ver = self.verification
        return {
            "id": self.id,
            "name": self.name,
            # Text before the first " via ", or the whole name when absent.
            "model_name": self.name.split(" via ")[0],
            "family": self.family,
            "tag": self.tag,
            "size": self.size,
            "pullable": True,
            "modality": self.modality,
            "source_country": self.source_country,
            "source_company": self.source_company,
            "execution_method": self.execution_method,
            # NOTE(review): fixed string that does not follow a customised
            # execution_method; confirm whether the two should stay in sync.
            "run_location": "내 컴퓨터에서만 실행",
            "internet_requirement": self.internet_requirement,
            "source_display_order": [
                "source_country",
                "source_company",
                "execution_method",
                "internet_requirement",
                "model_name",
            ],
            # 5.2+ rich fields (non-breaking; frontend + backend read if present)
            "hf_repo_id": self.hf_repo_id,
            "quantization": self.quantization,
            "download_strategy": self.download_strategy,
            "load_strategy": self.load_strategy,
            "hardware": {
                "min_ram_gb": hw.min_ram_gb,
                "recommended_ram_gb": hw.recommended_ram_gb,
                "apple_silicon_pref": hw.apple_silicon_pref,
                "cuda_pref": hw.cuda_pref,
                "notes": hw.notes,
            },
            "license": self.license,
            "safety_notes": self.safety_notes,
            "verification": {
                "hf_exists": ver.hf_exists,
                "hf_last_checked": ver.hf_last_checked,
                "has_config": ver.has_config,
                "has_tokenizer": ver.has_tokenizer,
                "has_weights_hint": ver.has_weights_hint,
                "pipeline_tag": ver.pipeline_tag,
                "likes": ver.likes,
                "license": ver.license,
                "verified_by": ver.verified_by,
                # True only when the repo exists and both config and
                # tokenizer files were seen.
                "verified": bool(
                    ver.hf_exists and ver.has_config and ver.has_tokenizer
                ),
                "notes": ver.notes,
            },
            "recommended_default": self.recommended_default,
            "display_priority": self.display_priority,
        }
129
+
130
+
131
+ # ── Curated 5.2.0 registry (bold user-focused: transparent, multimodal-first, verified where practical) ──
132
+ # Current Gemma-4 / Qwen3-VL / Llama-4 kept + modern additions (Gemma3, Qwen2.5-VL, Llama-3.2-Vision, Pixtral).
133
+ # Presence verified via HF API (lightweight model_info) on 2026-06-14.
134
+ # Full weight download is user-consent only; entries without config/tokenizer
135
+ # hints are shown as available-but-not-load-verified.
136
+
137
def _hf_light_verified(pipeline_tag: str, license_id: str = "apache-2.0") -> VerificationStatus:
    """Build the status shared by entries that passed the light HF check.

    Repo, config, tokenizer and a weights hint were all seen; only the
    pipeline tag and the observed licence differ between entries.
    """
    return VerificationStatus(
        hf_exists=True,
        has_config=True,
        has_tokenizer=True,
        has_weights_hint=True,
        pipeline_tag=pipeline_tag,
        license=license_id,
        verified_by="hf-api-light",
    )


# Entries whose observed licence is not Apache-2.0 set ``license`` explicitly;
# otherwise the ModelCapability default ("apache-2.0") would be reported.
_REGISTRY: List[ModelCapability] = [
    # Gemma 4 family (mlx-community 4-bit, Apple-first, excellent local VLM)
    ModelCapability(
        id="mlx-community/gemma-4-e2b-4bit",
        hf_repo_id="mlx-community/gemma-4-e2b-4bit",
        name="Gemma 4 E2B Base",
        family="Gemma 4",
        tag="local-vlm",
        size="3.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=6.0,
            recommended_ram_gb=8.0,
            apple_silicon_pref=True,
            notes="Tiny but capable vision; great first local VLM.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("any-to-any"),
        recommended_default=True,
        display_priority=10,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e2b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-e2b-it-4bit",
        name="Gemma 4 E2B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="3.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=6.0,
            recommended_ram_gb=8.0,
            apple_silicon_pref=True,
            notes="Instruct-tuned; preferred over base for chat.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("any-to-any"),
        recommended_default=True,
        display_priority=11,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e4b-4bit",
        hf_repo_id="mlx-community/gemma-4-e4b-4bit",
        name="Gemma 4 E4B Base",
        family="Gemma 4",
        tag="local-vlm",
        size="5.2GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=10.0, apple_silicon_pref=True),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("any-to-any"),
    ),
    ModelCapability(
        id="mlx-community/gemma-4-e4b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-e4b-it-4bit",
        name="Gemma 4 E4B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="5.2GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=10.0, apple_silicon_pref=True),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("any-to-any"),
    ),
    ModelCapability(
        id="mlx-community/gemma-4-12b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-12b-it-4bit",
        name="Gemma 4 12B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="7.6GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=12.0,
            recommended_ram_gb=16.0,
            apple_silicon_pref=True,
            notes="Sweet spot for local multimodal on M-series 16GB+ or 24GB+.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("image-text-to-text"),
        recommended_default=True,
        display_priority=20,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-26b-a4b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-26b-a4b-it-4bit",
        name="Gemma 4 26B A4B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="15.6GB",
        quantization="4bit",
        provider_hints=["local_mlx"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=20.0,
            recommended_ram_gb=28.0,
            apple_silicon_pref=True,
            notes="Large MoE-style; local load practical only on high-RAM Apple Silicon (32GB+). Long download expected.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("image-text-to-text"),
        display_priority=50,
    ),
    ModelCapability(
        id="mlx-community/gemma-4-31b-it-4bit",
        hf_repo_id="mlx-community/gemma-4-31b-it-4bit",
        name="Gemma 4 31B Instruct",
        family="Gemma 4",
        tag="local-vlm",
        size="18.4GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=24.0,
            recommended_ram_gb=32.0,
            apple_silicon_pref=True,
            notes="Very large; high-end local only. Consider cloud fallback for lower RAM.",
        ),
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("image-text-to-text"),
    ),

    # Qwen3-VL (strong real-world multimodal, good small sizes)
    ModelCapability(
        id="mlx-community/Qwen3-VL-4B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-4B-Instruct-4bit",
        name="Qwen3-VL 4B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="2.7GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=5.0,
            recommended_ram_gb=8.0,
            apple_silicon_pref=True,
            notes="Extremely compact strong VLM. Best default for low-RAM Macs.",
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_verified("image-text-to-text"),
        recommended_default=True,
        display_priority=5,
    ),
    ModelCapability(
        id="mlx-community/Qwen3-VL-8B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-8B-Instruct-4bit",
        name="Qwen3-VL 8B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="4.8GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=8.0, recommended_ram_gb=12.0, apple_silicon_pref=True),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_verified("image-text-to-text"),
        recommended_default=True,
        display_priority=15,
    ),
    ModelCapability(
        id="mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit",
        hf_repo_id="mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit",
        name="Qwen3-VL 30B A3B",
        family="Qwen3-VL",
        tag="local-vlm",
        size="18GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=24.0,
            recommended_ram_gb=32.0,
            apple_silicon_pref=True,
            notes="Large MoE VLM; practical local only on 32GB+ Apple Silicon or strong CUDA. Download is multi-GB.",
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_verified("image-text-to-text"),
    ),

    # Llama 4
    # NOTE(review): "16E" suggests a 16-expert MoE; confirm that the 11.8GB
    # size and 16GB RAM floor match the actual repo before relying on them.
    ModelCapability(
        id="mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
        hf_repo_id="mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit",
        name="Llama 4 Scout 17B 16E",
        family="Llama 4",
        tag="local-vlm",
        size="11.8GB",
        quantization="4bit",
        provider_hints=["local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(min_ram_gb=16.0, recommended_ram_gb=20.0, apple_silicon_pref=True),
        # Meta's community licence, not Apache-2.0.
        license="llama4",
        source_country="미국",
        source_company="Meta",
        verification=_hf_light_verified("image-text-to-text", "llama3.1-ish / meta-llama"),
        recommended_default=True,
        display_priority=25,
    ),

    # ── Modern additions for 5.2.0 (verified on HF, user choice expansion) ──
    # Gemma 3 (excellent real multimodal balance, smaller than 4 where present)
    ModelCapability(
        id="google/gemma-3-4b-it",
        hf_repo_id="google/gemma-3-4b-it",
        name="Gemma 3 4B Instruct (HF)",
        family="Gemma 3",
        tag="local-vlm",
        size="~5GB+",
        quantization="bf16 / 4bit variants",
        provider_hints=["local_mlx", "vllm", "ollama"],
        download_strategy="hf_hub",
        load_strategy="mlx_vlm",
        hardware=HardwareProfile(
            min_ram_gb=8.0,
            recommended_ram_gb=12.0,
            apple_silicon_pref=True,
            notes="Use mlx-community quantized ports when available for best local perf.",
        ),
        license="gemma-terms",
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("image-text-to-text", "gemma-terms"),
        display_priority=30,
    ),
    ModelCapability(
        id="google/gemma-3-12b-it",
        hf_repo_id="google/gemma-3-12b-it",
        name="Gemma 3 12B Instruct (HF)",
        family="Gemma 3",
        tag="local-vlm",
        size="~12GB+",
        quantization="bf16 / GGUF-4bit",
        provider_hints=["ollama", "vllm", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="ollama",
        hardware=HardwareProfile(
            min_ram_gb=16.0,
            recommended_ram_gb=20.0,
            notes="Prefer quantized GGUF for llama.cpp / ollama on non-Apple or lower RAM.",
        ),
        license="gemma-terms",
        source_country="미국",
        source_company="Google",
        verification=_hf_light_verified("image-text-to-text", "gemma-terms"),
    ),

    # Qwen2.5-VL (battle-tested, widely supported)
    ModelCapability(
        id="Qwen/Qwen2.5-VL-7B-Instruct",
        hf_repo_id="Qwen/Qwen2.5-VL-7B-Instruct",
        name="Qwen2.5-VL 7B Instruct",
        family="Qwen2.5-VL",
        tag="local-vlm",
        size="~8-15GB (quant dependent)",
        quantization="AWQ / GGUF / 4bit ports",
        provider_hints=["vllm", "ollama", "lmstudio", "llamacpp"],
        download_strategy="hf_hub",
        load_strategy="vllm",
        hardware=HardwareProfile(
            min_ram_gb=12.0,
            recommended_ram_gb=16.0,
            cuda_pref=True,
            notes="Strong general VLM. mlx-community or GGUF ports recommended for local Apple.",
        ),
        source_country="중국",
        source_company="Alibaba",
        verification=_hf_light_verified("image-text-to-text"),
        display_priority=35,
    ),

    # Llama 3.2 Vision (widely available, good ecosystem)
    ModelCapability(
        id="meta-llama/Llama-3.2-11B-Vision-Instruct",
        hf_repo_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
        name="Llama 3.2 11B Vision Instruct",
        family="Llama 3.2 Vision",
        tag="local-vlm",
        size="~11-22GB (quant)",
        quantization="Q4_K_M GGUF widely available",
        provider_hints=["ollama", "llamacpp", "lmstudio", "vllm"],
        download_strategy="hf_hub",
        load_strategy="ollama",
        hardware=HardwareProfile(
            min_ram_gb=14.0,
            recommended_ram_gb=18.0,
            notes="Excellent GGUF support. Ollama / llama.cpp default path for most users.",
        ),
        license="llama3.2",
        source_country="미국",
        source_company="Meta",
        verification=_hf_light_verified("image-text-to-text", "llama3.2"),
        display_priority=40,
    ),

    # Pixtral (Mistral multimodal, strong)
    # NOTE(review): the top-level license is left at the "apache-2.0" default
    # while the verification record says "mistral-research"; confirm which is
    # correct against the model card before changing either.
    ModelCapability(
        id="mistralai/Pixtral-12B-2409",
        hf_repo_id="mistralai/Pixtral-12B-2409",
        name="Pixtral 12B (Mistral)",
        family="Pixtral",
        tag="local-vlm",
        size="~12-24GB",
        quantization="GGUF / AWQ ports",
        provider_hints=["vllm", "ollama", "lmstudio"],
        download_strategy="hf_hub",
        load_strategy="vllm",
        hardware=HardwareProfile(
            min_ram_gb=16.0,
            recommended_ram_gb=20.0,
            cuda_pref=True,
            notes="High quality vision-language. Best on CUDA / vLLM; GGUF for CPU/Apple via community ports.",
        ),
        source_country="프랑스",
        source_company="Mistral AI",
        verification=VerificationStatus(
            hf_exists=True,
            has_config=False,
            has_tokenizer=False,
            has_weights_hint=True,
            pipeline_tag=None,
            license="mistral-research",
            notes="HF repo and weights are present, but config/tokenizer files were not visible in the lightweight HF tree check; treat as available but not local-load verified.",
            verified_by="hf-api-light",
        ),
        display_priority=45,
    ),
]
419
+
420
+
421
def get_all_capabilities() -> List[ModelCapability]:
    """Return a shallow copy of the registry, in registry order.

    The copy lets callers reorder or filter the list without touching
    the module-level registry.
    """
    return _REGISTRY[:]
423
+
424
+
425
def get_capability(model_id: str) -> Optional[ModelCapability]:
    """Look up a registry entry by its ``id`` or its ``hf_repo_id``.

    Returns the first entry, in registry order, where either field equals
    ``model_id``, or None when nothing matches.
    """
    matches = (
        cap
        for cap in _REGISTRY
        if cap.id == model_id or cap.hf_repo_id == model_id
    )
    return next(matches, None)
430
+
431
+
432
def _engine_scoped_id(engine: str, cap: ModelCapability) -> str:
    """Return the catalog id under which ``cap`` is listed for ``engine``."""
    # local_mlx keeps the canonical id; an id already carrying the engine
    # prefix is left alone.
    if engine == "local_mlx" or cap.id.startswith(f"{engine}:"):
        return cap.id

    repo_name = cap.hf_repo_id.split("/")[-1]
    if engine == "ollama":
        is_gguf = (
            "gguf" in cap.tag.lower()
            or "gguf" in (cap.quantization or "").lower()
        )
        if is_gguf:
            # Derived from the repo name so the id reflects this model's own
            # size/variant and stays unique within a family.
            # Fallback only; may be overridden by aliases.
            return f"ollama:hf.co/ggml-org/{repo_name}-GGUF:Q4_K_M"
        return f"ollama:{repo_name.lower()}"
    if engine == "llamacpp":
        return f"llamacpp:{cap.hf_repo_id}-GGUF"
    # vllm / lmstudio: engine prefix + full HF repo path.
    return f"{engine}:{cap.hf_repo_id}"


def build_engine_model_catalog() -> Dict[str, List[Dict[str, Any]]]:
    """Return the legacy ENGINE_MODEL_CATALOG shape, enriched with 5.2 fields.

    Each registry entry is projected once per engine named in its
    ``provider_hints``; the projected dict is the entry's legacy dict with
    ``id`` rewritten to the engine-specific form.

    Returns:
        A plain dict mapping engine key to a list of legacy dicts. Engines
        appear in order of first use, and an engine with no models is
        omitted rather than mapped to an empty list.
    """
    engines = ("local_mlx", "ollama", "vllm", "lmstudio", "llamacpp")
    catalog: Dict[str, List[Dict[str, Any]]] = {}

    for cap in _REGISTRY:
        for engine in engines:
            if engine not in cap.provider_hints:
                continue
            entry = cap.to_legacy_dict()  # fresh dict per engine
            entry["id"] = _engine_scoped_id(engine, cap)
            catalog.setdefault(engine, []).append(entry)

    return catalog
472
+
473
+
474
def get_verified_models() -> List[Dict[str, Any]]:
    """Return legacy dicts (with rich fields) for verified entries, for API/UI use.

    An entry counts as verified when its verification record has
    ``hf_exists``, ``has_config`` and ``has_tokenizer`` all truthy.
    Registry order is preserved.
    """
    verified: List[Dict[str, Any]] = []
    for cap in _REGISTRY:
        status = cap.verification
        if not (status.hf_exists and status.has_config and status.has_tokenizer):
            continue
        verified.append(cap.to_legacy_dict())
    return verified
480
+
481
+
482
# Backward compatibility: a flat list of legacy dicts for every registry
# entry that names "local_mlx" among its provider hints.
LOCAL_MLX_MODELS = [
    entry.to_legacy_dict()
    for entry in _REGISTRY
    if "local_mlx" in entry.provider_hints
]