ltcai 5.1.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -39
- package/docs/CHANGELOG.md +36 -2355
- package/docs/V4_1_VALIDATION_REPORT.md +1 -1
- package/docs/V4_3_PRODUCT_HARDENING_REPORT.md +2 -2
- package/docs/V4_5_1_VALIDATION_REPORT.md +2 -1
- package/frontend/src/pages/Library.tsx +29 -4
- package/lattice_brain/__init__.py +1 -1
- package/lattice_brain/runtime/multi_agent.py +1 -1
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/marketplace.py +2 -2
- package/latticeai/api/models.py +20 -4
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/workspace_os.py +18 -4
- package/latticeai/services/model_capability_registry.py +483 -0
- package/latticeai/services/model_catalog.py +99 -96
- package/latticeai/services/model_recommendation.py +12 -1
- package/package.json +1 -1
- package/scripts/verify_hf_model_registry.py +308 -0
- package/src-tauri/Cargo.lock +1 -1
- package/src-tauri/Cargo.toml +1 -1
- package/src-tauri/tauri.conf.json +1 -1
- package/static/app/asset-manifest.json +5 -5
- package/static/app/assets/index-CQmHhk8Q.css +2 -0
- package/static/app/assets/{index-DONOJfMn.js → index-DsnfomFs.js} +1 -1
- package/static/app/assets/{index-DONOJfMn.js.map → index-DsnfomFs.js.map} +1 -1
- package/static/app/index.html +2 -2
- package/static/app/assets/index-DuYYT2oh.css +0 -2
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
"""Static local-model catalog, engine installers, and family-version filtering.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
3
|
+
5.2.0: Now sources the rich ENGINE_MODEL_CATALOG from the structured
|
|
4
|
+
model_capability_registry (single source of truth with HF verification,
|
|
5
|
+
download/load strategy, hardware, license, modality). Legacy flat shapes
|
|
6
|
+
preserved exactly for all downstream (recommendation, api, runtime, frontend).
|
|
7
|
+
|
|
8
|
+
The old inline _model() + ENGINE_MODEL_CATALOG data has been moved into
|
|
9
|
+
latticeai/services/model_capability_registry.py (see there for full 5.2 fields).
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
|
@@ -15,6 +15,16 @@ import re
|
|
|
15
15
|
import sys
|
|
16
16
|
from typing import Dict, List, Optional
|
|
17
17
|
|
|
18
|
+
# 5.2.0: Delegate catalog data to the structured capability registry (rich + verified).
|
|
19
|
+
# This keeps backward compat for every `from ...model_catalog import ENGINE_MODEL_CATALOG`.
|
|
20
|
+
from latticeai.services.model_capability_registry import (
|
|
21
|
+
build_engine_model_catalog as _build_engine_model_catalog,
|
|
22
|
+
get_all_capabilities as _get_all_capabilities,
|
|
23
|
+
get_capability as _get_capability,
|
|
24
|
+
get_verified_models as _get_verified_models,
|
|
25
|
+
LOCAL_MLX_MODELS as _LOCAL_MLX_MODELS,
|
|
26
|
+
)
|
|
27
|
+
|
|
18
28
|
ENGINE_INSTALLERS = {
|
|
19
29
|
"local_mlx": {
|
|
20
30
|
"command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm>=0.6.3", "mlx-lm", "huggingface_hub[cli]"],
|
|
@@ -61,95 +71,22 @@ ENGINE_INSTALLERS = {
|
|
|
61
71
|
},
|
|
62
72
|
}
|
|
63
73
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"name": name,
|
|
81
|
-
"model_name": clean_model_name,
|
|
82
|
-
"family": family,
|
|
83
|
-
"tag": tag,
|
|
84
|
-
"size": size,
|
|
85
|
-
"pullable": pullable,
|
|
86
|
-
"modality": "multimodal",
|
|
87
|
-
"source_country": source_country,
|
|
88
|
-
"source_company": source_company,
|
|
89
|
-
"execution_method": execution_method,
|
|
90
|
-
"run_location": "내 컴퓨터에서만 실행",
|
|
91
|
-
"internet_requirement": internet_requirement,
|
|
92
|
-
"source_display_order": [
|
|
93
|
-
"source_country",
|
|
94
|
-
"source_company",
|
|
95
|
-
"execution_method",
|
|
96
|
-
"internet_requirement",
|
|
97
|
-
"model_name",
|
|
98
|
-
],
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
_RUNS_ON_THIS_COMPUTER = "내 컴퓨터에서만 실행"
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
ENGINE_MODEL_CATALOG = {
|
|
106
|
-
"local_mlx": [
|
|
107
|
-
_model("mlx-community/gemma-4-e2b-4bit", "Gemma 4 E2B Base", "Gemma 4", "local-vlm", "3.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
108
|
-
_model("mlx-community/gemma-4-e2b-it-4bit", "Gemma 4 E2B Instruct", "Gemma 4", "local-vlm", "3.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
109
|
-
_model("mlx-community/gemma-4-e4b-4bit", "Gemma 4 E4B Base", "Gemma 4", "local-vlm", "5.2GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
110
|
-
_model("mlx-community/gemma-4-e4b-it-4bit", "Gemma 4 E4B Instruct", "Gemma 4", "local-vlm", "5.2GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
111
|
-
_model("mlx-community/gemma-4-12b-it-4bit", "Gemma 4 12B Instruct", "Gemma 4", "local-vlm", "7.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
112
|
-
_model("mlx-community/gemma-4-26b-a4b-it-4bit", "Gemma 4 26B A4B Instruct", "Gemma 4", "local-vlm", "15.6GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
113
|
-
_model("mlx-community/gemma-4-31b-it-4bit", "Gemma 4 31B Instruct", "Gemma 4", "local-vlm", "18.4GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
114
|
-
_model("mlx-community/Qwen3-VL-4B-Instruct-4bit", "Qwen3-VL 4B", "Qwen3-VL", "local-vlm", "2.7GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
115
|
-
_model("mlx-community/Qwen3-VL-8B-Instruct-4bit", "Qwen3-VL 8B", "Qwen3-VL", "local-vlm", "4.8GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
116
|
-
_model("mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "Qwen3-VL 30B A3B", "Qwen3-VL", "local-vlm", "18GB", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
117
|
-
_model("mlx-community/Llama-4-Scout-17B-16E-Instruct-4bit", "Llama 4 Scout 17B 16E", "Llama 4", "local-vlm", "11.8GB", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
118
|
-
],
|
|
119
|
-
"ollama": [
|
|
120
|
-
_model("ollama:qwen3-vl:4b", "Qwen3-VL 4B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
121
|
-
_model("ollama:qwen3-vl:8b", "Qwen3-VL 8B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
122
|
-
_model("ollama:qwen3-vl:30b", "Qwen3-VL 30B via Ollama", "Qwen3-VL", "local-vlm", "pull required", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
123
|
-
_model("ollama:hf.co/ggml-org/gemma-4-12B-it-GGUF:Q4_K_M", "Gemma 4 12B Q4 via Ollama", "Gemma 4", "local-vlm", "7.9GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
124
|
-
_model("ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "Gemma 4 31B Q4 via Ollama", "Gemma 4", "local-vlm", "18.7GB", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
125
|
-
_model("ollama:hf.co/ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_M", "Llama 4 Scout Q4 via Ollama", "Llama 4", "local-vlm", "12GB", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
126
|
-
],
|
|
127
|
-
"vllm": [
|
|
128
|
-
_model("vllm:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
129
|
-
_model("vllm:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
130
|
-
_model("vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via vLLM", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
131
|
-
_model("vllm:google/gemma-4-12b-it", "Gemma 4 12B via vLLM", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
132
|
-
_model("vllm:suitch/gemma-4-31B-it-4bit", "Gemma 4 31B via vLLM", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
133
|
-
_model("vllm:meta-llama/Llama-4-Scout-17B-16E-Instruct", "Llama 4 Scout via vLLM", "Llama 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
134
|
-
],
|
|
135
|
-
"lmstudio": [
|
|
136
|
-
_model("lmstudio:Qwen/Qwen3-VL-4B-Instruct", "Qwen3-VL 4B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
137
|
-
_model("lmstudio:Qwen/Qwen3-VL-8B-Instruct", "Qwen3-VL 8B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
138
|
-
_model("lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen3-VL 30B A3B via LM Studio", "Qwen3-VL", "local-vlm", "실행 도구에서 관리", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
139
|
-
_model("lmstudio:ggml-org/gemma-4-12B-it-GGUF", "Gemma 4 12B 4-bit via LM Studio", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
140
|
-
_model("lmstudio:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B 4-bit via LM Studio", "Gemma 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
141
|
-
_model("lmstudio:meta-llama/Llama-4-Scout-17B-16E-Instruct", "Llama 4 Scout via LM Studio", "Llama 4", "local-vlm", "실행 도구에서 관리", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
142
|
-
],
|
|
143
|
-
"llamacpp": [
|
|
144
|
-
_model("llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "Qwen3-VL 4B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
145
|
-
_model("llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "Qwen3-VL 8B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
146
|
-
_model("llamacpp:Qwen/Qwen3-VL-30B-A3B-Instruct-GGUF", "Qwen3-VL 30B GGUF via llama.cpp", "Qwen3-VL", "gguf-vlm", "gguf", source_country="중국", source_company="Alibaba", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
147
|
-
_model("llamacpp:ggml-org/gemma-4-12B-it-GGUF", "Gemma 4 12B GGUF via llama.cpp", "Gemma 4", "gguf-vlm", "gguf", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
148
|
-
_model("llamacpp:ggml-org/gemma-4-31B-it-GGUF", "Gemma 4 31B GGUF via llama.cpp", "Gemma 4", "gguf-vlm", "gguf", source_country="미국", source_company="Google", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
149
|
-
_model("llamacpp:ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF", "Llama 4 Scout GGUF via llama.cpp", "Llama 4", "gguf-vlm", "gguf", source_country="미국", source_company="Meta", execution_method=_RUNS_ON_THIS_COMPUTER),
|
|
150
|
-
],
|
|
151
|
-
}
|
|
152
|
-
|
|
74
|
+
# 5.2.0 delegation: the rich catalog (with verification, hf_repo_id, strategies, hardware, license etc)
|
|
75
|
+
# is defined in model_capability_registry. We build the legacy-shaped ENGINE_MODEL_CATALOG here
|
|
76
|
+
# at import time so every existing consumer (runtime, api, recommendation, tests) is unaffected.
|
|
77
|
+
#
|
|
78
|
+
# The *raw* registry projection keeps every capability (incl. legacy generations
|
|
79
|
+
# like Gemma 3 / Qwen2.5-VL / Pixtral) for transparency + HF verification. The
|
|
80
|
+
# user-facing ENGINE_MODEL_CATALOG below is then narrowed to the aggressive 5.2.0
|
|
81
|
+
# policy (latest family generations only, no text-only/legacy weights) and the
|
|
82
|
+
# engine-specific ids/sizes are normalised. See `_finalize_engine_catalog`.
|
|
83
|
+
_RAW_ENGINE_MODEL_CATALOG: Dict[str, List[Dict[str, object]]] = _build_engine_model_catalog()
|
|
84
|
+
# Filled in at module end once the blocklist, alias map and family-version filter
|
|
85
|
+
# are all defined; declared here so the public name exists for static readers.
|
|
86
|
+
ENGINE_MODEL_CATALOG: Dict[str, List[Dict[str, object]]] = {}
|
|
87
|
+
|
|
88
|
+
# Historical aliases preserved (used by _recommended_with_engine_options and resolution).
|
|
89
|
+
# These can be enriched later from registry if needed; kept verbatim for safety.
|
|
153
90
|
MODEL_ENGINE_ALIASES = {
|
|
154
91
|
"gemma-4-12b-it-4bit": {
|
|
155
92
|
"local_mlx": "mlx-community/gemma-4-12b-it-4bit",
|
|
@@ -202,6 +139,14 @@ MODEL_ENGINE_ALIASES = {
|
|
|
202
139
|
},
|
|
203
140
|
}
|
|
204
141
|
|
|
142
|
+
# Also expose registry helpers directly from here for consumers who want the rich objects
|
|
143
|
+
get_all_capabilities = _get_all_capabilities
|
|
144
|
+
get_capability = _get_capability
|
|
145
|
+
get_verified_models = _get_verified_models
|
|
146
|
+
|
|
147
|
+
# Convenience re-export for tests / places that did `from ...model_catalog import LOCAL_MLX_MODELS`
|
|
148
|
+
LOCAL_MLX_MODELS = _LOCAL_MLX_MODELS # type: ignore[name-defined]
|
|
149
|
+
|
|
205
150
|
_VERSIONED_MODEL_PATTERNS = (
|
|
206
151
|
("gemma", re.compile(r"\bgemma[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
|
|
207
152
|
("qwen", re.compile(r"\bqwen[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
|
|
@@ -239,3 +184,61 @@ def filter_lower_family_versions(models: List[Dict[str, object]]) -> List[Dict[s
|
|
|
239
184
|
model for model, version_info in detected
|
|
240
185
|
if not version_info or version_info[1] >= max_versions.get(version_info[0], version_info[1])
|
|
241
186
|
]
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ── 5.2.0 user-facing catalog assembly ────────────────────────────────────────
|
|
190
|
+
# Legacy/text-only generations stay in the capability registry (for transparency
|
|
191
|
+
# and HF verification) but must never be surfaced in the model picker. Anything
|
|
192
|
+
# whose id contains one of these fragments is dropped from ENGINE_MODEL_CATALOG.
|
|
193
|
+
# Substrings that are compared against the lower-cased ``id`` of a catalog
# entry; an entry whose id contains any of them is treated as blocked.
_BLOCKED_CATALOG_FRAGMENTS = (
    "gemma-3",
    "gemma3",
    "gemma-2",
    "gemma2",
    "qwen2.5",
    "qwen-2.5",
    "qwen2-5",
    "llama-3",
    "llama3.2",
    "llama-3.2",
    "pixtral",
    "mistral",
    "smollm",
    "gpt-oss",
    "phi-",
)


def _is_blocked_catalog_id(model: Dict[str, object]) -> bool:
    """Tell whether a catalog entry's id contains a blocked fragment.

    The ``"id"`` value is stringified and lower-cased before matching, so the
    check is case-insensitive. A missing or falsy id never matches.
    """
    raw_id = model.get("id")
    lowered = str(raw_id).lower() if raw_id else ""
    for needle in _BLOCKED_CATALOG_FRAGMENTS:
        if needle in lowered:
            return True
    return False
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _normalize_engine_entry(engine: str, model: Dict[str, object]) -> Dict[str, object]:
    """Rewrite a raw catalog entry to the id/size conventions of *engine*.

    * ``local_mlx`` entries are handed back untouched (the very same object).
    * For every other engine a shallow copy is returned. Its ``id`` becomes
      ``"<engine>:<alias>"`` when :data:`MODEL_ENGINE_ALIASES` has an alias
      for this engine, looked up first by the lower-cased last path segment of
      ``hf_repo_id`` and then by the full lower-cased ``hf_repo_id``.
    * The copy's ``size`` is always replaced by the tool-managed placeholder
      string, because the registry size is not used for these engines.
    """
    if engine == "local_mlx":
        return model

    repo_id = str(model.get("hf_repo_id") or "")
    basename_key = repo_id.split("/")[-1].lower()

    # Prefer the short (basename) key; fall back to the full repo id.
    alias_row = MODEL_ENGINE_ALIASES.get(basename_key)
    if not alias_row:
        alias_row = MODEL_ENGINE_ALIASES.get(repo_id.lower())
    engine_id = alias_row.get(engine) if alias_row else None

    normalized = dict(model)
    if engine_id:
        normalized["id"] = f"{engine}:{engine_id}"
    normalized["size"] = "실행 도구에서 관리"
    return normalized
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _finalize_engine_catalog(
    raw: Dict[str, List[Dict[str, object]]],
) -> Dict[str, List[Dict[str, object]]]:
    """Build the per-engine catalog from the raw per-engine model lists.

    For each engine, entries rejected by ``_is_blocked_catalog_id`` are
    dropped, the survivors are passed through ``_normalize_engine_entry``,
    and the resulting list is handed to ``filter_lower_family_versions``.
    Engine keys keep the iteration order of *raw*.
    """
    return {
        engine_name: filter_lower_family_versions(
            [
                _normalize_engine_entry(engine_name, candidate)
                for candidate in entries
                if not _is_blocked_catalog_id(candidate)
            ]
        )
        for engine_name, entries in raw.items()
    }
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
ENGINE_MODEL_CATALOG = _finalize_engine_catalog(_RAW_ENGINE_MODEL_CATALOG)
|
|
@@ -112,7 +112,7 @@ def _classify_one(
|
|
|
112
112
|
else:
|
|
113
113
|
status, reason = NOT_RECOMMENDED, f"권장 메모리가 부족합니다 (~{need_gb:.0f} GB 필요, 현재 {ram_gb:.0f} GB)"
|
|
114
114
|
|
|
115
|
-
|
|
115
|
+
rich = {
|
|
116
116
|
"id": model.get("id"),
|
|
117
117
|
"name": model.get("name"),
|
|
118
118
|
"model_name": model.get("model_name") or model.get("name"),
|
|
@@ -131,7 +131,18 @@ def _classify_one(
|
|
|
131
131
|
"internet_requirement": model.get("internet_requirement"),
|
|
132
132
|
"source_display_order": model.get("source_display_order"),
|
|
133
133
|
"runtime_compatibility": runtime,
|
|
134
|
+
# 5.2+ user-focused transparency
|
|
135
|
+
"hf_repo_id": model.get("hf_repo_id"),
|
|
136
|
+
"quantization": model.get("quantization"),
|
|
137
|
+
"download_strategy": model.get("download_strategy"),
|
|
138
|
+
"load_strategy": model.get("load_strategy"),
|
|
139
|
+
"hardware": model.get("hardware"),
|
|
140
|
+
"license": model.get("license"),
|
|
141
|
+
"safety_notes": model.get("safety_notes"),
|
|
142
|
+
"verification": model.get("verification"),
|
|
143
|
+
"recommended_default": model.get("recommended_default", False),
|
|
134
144
|
}
|
|
145
|
+
return rich
|
|
135
146
|
|
|
136
147
|
|
|
137
148
|
def _family_rank(family: str) -> int:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ltcai",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.2.0",
|
|
4
4
|
"description": "Lattice AI — local-first Living Brain workspace (conversation, durable memory, hybrid search, agents, advanced graph exploration, portable encrypted brain archives)",
|
|
5
5
|
"homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
|
|
6
6
|
"repository": {
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Automated HF verification script for Lattice AI 5.2.0 Model Capability Registry.
|
|
4
|
+
|
|
5
|
+
Usage (no heavy deps):
|
|
6
|
+
python3 scripts/verify_hf_model_registry.py # light API metadata only
|
|
7
|
+
python3 scripts/verify_hf_model_registry.py --deep # + try light config+tokenizer fetch (needs hf_hub or transformers)
|
|
8
|
+
python3 scripts/verify_hf_model_registry.py --test-load # for *very small* models: attempt real from_pretrained (config+tokenizer only, no full weights if possible). Warns for large.
|
|
9
|
+
|
|
10
|
+
Behavior:
|
|
11
|
+
- Never blindly downloads full weights for large models.
|
|
12
|
+
- Uses public HF REST API (no token) for existence, pipeline, tags, likes, lastModified, siblings summary.
|
|
13
|
+
- For deep: uses huggingface_hub snapshot_download with allow_patterns=["config.json","tokenizer*.json","*.model"] + max 50MB or specific small files only. Falls back gracefully.
|
|
14
|
+
- For --test-load on practical sizes (<~4GB display): imports and calls AutoConfig.from_pretrained + AutoTokenizer (trust_remote_code=False by default).
|
|
15
|
+
- Emits:
|
|
16
|
+
* console table
|
|
17
|
+
* verification_report.json (timestamped + summary)
|
|
18
|
+
* Suggested Python snippet to copy verified flags back into model_capability_registry.py (if desired for static pinning)
|
|
19
|
+
|
|
20
|
+
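Large model explicit limitation: entries whose display size exceeds 12GB are marked LARGE in the console table and skipped by --test-load; entries whose min_ram_gb exceeds 12 get a "LARGE_MODEL" note in the report.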
|
|
21
|
+
|
|
22
|
+
Exit code: 0 when every recommended-default model is found on HF, 1 if any recommended-default model is missing or unreachable, 2 if the capability registry cannot be imported.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import sys
|
|
31
|
+
import time
|
|
32
|
+
import urllib.error
|
|
33
|
+
import urllib.request
|
|
34
|
+
from datetime import datetime, timezone
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Any, Dict, List, Optional
|
|
37
|
+
|
|
38
|
+
# Add repo root so we can import the registry directly
|
|
39
|
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
|
40
|
+
sys.path.insert(0, str(REPO_ROOT))
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
from latticeai.services.model_capability_registry import (
|
|
44
|
+
get_all_capabilities,
|
|
45
|
+
ModelCapability,
|
|
46
|
+
VerificationStatus,
|
|
47
|
+
)
|
|
48
|
+
except Exception as e:
|
|
49
|
+
print("ERROR: Could not import model_capability_registry:", e)
|
|
50
|
+
sys.exit(2)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
HF_API = "https://huggingface.co/api/models/{repo}"
|
|
54
|
+
HF_FILES = "https://huggingface.co/api/models/{repo}/tree/main" # for sibling light check
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _http_get(url: str, timeout: float = 20.0) -> Optional[Any]:
    """Fetch *url* and return its decoded JSON payload, or ``None`` on failure.

    The payload may be any JSON value: the model-info endpoint yields an
    object, while the ``/tree/main`` endpoint is consumed as a list by
    ``verify_one_light``. A body that is empty or whitespace-only is reported
    as ``{}``.

    Args:
        url: Address to request.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The parsed JSON value, ``{}`` for a blank body, or ``None`` when the
        request fails (HTTP error, transport error) or the body is not JSON.
        Failures other than HTTP 404 are reported on stdout.
    """
    request = urllib.request.Request(url, headers={"User-Agent": "LatticeAI-5.2-verifier/1.0"})
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            body = resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        # 404 is an expected "repo does not exist" answer, so stay quiet.
        if e.code != 404:
            print(f"  HTTP {e.code} for {url}")
        return None
    except Exception as e:
        # Deliberately broad: this is a best-effort probe and must not crash
        # the whole verification run on a transport problem.
        print(f"  Net error {url}: {type(e).__name__}")
        return None

    if not body.strip():
        return {}
    try:
        return json.loads(body)
    except ValueError as e:
        # json.JSONDecodeError subclasses ValueError; report it as a payload
        # problem rather than a network one.
        print(f"  Invalid JSON from {url}: {type(e).__name__}")
        return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def verify_one_light(cap: ModelCapability) -> Dict[str, Any]:
    """Run the lightweight HF check for one capability (metadata only).

    Queries the model-info endpoint and the ``/tree/main`` listing for
    ``cap.hf_repo_id`` through ``_http_get``; no file content is downloaded.

    Args:
        cap: Registry entry; ``id``, ``hf_repo_id``, ``family``, ``size``,
            ``modality`` and ``hardware`` are read from it.

    Returns:
        A flat result dict. ``hf_exists`` stays ``False`` (with an explanatory
        note) when the model-info request returns nothing. Otherwise the
        metadata fields and the ``has_*_hint`` flags are filled in, and
        ``notes`` collects human-readable warnings.
    """
    repo = cap.hf_repo_id
    result: Dict[str, Any] = {
        "id": cap.id,
        "hf_repo_id": repo,
        "family": cap.family,
        "size": cap.size,
        "modality": cap.modality,
        "hf_exists": False,
        "pipeline_tag": None,
        "likes": None,
        "lastModified": None,
        "license": None,
        "has_config_hint": False,
        "has_tokenizer_hint": False,
        "has_weights_hint": False,
        "tags_sample": [],
        "notes": "",
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }

    info = _http_get(HF_API.format(repo=repo))
    if info is None:
        result["notes"] = "404 or unreachable on HF API"
        return result
    if not isinstance(info, dict):
        # Unexpected payload shape: the repo answered, but there is no
        # metadata object to read from.
        info = {}

    tags = info.get("tags") or []
    result["hf_exists"] = True
    result["pipeline_tag"] = info.get("pipeline_tag")
    result["likes"] = info.get("likes")
    result["lastModified"] = info.get("lastModified")

    # Resolve the license from the most specific source available instead of
    # falling back to an arbitrary first tag (which is usually a library or
    # task name, not a license).
    license_id = info.get("license")
    if not license_id:
        card = info.get("cardData")
        if isinstance(card, dict):
            license_id = card.get("license")
    if not license_id:
        license_id = next(
            (t.split(":", 1)[1] for t in tags if isinstance(t, str) and t.startswith("license:")),
            None,
        )
    result["license"] = (info.get("author") or "") + " / " + str(license_id or "?")
    result["tags_sample"] = tags[:6]

    # Siblings via /tree (light, shows filenames + simple types; size omitted in some)
    files = _http_get(HF_FILES.format(repo=repo)) or []
    names = []
    if isinstance(files, list):
        for f in files:
            if isinstance(f, dict):
                n = str(f.get("path") or f.get("rfilename") or "").strip()
                if n:
                    names.append(n.lower())

    # Match config.json by exact file name: a substring test would also be
    # satisfied by tokenizer_config.json, generation_config.json, etc.
    has_config = any(n.rsplit("/", 1)[-1] == "config.json" for n in names)
    has_tok = any("tokenizer" in n or n.endswith(".model") for n in names)
    has_weights = any(n.endswith((".safetensors", ".bin", ".gguf", ".pt")) for n in names)

    result["has_config_hint"] = has_config
    result["has_tokenizer_hint"] = has_tok
    result["has_weights_hint"] = has_weights

    if not has_config:
        result["notes"] += "No config.json visible in tree. "
    if not has_tok:
        result["notes"] += "No obvious tokenizer file. "
    if cap.hardware and cap.hardware.min_ram_gb and cap.hardware.min_ram_gb > 12:
        result["notes"] += "LARGE_MODEL: local load practical only on high-RAM systems (32GB+ Apple Silicon or CUDA recommended). Expect long first download. "

    return result
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def try_deep_config(repo: str, tmp_dir: Path) -> Dict[str, Any]:
    """Download only config/tokenizer files for *repo* and report what arrived.

    Requires ``huggingface_hub``; when it cannot be imported, the function
    returns immediately with ``error`` set. Weight files are never requested
    because ``allow_patterns`` is limited to config and tokenizer file names.

    Args:
        repo: Hugging Face repo id (``owner/name``).
        tmp_dir: Scratch directory; files land in a per-repo subdirectory
            whose name is *repo* with ``/`` replaced by ``--``.

    Returns:
        Dict with ``deep_ok``, ``has_config``, ``has_tokenizer``, ``error``
        and ``used``. Any failure (including directory creation) is captured
        in ``error`` (truncated to 300 characters) instead of being raised.
    """
    out: Dict[str, Any] = {"deep_ok": False, "has_config": False, "has_tokenizer": False, "error": None, "used": "none"}
    try:
        from huggingface_hub import snapshot_download  # type: ignore
    except Exception as e:
        out["error"] = f"huggingface_hub not available: {e}"
        return out

    target = tmp_dir / repo.replace("/", "--")
    try:
        target.mkdir(parents=True, exist_ok=True)
        # Extremely restrictive: only metadata files. This is safe and tiny.
        # NOTE: `local_dir_use_symlinks` and `resume_download` are intentionally
        # not passed; they are deprecated in current huggingface_hub releases
        # and the existence checks below do not depend on them.
        path = snapshot_download(
            repo_id=repo,
            local_dir=str(target),
            allow_patterns=["config.json", "tokenizer*.json", "tokenizer.model", "tokenizer_config.json", "*.model", "special_tokens_map.json"],
            max_workers=2,
        )
        p = Path(path)
        cfg = (p / "config.json").exists()
        tok = any((p / n).exists() for n in ("tokenizer.json", "tokenizer_config.json", "tokenizer.model"))
        out.update({"deep_ok": True, "has_config": cfg, "has_tokenizer": tok, "used": "snapshot_download(restricted)"})
    except Exception as e:
        # Best-effort probe: record the failure and let the caller continue.
        out["error"] = str(e)[:300]
    return out
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def try_test_load_small(repo: str) -> Dict[str, Any]:
    """Try to load config + tokenizer for *repo* (no generation, no weights).

    First attempts ``transformers`` ``AutoConfig`` / ``AutoTokenizer`` with
    ``trust_remote_code=False``. If that fails for any reason, the failure is
    recorded in ``error`` and the function only probes whether ``mlx_lm`` is
    installed; in that fallback no model is actually loaded and ``load_ok``
    merely means "the mlx_lm package is importable".

    Args:
        repo: Hugging Face repo id to load from.

    Returns:
        Dict with ``load_test_attempted``, ``load_ok``, ``error`` and
        ``library``; ``model_type`` is added on a successful transformers
        load and ``notes`` on the mlx_lm fallback.
    """
    out: Dict[str, Any] = {"load_test_attempted": False, "load_ok": False, "error": None, "library": None}
    try:
        # transformers first (most universal)
        from transformers import AutoConfig, AutoTokenizer  # type: ignore
        out["library"] = "transformers"
        cfg = AutoConfig.from_pretrained(repo, trust_remote_code=False)
        tok = AutoTokenizer.from_pretrained(repo, trust_remote_code=False, use_fast=True)
        out["load_test_attempted"] = True
        out["load_ok"] = bool(cfg) and bool(tok)
        out["model_type"] = getattr(cfg, "model_type", None)
        return out
    except Exception as e1:
        out["error"] = f"transformers: {str(e1)[:200]}"

    # Fallback: only check that mlx_lm is installed (very light).
    try:
        # Import the submodule explicitly: a bare `import importlib` does not
        # guarantee that `importlib.util` is available as an attribute.
        import importlib.util

        if importlib.util.find_spec("mlx_lm"):
            out["library"] = "mlx_lm (config only probe)"
            # We don't call full load here to stay true to "no blind huge weights"
            out["load_test_attempted"] = True
            out["load_ok"] = True  # assume if importable the path exists; user will hit real load later
            out["notes"] = "mlx path present; full local load tested at runtime only"
            return out
    except Exception:
        # Best-effort probe: a broken mlx_lm install must not abort the run.
        pass
    out["load_test_attempted"] = True
    return out
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def main() -> int:
    """Verify every registry capability against HF and write a JSON report.

    Parses ``--deep``, ``--test-load`` and ``--out`` from the command line,
    runs ``verify_one_light`` for each capability (plus the optional deep and
    small-load probes), prints one table line per capability, and writes the
    merged results with a summary to the ``--out`` file.

    Returns:
        ``1`` when at least one ``recommended_default`` capability was not
        found on HF, otherwise ``0``.
    """
    # Local import: only this entry point needs it.
    import tempfile

    parser = argparse.ArgumentParser()
    parser.add_argument("--deep", action="store_true", help="Also fetch tiny config+tokenizer via hf_hub snapshot (restricted)")
    parser.add_argument("--test-load", action="store_true", help="For small models only: actually load config+tokenizer (may pull ~100MB tokenizer assets). Skips models larger than 12GB.")
    parser.add_argument("--out", default="verification_report.json", help="Report filename (written to cwd)")
    args = parser.parse_args()

    caps = get_all_capabilities()
    print("Lattice AI 5.2.0 HF Model Registry Verifier")
    print(f"Capabilities in registry: {len(caps)}")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("-" * 88)

    results: List[Dict[str, Any]] = []
    # Use the platform temp directory instead of a hard-coded /tmp path so the
    # script also works where /tmp does not exist.
    tmp = Path(tempfile.gettempdir()) / "lattice_verify_hf"
    if args.deep:
        tmp.mkdir(parents=True, exist_ok=True)

    missing_critical = 0
    large_limited = 0

    for cap in sorted(caps, key=lambda c: (c.display_priority, c.size)):
        light = verify_one_light(cap)
        deep: Dict[str, Any] = {}
        load: Dict[str, Any] = {}

        # A capability counts as "large" when its display size is in GB and
        # the numeric part exceeds 12; unparsable sizes are treated as small.
        is_large = False
        try:
            sz = float("".join(ch for ch in cap.size if ch.isdigit() or ch == ".") or "0")
            if "GB" in cap.size and sz > 12:
                is_large = True
                large_limited += 1
        except (TypeError, ValueError):
            pass

        if args.deep:
            deep = try_deep_config(cap.hf_repo_id, tmp)
            time.sleep(0.4)

        do_load = args.test_load and not is_large and ("4B" in cap.name or "E2B" in cap.name or "2.7GB" in cap.size or "3.6GB" in cap.size)
        if do_load:
            print(f"  [small-load-test] attempting for {cap.id}")
            load = try_test_load_small(cap.hf_repo_id)
            time.sleep(0.6)

        # Merge into verification view
        merged = {**light}
        if deep:
            merged["deep"] = deep
            if deep.get("has_config"):
                merged["has_config_hint"] = True
            if deep.get("has_tokenizer"):
                merged["has_tokenizer_hint"] = True
        if load:
            merged["load_test"] = load

        if not merged["hf_exists"]:
            if cap.recommended_default:
                missing_critical += 1
            merged["notes"] = (merged.get("notes") or "") + " CRITICAL: missing from HF!"

        # Pretty line
        status = "✓" if merged["hf_exists"] else "✗"
        v = "V" if merged.get("has_config_hint") and merged.get("has_tokenizer_hint") else "?"
        large = " LARGE" if is_large else ""
        print(f"{status} {cap.id:<52} {cap.size:>8} {cap.family:<14} {v} {large}")

        results.append(merged)

    summary = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "total": len(results),
        "hf_present": sum(1 for r in results if r.get("hf_exists")),
        "config_hint_ok": sum(1 for r in results if r.get("has_config_hint")),
        "tokenizer_hint_ok": sum(1 for r in results if r.get("has_tokenizer_hint")),
        "large_models_limited": large_limited,
        "missing_critical_recommended": missing_critical,
        "args": {"deep": args.deep, "test_load": args.test_load},
    }

    # The headline must reflect the actual outcome of this run rather than
    # unconditionally claiming success.
    if missing_critical > 0:
        headline = (
            f"{missing_critical} recommended-default model(s) were not found on HF "
            "(404 or unreachable); check network access and the registry entries. "
        )
    else:
        headline = "All recommended-default models were found on HF. "

    report = {
        "summary": summary,
        "results": results,
        "recommendation": headline
        + "Entries needing more than 12GB RAM carry an explicit LARGE_MODEL note. "
        "Use --deep or --test-load only when you have huggingface_hub/transformers and want to exercise small-model paths. "
        "Never use this script to pre-download production weights; respect user consent.",
    }

    out_path = Path(args.out).resolve()
    out_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    print("-" * 88)
    print(json.dumps(summary, indent=2))
    print(f"\nFull report written: {out_path}")

    # Generate copy-paste snippet for static verification pinning (optional hygiene)
    print("\n# Optional: paste updated verification into model_capability_registry.py entries (example for first few):")
    for r in results[:3]:
        if r.get("hf_exists"):
            print(f"# {r['id']}: hf_exists={r['hf_exists']}, config={r.get('has_config_hint')}, tok={r.get('has_tokenizer_hint')}")

    if missing_critical > 0:
        print(f"\n**FAIL**: {missing_critical} critical recommended models missing from HF.")
        return 1
    return 0
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
if __name__ == "__main__":
|
|
308
|
+
raise SystemExit(main())
|
package/src-tauri/Cargo.lock
CHANGED
package/src-tauri/Cargo.toml
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "5.
|
|
2
|
+
"version": "5.2.0",
|
|
3
3
|
"generated_at": "vite",
|
|
4
4
|
"entrypoints": {
|
|
5
5
|
"app": "/static/app/index.html"
|
|
6
6
|
},
|
|
7
7
|
"assets": {
|
|
8
8
|
"../node_modules/@tauri-apps/api/core.js": "/static/app/assets/core-CwxXejkd.js",
|
|
9
|
-
"index.html": "/static/app/assets/index-
|
|
10
|
-
"assets/index-
|
|
9
|
+
"index.html": "/static/app/assets/index-DsnfomFs.js",
|
|
10
|
+
"assets/index-CQmHhk8Q.css": "/static/app/assets/index-CQmHhk8Q.css"
|
|
11
11
|
},
|
|
12
12
|
"vite": {
|
|
13
13
|
"../node_modules/@tauri-apps/api/core.js": {
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"isDynamicEntry": true
|
|
18
18
|
},
|
|
19
19
|
"index.html": {
|
|
20
|
-
"file": "assets/index-
|
|
20
|
+
"file": "assets/index-DsnfomFs.js",
|
|
21
21
|
"name": "index",
|
|
22
22
|
"src": "index.html",
|
|
23
23
|
"isEntry": true,
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
"../node_modules/@tauri-apps/api/core.js"
|
|
26
26
|
],
|
|
27
27
|
"css": [
|
|
28
|
-
"assets/index-
|
|
28
|
+
"assets/index-CQmHhk8Q.css"
|
|
29
29
|
]
|
|
30
30
|
}
|
|
31
31
|
}
|