ltcai 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -18
- package/docs/CHANGELOG.md +85 -0
- package/docs/V4_5_0_GEMMA_RUNTIME_COMPATIBILITY_REPORT.md +49 -0
- package/docs/V4_5_0_GRAPH_UX_REPORT.md +34 -0
- package/docs/V4_5_0_MODEL_RUNTIME_UX_REPORT.md +40 -0
- package/docs/V4_5_0_ONBOARDING_REPORT.md +31 -0
- package/docs/V4_5_0_PRODUCT_EXPERIENCE_RECOVERY_REPORT.md +49 -0
- package/docs/V4_5_0_VALIDATION_REPORT.md +60 -0
- package/docs/V4_5_1_GRAPH_EXPERIENCE_REPORT.md +33 -0
- package/docs/V4_5_1_MODEL_EXPERIENCE_REPORT.md +37 -0
- package/docs/V4_5_1_NAVIGATION_REPORT.md +37 -0
- package/docs/V4_5_1_ONBOARDING_REPORT.md +29 -0
- package/docs/V4_5_1_PRODUCT_REIMAGINING_REPORT.md +61 -0
- package/docs/V4_5_1_RC_ARTIFACTS.md +44 -0
- package/docs/V4_5_1_UX_REPORT.md +45 -0
- package/docs/V4_5_1_VALIDATION_REPORT.md +54 -0
- package/docs/V4_5_1_VISUAL_DESIGN_REPORT.md +30 -0
- package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +16 -16
- package/docs/architecture.md +8 -4
- package/frontend/src/App.tsx +152 -91
- package/frontend/src/api/client.ts +83 -1
- package/frontend/src/components/FirstRunGuide.tsx +99 -0
- package/frontend/src/components/primitives.tsx +131 -25
- package/frontend/src/components/ui/badge.tsx +2 -2
- package/frontend/src/components/ui/button.tsx +7 -7
- package/frontend/src/components/ui/card.tsx +5 -5
- package/frontend/src/components/ui/input.tsx +1 -1
- package/frontend/src/components/ui/textarea.tsx +1 -1
- package/frontend/src/pages/Act.tsx +58 -28
- package/frontend/src/pages/Ask.tsx +51 -19
- package/frontend/src/pages/Brain.tsx +60 -42
- package/frontend/src/pages/Capture.tsx +24 -24
- package/frontend/src/pages/Library.tsx +222 -32
- package/frontend/src/pages/System.tsx +56 -34
- package/frontend/src/routes.ts +15 -13
- package/frontend/src/store/appStore.ts +8 -1
- package/frontend/src/styles.css +666 -36
- package/lattice_brain/__init__.py +1 -1
- package/lattice_brain/runtime/multi_agent.py +1 -1
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/models.py +107 -18
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/model_compat.py +250 -0
- package/latticeai/core/workspace_os.py +1 -1
- package/latticeai/models/router.py +136 -32
- package/latticeai/services/model_catalog.py +2 -2
- package/latticeai/services/model_recommendation.py +8 -1
- package/latticeai/services/model_runtime.py +18 -3
- package/package.json +1 -1
- package/scripts/build_frontend_assets.mjs +12 -1
- package/src-tauri/Cargo.lock +1 -1
- package/src-tauri/Cargo.toml +1 -1
- package/src-tauri/tauri.conf.json +1 -1
- package/static/app/asset-manifest.json +5 -5
- package/static/app/assets/index-3G8qcrIS.js +336 -0
- package/static/app/assets/index-3G8qcrIS.js.map +1 -0
- package/static/app/assets/index-C0wYZp7k.css +2 -0
- package/static/app/index.html +2 -2
- package/static/app/assets/index-CHHal8Zl.css +0 -2
- package/static/app/assets/index-pdzil9ac.js +0 -333
- package/static/app/assets/index-pdzil9ac.js.map +0 -1
|
@@ -6,6 +6,7 @@ import asyncio
|
|
|
6
6
|
import base64
|
|
7
7
|
import gc
|
|
8
8
|
import io
|
|
9
|
+
import json
|
|
9
10
|
import os
|
|
10
11
|
import re
|
|
11
12
|
import time
|
|
@@ -29,15 +30,28 @@ executor = ThreadPoolExecutor(max_workers=1)
|
|
|
29
30
|
|
|
30
31
|
try:
|
|
31
32
|
import mlx.core as mx
|
|
33
|
+
except Exception as e:
|
|
34
|
+
mx = None
|
|
35
|
+
print(f"⚠️ MLX core unavailable: {e}")
|
|
36
|
+
|
|
37
|
+
try:
|
|
32
38
|
from mlx_vlm import load as vlm_load
|
|
33
39
|
VLM_AVAILABLE = True
|
|
34
40
|
print("✅ MLX-VLM is ready for multimodal models.")
|
|
35
41
|
except Exception as e:
|
|
36
|
-
mx = None
|
|
37
42
|
vlm_load = None
|
|
38
43
|
VLM_AVAILABLE = False
|
|
39
44
|
print(f"⚠️ MLX-VLM unavailable: {e}")
|
|
40
45
|
|
|
46
|
+
try:
|
|
47
|
+
from mlx_lm import load as lm_load
|
|
48
|
+
LM_AVAILABLE = True
|
|
49
|
+
print("✅ MLX-LM is ready for text fallback models.")
|
|
50
|
+
except Exception as e:
|
|
51
|
+
lm_load = None
|
|
52
|
+
LM_AVAILABLE = False
|
|
53
|
+
print(f"⚠️ MLX-LM unavailable: {e}")
|
|
54
|
+
|
|
41
55
|
BRAND_NAME = "Lattice AI"
|
|
42
56
|
LEGACY_BRAND_PATTERNS = [
|
|
43
57
|
(re.compile(r"\bconnect\s+ai\b", re.IGNORECASE), BRAND_NAME),
|
|
@@ -236,20 +250,63 @@ def _resolve_local_hf_model(model_id: str) -> str:
|
|
|
236
250
|
return str(local_dir)
|
|
237
251
|
return model_id
|
|
238
252
|
|
|
253
|
+
def _is_gemma4_model_id(model_id: str) -> bool:
|
|
254
|
+
raw = str(model_id or "").lower()
|
|
255
|
+
return bool(re.search(r"gemma[-_/ ]?4|gemma4", raw))
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _local_model_type(path_or_model_id: str) -> Optional[str]:
|
|
259
|
+
raw = str(path_or_model_id or "").strip()
|
|
260
|
+
candidates = []
|
|
261
|
+
explicit = Path(raw).expanduser()
|
|
262
|
+
if raw and explicit.exists():
|
|
263
|
+
candidates.append(explicit / "config.json")
|
|
264
|
+
candidates.append(hf_model_dir(raw) / "config.json")
|
|
265
|
+
for config_path in candidates:
|
|
266
|
+
try:
|
|
267
|
+
if config_path.exists():
|
|
268
|
+
data = json.loads(config_path.read_text(encoding="utf-8"))
|
|
269
|
+
model_type = str(data.get("model_type") or "").strip().lower()
|
|
270
|
+
if model_type:
|
|
271
|
+
return model_type
|
|
272
|
+
except Exception as e:
|
|
273
|
+
print(f"⚠️ Model config read skipped for {config_path}: {e}")
|
|
274
|
+
return None
|
|
275
|
+
|
|
276
|
+
|
|
239
277
|
def ensure_mlx_runtime() -> None:
|
|
240
|
-
global mx, vlm_load, VLM_AVAILABLE
|
|
241
|
-
if mx is not None and vlm_load is not None:
|
|
278
|
+
global mx, vlm_load, lm_load, VLM_AVAILABLE, LM_AVAILABLE
|
|
279
|
+
if mx is not None and (vlm_load is not None or lm_load is not None):
|
|
242
280
|
return
|
|
281
|
+
errors = []
|
|
243
282
|
try:
|
|
244
283
|
import mlx.core as mlx_core
|
|
245
|
-
from mlx_vlm import load as mlx_vlm_load
|
|
246
|
-
|
|
247
284
|
mx = mlx_core
|
|
285
|
+
mx.set_default_device(mx.gpu)
|
|
286
|
+
except Exception as e:
|
|
287
|
+
errors.append(f"mlx: {e}")
|
|
288
|
+
mx = None
|
|
289
|
+
|
|
290
|
+
try:
|
|
291
|
+
from mlx_vlm import load as mlx_vlm_load
|
|
248
292
|
vlm_load = mlx_vlm_load
|
|
249
293
|
VLM_AVAILABLE = True
|
|
250
|
-
mx.set_default_device(mx.gpu)
|
|
251
294
|
except Exception as e:
|
|
252
|
-
|
|
295
|
+
vlm_load = None
|
|
296
|
+
VLM_AVAILABLE = False
|
|
297
|
+
errors.append(f"mlx-vlm: {e}")
|
|
298
|
+
|
|
299
|
+
try:
|
|
300
|
+
from mlx_lm import load as mlx_lm_load
|
|
301
|
+
lm_load = mlx_lm_load
|
|
302
|
+
LM_AVAILABLE = True
|
|
303
|
+
except Exception as e:
|
|
304
|
+
lm_load = None
|
|
305
|
+
LM_AVAILABLE = False
|
|
306
|
+
errors.append(f"mlx-lm: {e}")
|
|
307
|
+
|
|
308
|
+
if mx is None or (vlm_load is None and lm_load is None):
|
|
309
|
+
raise RuntimeError(f"MLX runtime is not available after install: {'; '.join(errors)}")
|
|
253
310
|
|
|
254
311
|
def _mlx_sampler(temperature: float):
|
|
255
312
|
"""Build an MLX sampler callable for the given temperature.
|
|
@@ -353,8 +410,8 @@ class LLMRouter:
|
|
|
353
410
|
return self._load_cloud_model(provider, provider_model, api_key_override=api_key_override, owner=owner)
|
|
354
411
|
|
|
355
412
|
ensure_mlx_runtime()
|
|
356
|
-
if mx is None or vlm_load is None:
|
|
357
|
-
raise RuntimeError("MLX
|
|
413
|
+
if mx is None or (vlm_load is None and lm_load is None):
|
|
414
|
+
raise RuntimeError("MLX is not available in this process. Run on Apple Silicon with Metal access.")
|
|
358
415
|
|
|
359
416
|
cache_key = f"{model_id}_{draft_model_id}" if draft_model_id else model_id
|
|
360
417
|
if cache_key in self._cache:
|
|
@@ -370,25 +427,43 @@ class LLMRouter:
|
|
|
370
427
|
|
|
371
428
|
def _load():
|
|
372
429
|
mx.set_default_device(mx.gpu)
|
|
373
|
-
|
|
374
|
-
|
|
430
|
+
is_gemma4 = _is_gemma4_model_id(model_id)
|
|
431
|
+
model_type = _local_model_type(target_model_id) or _local_model_type(model_id)
|
|
432
|
+
loader_kind = "mlx_vlm"
|
|
433
|
+
|
|
434
|
+
try:
|
|
435
|
+
if vlm_load is None:
|
|
436
|
+
raise RuntimeError("MLX-VLM is not installed.")
|
|
437
|
+
print(f"🔄 Loading Target (VLM Mode): {target_model_id}...")
|
|
438
|
+
model, tokenizer = vlm_load(target_model_id)
|
|
439
|
+
except Exception as vlm_error:
|
|
440
|
+
if not (is_gemma4 and model_type != "gemma4_unified" and lm_load is not None):
|
|
441
|
+
raise
|
|
442
|
+
print(f"⚠️ Gemma 4 MLX-VLM load failed; retrying MLX-LM text path: {vlm_error}")
|
|
443
|
+
print(f"🔄 Loading Target (LM Mode): {target_model_id}...")
|
|
444
|
+
model, tokenizer = lm_load(target_model_id)
|
|
445
|
+
loader_kind = "mlx_lm"
|
|
375
446
|
|
|
376
447
|
draft_model = None
|
|
377
448
|
if target_draft_model_id:
|
|
378
|
-
|
|
379
|
-
|
|
449
|
+
if loader_kind == "mlx_vlm":
|
|
450
|
+
print(f"🔄 Loading Assistant (VLM Mode): {target_draft_model_id}...")
|
|
451
|
+
draft_model, _ = vlm_load(target_draft_model_id)
|
|
452
|
+
elif lm_load is not None:
|
|
453
|
+
print(f"🔄 Loading Assistant (LM Mode): {target_draft_model_id}...")
|
|
454
|
+
draft_model, _ = lm_load(target_draft_model_id)
|
|
380
455
|
print("✅ Assistant Ready.")
|
|
381
456
|
|
|
382
|
-
return model, tokenizer, draft_model
|
|
457
|
+
return model, tokenizer, draft_model, loader_kind
|
|
383
458
|
|
|
384
459
|
try:
|
|
385
460
|
# Use the dedicated single-thread executor to ensure MLX GPU streams match during inference
|
|
386
|
-
model, tokenizer, draft_model = await loop.run_in_executor(executor, _load)
|
|
387
|
-
self._cache[cache_key] = (model, tokenizer, draft_model)
|
|
461
|
+
model, tokenizer, draft_model, loader_kind = await loop.run_in_executor(executor, _load)
|
|
462
|
+
self._cache[cache_key] = (model, tokenizer, draft_model, loader_kind)
|
|
388
463
|
self._current = cache_key
|
|
389
464
|
self._touch(cache_key)
|
|
390
|
-
print(f"✅ Fully Loaded: {cache_key}")
|
|
391
|
-
return f"Success: {cache_key}"
|
|
465
|
+
print(f"✅ Fully Loaded: {cache_key} ({loader_kind})")
|
|
466
|
+
return f"Success: {cache_key} ({loader_kind})"
|
|
392
467
|
except Exception as e:
|
|
393
468
|
print(f"❌ Load Error: {e}")
|
|
394
469
|
raise e
|
|
@@ -510,6 +585,11 @@ class LLMRouter:
|
|
|
510
585
|
print(f"⚠️ VLM chat template fallback: {e}")
|
|
511
586
|
return self._build_prompt(message, context, processor)
|
|
512
587
|
|
|
588
|
+
def _unpack_local_cache(self, cached: Tuple) -> Tuple[object, object, object, str]:
|
|
589
|
+
model, tokenizer, draft_model = cached[:3]
|
|
590
|
+
loader_kind = str(cached[3]) if len(cached) > 3 else "mlx_vlm"
|
|
591
|
+
return model, tokenizer, draft_model, loader_kind
|
|
592
|
+
|
|
513
593
|
async def generate_as(self, model_id: str | None, message: str, context: Optional[str] = None, max_tokens: int = 4096, temperature: float = 0.2) -> str:
|
|
514
594
|
"""Generate using a specific model, temporarily switching if needed. Falls back to current model if model_id is None or not loaded."""
|
|
515
595
|
if not model_id or model_id == self._current:
|
|
@@ -531,16 +611,24 @@ class LLMRouter:
|
|
|
531
611
|
if isinstance(cached, CloudModel):
|
|
532
612
|
return await self._cloud_generate(cached, message, context, max_tokens, temperature)
|
|
533
613
|
|
|
534
|
-
model, tokenizer, draft_model = self._cache[self._current]
|
|
535
|
-
|
|
614
|
+
model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(self._cache[self._current])
|
|
615
|
+
use_vlm = loader_kind == "mlx_vlm"
|
|
616
|
+
prompt = (
|
|
617
|
+
self._build_vlm_prompt(model, tokenizer, message, context, 1 if image_data else 0)
|
|
618
|
+
if use_vlm
|
|
619
|
+
else self._build_prompt(message, context, tokenizer)
|
|
620
|
+
)
|
|
536
621
|
|
|
537
622
|
loop = asyncio.get_event_loop()
|
|
538
623
|
|
|
539
624
|
def _gen():
|
|
540
625
|
import mlx.core as mx
|
|
541
626
|
mx.set_default_device(mx.gpu)
|
|
542
|
-
|
|
543
|
-
|
|
627
|
+
if use_vlm:
|
|
628
|
+
from mlx_vlm import generate as vlm_gen
|
|
629
|
+
return vlm_gen(model, tokenizer, prompt=prompt, image=self._prep_image(image_data) if image_data else None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
|
|
630
|
+
from mlx_lm import generate as lm_gen
|
|
631
|
+
return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
|
|
544
632
|
result = await loop.run_in_executor(executor, _gen)
|
|
545
633
|
# mlx-vlm might return a GenerationResult object; extract the text
|
|
546
634
|
if hasattr(result, "text"):
|
|
@@ -577,8 +665,13 @@ class LLMRouter:
|
|
|
577
665
|
yield chunk
|
|
578
666
|
return
|
|
579
667
|
|
|
580
|
-
model, tokenizer, draft_model = self._cache[self._current]
|
|
581
|
-
|
|
668
|
+
model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(self._cache[self._current])
|
|
669
|
+
use_vlm = loader_kind == "mlx_vlm"
|
|
670
|
+
prompt = (
|
|
671
|
+
self._build_vlm_prompt(model, tokenizer, message, context, 1 if image_data else 0)
|
|
672
|
+
if use_vlm
|
|
673
|
+
else self._build_prompt(message, context, tokenizer)
|
|
674
|
+
)
|
|
582
675
|
loop = asyncio.get_event_loop()
|
|
583
676
|
queue = asyncio.Queue()
|
|
584
677
|
|
|
@@ -586,8 +679,12 @@ class LLMRouter:
|
|
|
586
679
|
import mlx.core as mx
|
|
587
680
|
mx.set_default_device(mx.gpu)
|
|
588
681
|
try:
|
|
589
|
-
|
|
590
|
-
|
|
682
|
+
if use_vlm:
|
|
683
|
+
from mlx_vlm import stream_generate as vlm_stream
|
|
684
|
+
gen = vlm_stream(model, tokenizer, prompt=prompt, image=self._prep_image(image_data) if image_data else None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
|
|
685
|
+
else:
|
|
686
|
+
from mlx_lm import stream_generate as lm_stream
|
|
687
|
+
gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
|
|
591
688
|
|
|
592
689
|
for chunk in gen:
|
|
593
690
|
text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
|
|
@@ -660,7 +757,7 @@ class LLMRouter:
|
|
|
660
757
|
if isinstance(cached, CloudModel):
|
|
661
758
|
return await self._cloud_generate_document(cached, message, system_prompt, max_tokens, temperature)
|
|
662
759
|
|
|
663
|
-
model, tokenizer, draft_model = cached
|
|
760
|
+
model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(cached)
|
|
664
761
|
if hasattr(tokenizer, "apply_chat_template"):
|
|
665
762
|
try:
|
|
666
763
|
msgs = [
|
|
@@ -677,8 +774,11 @@ class LLMRouter:
|
|
|
677
774
|
def _gen():
|
|
678
775
|
import mlx.core as mx
|
|
679
776
|
mx.set_default_device(mx.gpu)
|
|
680
|
-
|
|
681
|
-
|
|
777
|
+
if loader_kind == "mlx_vlm":
|
|
778
|
+
from mlx_vlm import generate as vlm_gen
|
|
779
|
+
return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
|
|
780
|
+
from mlx_lm import generate as lm_gen
|
|
781
|
+
return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
|
|
682
782
|
result = await loop.run_in_executor(executor, _gen)
|
|
683
783
|
if hasattr(result, "text"):
|
|
684
784
|
return normalize_branding(result.text)
|
|
@@ -719,7 +819,7 @@ class LLMRouter:
|
|
|
719
819
|
yield chunk
|
|
720
820
|
return
|
|
721
821
|
|
|
722
|
-
model, tokenizer, draft_model = cached
|
|
822
|
+
model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(cached)
|
|
723
823
|
if hasattr(tokenizer, "apply_chat_template"):
|
|
724
824
|
try:
|
|
725
825
|
msgs = [
|
|
@@ -739,8 +839,12 @@ class LLMRouter:
|
|
|
739
839
|
import mlx.core as mx
|
|
740
840
|
mx.set_default_device(mx.gpu)
|
|
741
841
|
try:
|
|
742
|
-
|
|
743
|
-
|
|
842
|
+
if loader_kind == "mlx_vlm":
|
|
843
|
+
from mlx_vlm import stream_generate as vlm_stream
|
|
844
|
+
gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
|
|
845
|
+
else:
|
|
846
|
+
from mlx_lm import stream_generate as lm_stream
|
|
847
|
+
gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
|
|
744
848
|
for chunk in gen:
|
|
745
849
|
text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
|
|
746
850
|
loop.call_soon_threadsafe(queue.put_nowait, text)
|
|
@@ -17,8 +17,8 @@ from typing import Dict, List, Optional
|
|
|
17
17
|
|
|
18
18
|
ENGINE_INSTALLERS = {
|
|
19
19
|
"local_mlx": {
|
|
20
|
-
"command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm", "huggingface_hub[cli]"],
|
|
21
|
-
"label": "Install MLX
|
|
20
|
+
"command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm>=0.6.3", "mlx-lm", "huggingface_hub[cli]"],
|
|
21
|
+
"label": "Install MLX runtime",
|
|
22
22
|
},
|
|
23
23
|
"openai": {
|
|
24
24
|
"command": [sys.executable, "-m", "pip", "install", "openai"],
|
|
@@ -18,6 +18,7 @@ from __future__ import annotations
|
|
|
18
18
|
import re
|
|
19
19
|
from typing import Any, Dict, List, Optional
|
|
20
20
|
|
|
21
|
+
from latticeai.core.model_compat import model_runtime_compatibility
|
|
21
22
|
from latticeai.services.model_catalog import ENGINE_MODEL_CATALOG
|
|
22
23
|
|
|
23
24
|
# ── status vocabulary ─────────────────────────────────────────────────────────
|
|
@@ -85,14 +86,19 @@ def _engine_available(engine: str, profile: Dict[str, Any]) -> bool:
|
|
|
85
86
|
def _classify_one(
|
|
86
87
|
model: Dict[str, Any],
|
|
87
88
|
*,
|
|
89
|
+
engine: str,
|
|
88
90
|
engine_available: bool,
|
|
89
91
|
ram_gb: float,
|
|
90
92
|
) -> Dict[str, Any]:
|
|
91
93
|
size_gb = parse_size_gb(model.get("size"))
|
|
92
94
|
need_gb = estimated_ram_gb(size_gb) if size_gb is not None else None
|
|
95
|
+
runtime = model_runtime_compatibility(str(model.get("id") or ""), engine=engine)
|
|
93
96
|
|
|
94
97
|
if not engine_available:
|
|
95
98
|
status, reason = NOT_RECOMMENDED, "Apple Silicon과 MLX-VLM이 필요합니다"
|
|
99
|
+
elif runtime.get("supported") is False:
|
|
100
|
+
status = NOT_RECOMMENDED
|
|
101
|
+
reason = str(runtime.get("user_message") or "이 모델은 현재 설치된 실행 런타임에서 지원되지 않습니다")
|
|
96
102
|
elif need_gb is None:
|
|
97
103
|
# Tool-managed/pull models have no fixed on-disk size, so treat them as
|
|
98
104
|
# compatible and let the execution tool validate the exact model.
|
|
@@ -124,6 +130,7 @@ def _classify_one(
|
|
|
124
130
|
"run_location": model.get("run_location"),
|
|
125
131
|
"internet_requirement": model.get("internet_requirement"),
|
|
126
132
|
"source_display_order": model.get("source_display_order"),
|
|
133
|
+
"runtime_compatibility": runtime,
|
|
127
134
|
}
|
|
128
135
|
|
|
129
136
|
|
|
@@ -148,7 +155,7 @@ def recommend_catalog(profile: Dict[str, Any], *, engine: str = "local_mlx") ->
|
|
|
148
155
|
ram_gb = _ram_gb(profile)
|
|
149
156
|
|
|
150
157
|
classified = [
|
|
151
|
-
_classify_one(m, engine_available=engine_available, ram_gb=ram_gb)
|
|
158
|
+
_classify_one(m, engine=engine, engine_available=engine_available, ram_gb=ram_gb)
|
|
152
159
|
for m in models
|
|
153
160
|
]
|
|
154
161
|
|
|
@@ -40,6 +40,8 @@ from latticeai.core.model_compat import (
|
|
|
40
40
|
classify_smoke_response as _classify_smoke_response,
|
|
41
41
|
ensure_profile as _ensure_compat_profile,
|
|
42
42
|
fast_postprocess as _compat_fast_postprocess,
|
|
43
|
+
friendly_model_runtime_error as _friendly_model_runtime_error,
|
|
44
|
+
model_runtime_compatibility as _model_runtime_compatibility,
|
|
43
45
|
record_smoke_result as _record_smoke_result,
|
|
44
46
|
)
|
|
45
47
|
from latticeai.core.model_resolution import ModelResolution as _ModelResolution
|
|
@@ -931,7 +933,10 @@ def ensure_llamacpp_server(model_name: str) -> None:
|
|
|
931
933
|
|
|
932
934
|
def engine_installed(engine: str) -> bool:
|
|
933
935
|
if engine == "local_mlx":
|
|
934
|
-
return bool(
|
|
936
|
+
return bool(
|
|
937
|
+
importlib.util.find_spec("mlx")
|
|
938
|
+
and (importlib.util.find_spec("mlx_vlm") or importlib.util.find_spec("mlx_lm"))
|
|
939
|
+
)
|
|
935
940
|
if engine == "ollama":
|
|
936
941
|
return local_binary("ollama") is not None
|
|
937
942
|
if engine == "vllm":
|
|
@@ -1042,7 +1047,7 @@ def engine_status() -> List[Dict]:
|
|
|
1042
1047
|
"id": "local_mlx",
|
|
1043
1048
|
"name": "MLX",
|
|
1044
1049
|
"kind": "local",
|
|
1045
|
-
"description": "Apple Silicon GPU에서 MLX
|
|
1050
|
+
"description": "Apple Silicon GPU에서 MLX-VLM 모델을 직접 실행하고, Gemma 4는 필요 시 MLX-LM 텍스트 경로로 재시도합니다.",
|
|
1046
1051
|
"installed": engine_installed("local_mlx"),
|
|
1047
1052
|
"installable": True,
|
|
1048
1053
|
"install_label": ENGINE_INSTALLERS["local_mlx"]["label"],
|
|
@@ -1369,6 +1374,9 @@ async def prepare_and_load_model(
|
|
|
1369
1374
|
parsed_provider, parsed_model = parse_model_ref(model_id)
|
|
1370
1375
|
if parsed_provider == "mlx":
|
|
1371
1376
|
parsed_provider = "local_mlx"
|
|
1377
|
+
compatibility = _model_runtime_compatibility(parsed_model, engine=parsed_provider)
|
|
1378
|
+
if compatibility.get("supported") is False:
|
|
1379
|
+
raise HTTPException(status_code=400, detail=compatibility)
|
|
1372
1380
|
|
|
1373
1381
|
local_engines = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
|
|
1374
1382
|
install_result: Dict[str, object] = {}
|
|
@@ -1488,6 +1496,9 @@ async def prepare_and_load_model_stream(
|
|
|
1488
1496
|
parsed_provider, parsed_model = parse_model_ref(model_id)
|
|
1489
1497
|
if parsed_provider == "mlx":
|
|
1490
1498
|
parsed_provider = "local_mlx"
|
|
1499
|
+
compatibility = _model_runtime_compatibility(parsed_model, engine=parsed_provider)
|
|
1500
|
+
if compatibility.get("supported") is False:
|
|
1501
|
+
raise HTTPException(status_code=400, detail=compatibility)
|
|
1491
1502
|
|
|
1492
1503
|
work_queue: "queue.Queue[Dict[str, object]]" = queue.Queue()
|
|
1493
1504
|
work_result: Dict[str, object] = {}
|
|
@@ -1651,7 +1662,11 @@ async def prepare_and_load_model_stream(
|
|
|
1651
1662
|
work_queue.put({"kind": "error", "status_code": exc.status_code, "detail": exc.detail})
|
|
1652
1663
|
except Exception as exc:
|
|
1653
1664
|
logging.exception("model prepare stream worker failed")
|
|
1654
|
-
work_queue.put({
|
|
1665
|
+
work_queue.put({
|
|
1666
|
+
"kind": "error",
|
|
1667
|
+
"status_code": 500,
|
|
1668
|
+
"detail": _friendly_model_runtime_error(exc, model_id=model_id, engine=parsed_provider),
|
|
1669
|
+
})
|
|
1655
1670
|
|
|
1656
1671
|
worker = threading.Thread(target=blocking_prepare, daemon=True)
|
|
1657
1672
|
worker.start()
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ltcai",
|
|
3
|
-
"version": "4.4.0",
|
|
3
|
+
"version": "4.5.1",
|
|
4
4
|
"description": "Lattice AI — local-first Digital Brain Platform (knowledge graph, durable memory, hybrid search, agents, portable encrypted brain archives)",
|
|
5
5
|
"homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
|
|
6
6
|
"repository": {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
|
|
5
5
|
const repo = join(import.meta.dirname, "..");
|
|
@@ -8,6 +8,17 @@ const nestedViteManifest = join(appDir, ".vite", "asset-manifest.json");
|
|
|
8
8
|
const publicManifest = join(appDir, "asset-manifest.json");
|
|
9
9
|
const pkg = JSON.parse(readFileSync(join(repo, "package.json"), "utf8"));
|
|
10
10
|
|
|
11
|
+
const assetsDir = join(appDir, "assets");
|
|
12
|
+
if (existsSync(assetsDir)) {
|
|
13
|
+
for (const name of readdirSync(assetsDir)) {
|
|
14
|
+
if (!/\.(?:css|js)$/.test(name)) continue;
|
|
15
|
+
const file = join(assetsDir, name);
|
|
16
|
+
const text = readFileSync(file, "utf8");
|
|
17
|
+
const normalized = text.replace(/[ \t]+$/gm, "");
|
|
18
|
+
if (normalized !== text) writeFileSync(file, normalized, "utf8");
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
11
22
|
const viteManifest = existsSync(nestedViteManifest) ? nestedViteManifest : publicManifest;
|
|
12
23
|
if (!existsSync(viteManifest)) {
|
|
13
24
|
console.error("Vite manifest missing. Run `vite build` before build_frontend_assets.mjs.");
|
package/src-tauri/Cargo.lock
CHANGED
package/src-tauri/Cargo.toml
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "4.4.0",
|
|
2
|
+
"version": "4.5.1",
|
|
3
3
|
"generated_at": "vite",
|
|
4
4
|
"entrypoints": {
|
|
5
5
|
"app": "/static/app/index.html"
|
|
6
6
|
},
|
|
7
7
|
"assets": {
|
|
8
8
|
"../node_modules/@tauri-apps/api/core.js": "/static/app/assets/core-CwxXejkd.js",
|
|
9
|
-
"index.html": "/static/app/assets/index-pdzil9ac.js",
|
|
10
|
-
"assets/index-CHHal8Zl.css": "/static/app/assets/index-CHHal8Zl.css"
|
|
9
|
+
"index.html": "/static/app/assets/index-3G8qcrIS.js",
|
|
10
|
+
"assets/index-C0wYZp7k.css": "/static/app/assets/index-C0wYZp7k.css"
|
|
11
11
|
},
|
|
12
12
|
"vite": {
|
|
13
13
|
"../node_modules/@tauri-apps/api/core.js": {
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"isDynamicEntry": true
|
|
18
18
|
},
|
|
19
19
|
"index.html": {
|
|
20
|
-
"file": "assets/index-pdzil9ac.js",
|
|
20
|
+
"file": "assets/index-3G8qcrIS.js",
|
|
21
21
|
"name": "index",
|
|
22
22
|
"src": "index.html",
|
|
23
23
|
"isEntry": true,
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
"../node_modules/@tauri-apps/api/core.js"
|
|
26
26
|
],
|
|
27
27
|
"css": [
|
|
28
|
-
"assets/index-CHHal8Zl.css"
|
|
28
|
+
"assets/index-C0wYZp7k.css"
|
|
29
29
|
]
|
|
30
30
|
}
|
|
31
31
|
}
|