ltcai 4.4.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +46 -18
  2. package/docs/CHANGELOG.md +85 -0
  3. package/docs/V4_5_0_GEMMA_RUNTIME_COMPATIBILITY_REPORT.md +49 -0
  4. package/docs/V4_5_0_GRAPH_UX_REPORT.md +34 -0
  5. package/docs/V4_5_0_MODEL_RUNTIME_UX_REPORT.md +40 -0
  6. package/docs/V4_5_0_ONBOARDING_REPORT.md +31 -0
  7. package/docs/V4_5_0_PRODUCT_EXPERIENCE_RECOVERY_REPORT.md +49 -0
  8. package/docs/V4_5_0_VALIDATION_REPORT.md +60 -0
  9. package/docs/V4_5_1_GRAPH_EXPERIENCE_REPORT.md +33 -0
  10. package/docs/V4_5_1_MODEL_EXPERIENCE_REPORT.md +37 -0
  11. package/docs/V4_5_1_NAVIGATION_REPORT.md +37 -0
  12. package/docs/V4_5_1_ONBOARDING_REPORT.md +29 -0
  13. package/docs/V4_5_1_PRODUCT_REIMAGINING_REPORT.md +61 -0
  14. package/docs/V4_5_1_RC_ARTIFACTS.md +44 -0
  15. package/docs/V4_5_1_UX_REPORT.md +45 -0
  16. package/docs/V4_5_1_VALIDATION_REPORT.md +54 -0
  17. package/docs/V4_5_1_VISUAL_DESIGN_REPORT.md +30 -0
  18. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +16 -16
  19. package/docs/architecture.md +8 -4
  20. package/frontend/src/App.tsx +152 -91
  21. package/frontend/src/api/client.ts +83 -1
  22. package/frontend/src/components/FirstRunGuide.tsx +99 -0
  23. package/frontend/src/components/primitives.tsx +131 -25
  24. package/frontend/src/components/ui/badge.tsx +2 -2
  25. package/frontend/src/components/ui/button.tsx +7 -7
  26. package/frontend/src/components/ui/card.tsx +5 -5
  27. package/frontend/src/components/ui/input.tsx +1 -1
  28. package/frontend/src/components/ui/textarea.tsx +1 -1
  29. package/frontend/src/pages/Act.tsx +58 -28
  30. package/frontend/src/pages/Ask.tsx +51 -19
  31. package/frontend/src/pages/Brain.tsx +60 -42
  32. package/frontend/src/pages/Capture.tsx +24 -24
  33. package/frontend/src/pages/Library.tsx +222 -32
  34. package/frontend/src/pages/System.tsx +56 -34
  35. package/frontend/src/routes.ts +15 -13
  36. package/frontend/src/store/appStore.ts +8 -1
  37. package/frontend/src/styles.css +666 -36
  38. package/lattice_brain/__init__.py +1 -1
  39. package/lattice_brain/runtime/multi_agent.py +1 -1
  40. package/latticeai/__init__.py +1 -1
  41. package/latticeai/api/models.py +107 -18
  42. package/latticeai/core/marketplace.py +1 -1
  43. package/latticeai/core/model_compat.py +250 -0
  44. package/latticeai/core/workspace_os.py +1 -1
  45. package/latticeai/models/router.py +136 -32
  46. package/latticeai/services/model_catalog.py +2 -2
  47. package/latticeai/services/model_recommendation.py +8 -1
  48. package/latticeai/services/model_runtime.py +18 -3
  49. package/package.json +1 -1
  50. package/scripts/build_frontend_assets.mjs +12 -1
  51. package/src-tauri/Cargo.lock +1 -1
  52. package/src-tauri/Cargo.toml +1 -1
  53. package/src-tauri/tauri.conf.json +1 -1
  54. package/static/app/asset-manifest.json +5 -5
  55. package/static/app/assets/index-3G8qcrIS.js +336 -0
  56. package/static/app/assets/index-3G8qcrIS.js.map +1 -0
  57. package/static/app/assets/index-C0wYZp7k.css +2 -0
  58. package/static/app/index.html +2 -2
  59. package/static/app/assets/index-CHHal8Zl.css +0 -2
  60. package/static/app/assets/index-pdzil9ac.js +0 -333
  61. package/static/app/assets/index-pdzil9ac.js.map +0 -1
@@ -6,6 +6,7 @@ import asyncio
6
6
  import base64
7
7
  import gc
8
8
  import io
9
+ import json
9
10
  import os
10
11
  import re
11
12
  import time
@@ -29,15 +30,28 @@ executor = ThreadPoolExecutor(max_workers=1)
29
30
 
30
31
  try:
31
32
  import mlx.core as mx
33
+ except Exception as e:
34
+ mx = None
35
+ print(f"⚠️ MLX core unavailable: {e}")
36
+
37
+ try:
32
38
  from mlx_vlm import load as vlm_load
33
39
  VLM_AVAILABLE = True
34
40
  print("✅ MLX-VLM is ready for multimodal models.")
35
41
  except Exception as e:
36
- mx = None
37
42
  vlm_load = None
38
43
  VLM_AVAILABLE = False
39
44
  print(f"⚠️ MLX-VLM unavailable: {e}")
40
45
 
46
+ try:
47
+ from mlx_lm import load as lm_load
48
+ LM_AVAILABLE = True
49
+ print("✅ MLX-LM is ready for text fallback models.")
50
+ except Exception as e:
51
+ lm_load = None
52
+ LM_AVAILABLE = False
53
+ print(f"⚠️ MLX-LM unavailable: {e}")
54
+
41
55
  BRAND_NAME = "Lattice AI"
42
56
  LEGACY_BRAND_PATTERNS = [
43
57
  (re.compile(r"\bconnect\s+ai\b", re.IGNORECASE), BRAND_NAME),
@@ -236,20 +250,63 @@ def _resolve_local_hf_model(model_id: str) -> str:
236
250
  return str(local_dir)
237
251
  return model_id
238
252
 
253
+ def _is_gemma4_model_id(model_id: str) -> bool:
254
+ raw = str(model_id or "").lower()
255
+ return bool(re.search(r"gemma[-_/ ]?4|gemma4", raw))
256
+
257
+
258
+ def _local_model_type(path_or_model_id: str) -> Optional[str]:
259
+ raw = str(path_or_model_id or "").strip()
260
+ candidates = []
261
+ explicit = Path(raw).expanduser()
262
+ if raw and explicit.exists():
263
+ candidates.append(explicit / "config.json")
264
+ candidates.append(hf_model_dir(raw) / "config.json")
265
+ for config_path in candidates:
266
+ try:
267
+ if config_path.exists():
268
+ data = json.loads(config_path.read_text(encoding="utf-8"))
269
+ model_type = str(data.get("model_type") or "").strip().lower()
270
+ if model_type:
271
+ return model_type
272
+ except Exception as e:
273
+ print(f"⚠️ Model config read skipped for {config_path}: {e}")
274
+ return None
275
+
276
+
239
277
  def ensure_mlx_runtime() -> None:
240
- global mx, vlm_load, VLM_AVAILABLE
241
- if mx is not None and vlm_load is not None:
278
+ global mx, vlm_load, lm_load, VLM_AVAILABLE, LM_AVAILABLE
279
+ if mx is not None and (vlm_load is not None or lm_load is not None):
242
280
  return
281
+ errors = []
243
282
  try:
244
283
  import mlx.core as mlx_core
245
- from mlx_vlm import load as mlx_vlm_load
246
-
247
284
  mx = mlx_core
285
+ mx.set_default_device(mx.gpu)
286
+ except Exception as e:
287
+ errors.append(f"mlx: {e}")
288
+ mx = None
289
+
290
+ try:
291
+ from mlx_vlm import load as mlx_vlm_load
248
292
  vlm_load = mlx_vlm_load
249
293
  VLM_AVAILABLE = True
250
- mx.set_default_device(mx.gpu)
251
294
  except Exception as e:
252
- raise RuntimeError(f"MLX-VLM runtime is not available after install: {e}") from e
295
+ vlm_load = None
296
+ VLM_AVAILABLE = False
297
+ errors.append(f"mlx-vlm: {e}")
298
+
299
+ try:
300
+ from mlx_lm import load as mlx_lm_load
301
+ lm_load = mlx_lm_load
302
+ LM_AVAILABLE = True
303
+ except Exception as e:
304
+ lm_load = None
305
+ LM_AVAILABLE = False
306
+ errors.append(f"mlx-lm: {e}")
307
+
308
+ if mx is None or (vlm_load is None and lm_load is None):
309
+ raise RuntimeError(f"MLX runtime is not available after install: {'; '.join(errors)}")
253
310
 
254
311
  def _mlx_sampler(temperature: float):
255
312
  """Build an MLX sampler callable for the given temperature.
@@ -353,8 +410,8 @@ class LLMRouter:
353
410
  return self._load_cloud_model(provider, provider_model, api_key_override=api_key_override, owner=owner)
354
411
 
355
412
  ensure_mlx_runtime()
356
- if mx is None or vlm_load is None:
357
- raise RuntimeError("MLX-VLM is not available in this process. Run on Apple Silicon with Metal access.")
413
+ if mx is None or (vlm_load is None and lm_load is None):
414
+ raise RuntimeError("MLX is not available in this process. Run on Apple Silicon with Metal access.")
358
415
 
359
416
  cache_key = f"{model_id}_{draft_model_id}" if draft_model_id else model_id
360
417
  if cache_key in self._cache:
@@ -370,25 +427,43 @@ class LLMRouter:
370
427
 
371
428
  def _load():
372
429
  mx.set_default_device(mx.gpu)
373
- print(f"🔄 Loading Target (VLM Mode): {target_model_id}...")
374
- model, tokenizer = vlm_load(target_model_id)
430
+ is_gemma4 = _is_gemma4_model_id(model_id)
431
+ model_type = _local_model_type(target_model_id) or _local_model_type(model_id)
432
+ loader_kind = "mlx_vlm"
433
+
434
+ try:
435
+ if vlm_load is None:
436
+ raise RuntimeError("MLX-VLM is not installed.")
437
+ print(f"🔄 Loading Target (VLM Mode): {target_model_id}...")
438
+ model, tokenizer = vlm_load(target_model_id)
439
+ except Exception as vlm_error:
440
+ if not (is_gemma4 and model_type != "gemma4_unified" and lm_load is not None):
441
+ raise
442
+ print(f"⚠️ Gemma 4 MLX-VLM load failed; retrying MLX-LM text path: {vlm_error}")
443
+ print(f"🔄 Loading Target (LM Mode): {target_model_id}...")
444
+ model, tokenizer = lm_load(target_model_id)
445
+ loader_kind = "mlx_lm"
375
446
 
376
447
  draft_model = None
377
448
  if target_draft_model_id:
378
- print(f"🔄 Loading Assistant (VLM Mode): {target_draft_model_id}...")
379
- draft_model, _ = vlm_load(target_draft_model_id)
449
+ if loader_kind == "mlx_vlm":
450
+ print(f"🔄 Loading Assistant (VLM Mode): {target_draft_model_id}...")
451
+ draft_model, _ = vlm_load(target_draft_model_id)
452
+ elif lm_load is not None:
453
+ print(f"🔄 Loading Assistant (LM Mode): {target_draft_model_id}...")
454
+ draft_model, _ = lm_load(target_draft_model_id)
380
455
  print("✅ Assistant Ready.")
381
456
 
382
- return model, tokenizer, draft_model
457
+ return model, tokenizer, draft_model, loader_kind
383
458
 
384
459
  try:
385
460
  # Use the dedicated single-thread executor to ensure MLX GPU streams match during inference
386
- model, tokenizer, draft_model = await loop.run_in_executor(executor, _load)
387
- self._cache[cache_key] = (model, tokenizer, draft_model)
461
+ model, tokenizer, draft_model, loader_kind = await loop.run_in_executor(executor, _load)
462
+ self._cache[cache_key] = (model, tokenizer, draft_model, loader_kind)
388
463
  self._current = cache_key
389
464
  self._touch(cache_key)
390
- print(f"✅ Fully Loaded: {cache_key}")
391
- return f"Success: {cache_key}"
465
+ print(f"✅ Fully Loaded: {cache_key} ({loader_kind})")
466
+ return f"Success: {cache_key} ({loader_kind})"
392
467
  except Exception as e:
393
468
  print(f"❌ Load Error: {e}")
394
469
  raise e
@@ -510,6 +585,11 @@ class LLMRouter:
510
585
  print(f"⚠️ VLM chat template fallback: {e}")
511
586
  return self._build_prompt(message, context, processor)
512
587
 
588
+ def _unpack_local_cache(self, cached: Tuple) -> Tuple[object, object, object, str]:
589
+ model, tokenizer, draft_model = cached[:3]
590
+ loader_kind = str(cached[3]) if len(cached) > 3 else "mlx_vlm"
591
+ return model, tokenizer, draft_model, loader_kind
592
+
513
593
  async def generate_as(self, model_id: str | None, message: str, context: Optional[str] = None, max_tokens: int = 4096, temperature: float = 0.2) -> str:
514
594
  """Generate using a specific model, temporarily switching if needed. Falls back to current model if model_id is None or not loaded."""
515
595
  if not model_id or model_id == self._current:
@@ -531,16 +611,24 @@ class LLMRouter:
531
611
  if isinstance(cached, CloudModel):
532
612
  return await self._cloud_generate(cached, message, context, max_tokens, temperature)
533
613
 
534
- model, tokenizer, draft_model = self._cache[self._current]
535
- prompt = self._build_vlm_prompt(model, tokenizer, message, context, 1 if image_data else 0)
614
+ model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(self._cache[self._current])
615
+ use_vlm = loader_kind == "mlx_vlm"
616
+ prompt = (
617
+ self._build_vlm_prompt(model, tokenizer, message, context, 1 if image_data else 0)
618
+ if use_vlm
619
+ else self._build_prompt(message, context, tokenizer)
620
+ )
536
621
 
537
622
  loop = asyncio.get_event_loop()
538
623
 
539
624
  def _gen():
540
625
  import mlx.core as mx
541
626
  mx.set_default_device(mx.gpu)
542
- from mlx_vlm import generate as vlm_gen
543
- return vlm_gen(model, tokenizer, prompt=prompt, image=self._prep_image(image_data) if image_data else None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
627
+ if use_vlm:
628
+ from mlx_vlm import generate as vlm_gen
629
+ return vlm_gen(model, tokenizer, prompt=prompt, image=self._prep_image(image_data) if image_data else None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
630
+ from mlx_lm import generate as lm_gen
631
+ return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
544
632
  result = await loop.run_in_executor(executor, _gen)
545
633
  # mlx-vlm might return a GenerationResult object; extract the text
546
634
  if hasattr(result, "text"):
@@ -577,8 +665,13 @@ class LLMRouter:
577
665
  yield chunk
578
666
  return
579
667
 
580
- model, tokenizer, draft_model = self._cache[self._current]
581
- prompt = self._build_vlm_prompt(model, tokenizer, message, context, 1 if image_data else 0)
668
+ model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(self._cache[self._current])
669
+ use_vlm = loader_kind == "mlx_vlm"
670
+ prompt = (
671
+ self._build_vlm_prompt(model, tokenizer, message, context, 1 if image_data else 0)
672
+ if use_vlm
673
+ else self._build_prompt(message, context, tokenizer)
674
+ )
582
675
  loop = asyncio.get_event_loop()
583
676
  queue = asyncio.Queue()
584
677
 
@@ -586,8 +679,12 @@ class LLMRouter:
586
679
  import mlx.core as mx
587
680
  mx.set_default_device(mx.gpu)
588
681
  try:
589
- from mlx_vlm import stream_generate as vlm_stream
590
- gen = vlm_stream(model, tokenizer, prompt=prompt, image=self._prep_image(image_data) if image_data else None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
682
+ if use_vlm:
683
+ from mlx_vlm import stream_generate as vlm_stream
684
+ gen = vlm_stream(model, tokenizer, prompt=prompt, image=self._prep_image(image_data) if image_data else None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
685
+ else:
686
+ from mlx_lm import stream_generate as lm_stream
687
+ gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
591
688
 
592
689
  for chunk in gen:
593
690
  text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
@@ -660,7 +757,7 @@ class LLMRouter:
660
757
  if isinstance(cached, CloudModel):
661
758
  return await self._cloud_generate_document(cached, message, system_prompt, max_tokens, temperature)
662
759
 
663
- model, tokenizer, draft_model = cached
760
+ model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(cached)
664
761
  if hasattr(tokenizer, "apply_chat_template"):
665
762
  try:
666
763
  msgs = [
@@ -677,8 +774,11 @@ class LLMRouter:
677
774
  def _gen():
678
775
  import mlx.core as mx
679
776
  mx.set_default_device(mx.gpu)
680
- from mlx_vlm import generate as vlm_gen
681
- return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
777
+ if loader_kind == "mlx_vlm":
778
+ from mlx_vlm import generate as vlm_gen
779
+ return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
780
+ from mlx_lm import generate as lm_gen
781
+ return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
682
782
  result = await loop.run_in_executor(executor, _gen)
683
783
  if hasattr(result, "text"):
684
784
  return normalize_branding(result.text)
@@ -719,7 +819,7 @@ class LLMRouter:
719
819
  yield chunk
720
820
  return
721
821
 
722
- model, tokenizer, draft_model = cached
822
+ model, tokenizer, draft_model, loader_kind = self._unpack_local_cache(cached)
723
823
  if hasattr(tokenizer, "apply_chat_template"):
724
824
  try:
725
825
  msgs = [
@@ -739,8 +839,12 @@ class LLMRouter:
739
839
  import mlx.core as mx
740
840
  mx.set_default_device(mx.gpu)
741
841
  try:
742
- from mlx_vlm import stream_generate as vlm_stream
743
- gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
842
+ if loader_kind == "mlx_vlm":
843
+ from mlx_vlm import stream_generate as vlm_stream
844
+ gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
845
+ else:
846
+ from mlx_lm import stream_generate as lm_stream
847
+ gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
744
848
  for chunk in gen:
745
849
  text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
746
850
  loop.call_soon_threadsafe(queue.put_nowait, text)
@@ -17,8 +17,8 @@ from typing import Dict, List, Optional
17
17
 
18
18
  ENGINE_INSTALLERS = {
19
19
  "local_mlx": {
20
- "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm", "huggingface_hub[cli]"],
21
- "label": "Install MLX-VLM runtime",
20
+ "command": [sys.executable, "-m", "pip", "install", "--upgrade", "mlx-vlm>=0.6.3", "mlx-lm", "huggingface_hub[cli]"],
21
+ "label": "Install MLX runtime",
22
22
  },
23
23
  "openai": {
24
24
  "command": [sys.executable, "-m", "pip", "install", "openai"],
@@ -18,6 +18,7 @@ from __future__ import annotations
18
18
  import re
19
19
  from typing import Any, Dict, List, Optional
20
20
 
21
+ from latticeai.core.model_compat import model_runtime_compatibility
21
22
  from latticeai.services.model_catalog import ENGINE_MODEL_CATALOG
22
23
 
23
24
  # ── status vocabulary ─────────────────────────────────────────────────────────
@@ -85,14 +86,19 @@ def _engine_available(engine: str, profile: Dict[str, Any]) -> bool:
85
86
  def _classify_one(
86
87
  model: Dict[str, Any],
87
88
  *,
89
+ engine: str,
88
90
  engine_available: bool,
89
91
  ram_gb: float,
90
92
  ) -> Dict[str, Any]:
91
93
  size_gb = parse_size_gb(model.get("size"))
92
94
  need_gb = estimated_ram_gb(size_gb) if size_gb is not None else None
95
+ runtime = model_runtime_compatibility(str(model.get("id") or ""), engine=engine)
93
96
 
94
97
  if not engine_available:
95
98
  status, reason = NOT_RECOMMENDED, "Apple Silicon과 MLX-VLM이 필요합니다"
99
+ elif runtime.get("supported") is False:
100
+ status = NOT_RECOMMENDED
101
+ reason = str(runtime.get("user_message") or "이 모델은 현재 설치된 실행 런타임에서 지원되지 않습니다")
96
102
  elif need_gb is None:
97
103
  # Tool-managed/pull models have no fixed on-disk size, so treat them as
98
104
  # compatible and let the execution tool validate the exact model.
@@ -124,6 +130,7 @@ def _classify_one(
124
130
  "run_location": model.get("run_location"),
125
131
  "internet_requirement": model.get("internet_requirement"),
126
132
  "source_display_order": model.get("source_display_order"),
133
+ "runtime_compatibility": runtime,
127
134
  }
128
135
 
129
136
 
@@ -148,7 +155,7 @@ def recommend_catalog(profile: Dict[str, Any], *, engine: str = "local_mlx") ->
148
155
  ram_gb = _ram_gb(profile)
149
156
 
150
157
  classified = [
151
- _classify_one(m, engine_available=engine_available, ram_gb=ram_gb)
158
+ _classify_one(m, engine=engine, engine_available=engine_available, ram_gb=ram_gb)
152
159
  for m in models
153
160
  ]
154
161
 
@@ -40,6 +40,8 @@ from latticeai.core.model_compat import (
40
40
  classify_smoke_response as _classify_smoke_response,
41
41
  ensure_profile as _ensure_compat_profile,
42
42
  fast_postprocess as _compat_fast_postprocess,
43
+ friendly_model_runtime_error as _friendly_model_runtime_error,
44
+ model_runtime_compatibility as _model_runtime_compatibility,
43
45
  record_smoke_result as _record_smoke_result,
44
46
  )
45
47
  from latticeai.core.model_resolution import ModelResolution as _ModelResolution
@@ -931,7 +933,10 @@ def ensure_llamacpp_server(model_name: str) -> None:
931
933
 
932
934
  def engine_installed(engine: str) -> bool:
933
935
  if engine == "local_mlx":
934
- return bool(importlib.util.find_spec("mlx") and importlib.util.find_spec("mlx_vlm"))
936
+ return bool(
937
+ importlib.util.find_spec("mlx")
938
+ and (importlib.util.find_spec("mlx_vlm") or importlib.util.find_spec("mlx_lm"))
939
+ )
935
940
  if engine == "ollama":
936
941
  return local_binary("ollama") is not None
937
942
  if engine == "vllm":
@@ -1042,7 +1047,7 @@ def engine_status() -> List[Dict]:
1042
1047
  "id": "local_mlx",
1043
1048
  "name": "MLX",
1044
1049
  "kind": "local",
1045
- "description": "Apple Silicon GPU에서 MLX/MLX-VLM 모델을 직접 실행합니다.",
1050
+ "description": "Apple Silicon GPU에서 MLX-VLM 모델을 직접 실행하고, Gemma 4는 필요 시 MLX-LM 텍스트 경로로 재시도합니다.",
1046
1051
  "installed": engine_installed("local_mlx"),
1047
1052
  "installable": True,
1048
1053
  "install_label": ENGINE_INSTALLERS["local_mlx"]["label"],
@@ -1369,6 +1374,9 @@ async def prepare_and_load_model(
1369
1374
  parsed_provider, parsed_model = parse_model_ref(model_id)
1370
1375
  if parsed_provider == "mlx":
1371
1376
  parsed_provider = "local_mlx"
1377
+ compatibility = _model_runtime_compatibility(parsed_model, engine=parsed_provider)
1378
+ if compatibility.get("supported") is False:
1379
+ raise HTTPException(status_code=400, detail=compatibility)
1372
1380
 
1373
1381
  local_engines = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
1374
1382
  install_result: Dict[str, object] = {}
@@ -1488,6 +1496,9 @@ async def prepare_and_load_model_stream(
1488
1496
  parsed_provider, parsed_model = parse_model_ref(model_id)
1489
1497
  if parsed_provider == "mlx":
1490
1498
  parsed_provider = "local_mlx"
1499
+ compatibility = _model_runtime_compatibility(parsed_model, engine=parsed_provider)
1500
+ if compatibility.get("supported") is False:
1501
+ raise HTTPException(status_code=400, detail=compatibility)
1491
1502
 
1492
1503
  work_queue: "queue.Queue[Dict[str, object]]" = queue.Queue()
1493
1504
  work_result: Dict[str, object] = {}
@@ -1651,7 +1662,11 @@ async def prepare_and_load_model_stream(
1651
1662
  work_queue.put({"kind": "error", "status_code": exc.status_code, "detail": exc.detail})
1652
1663
  except Exception as exc:
1653
1664
  logging.exception("model prepare stream worker failed")
1654
- work_queue.put({"kind": "error", "status_code": 500, "detail": str(exc)[-2000:]})
1665
+ work_queue.put({
1666
+ "kind": "error",
1667
+ "status_code": 500,
1668
+ "detail": _friendly_model_runtime_error(exc, model_id=model_id, engine=parsed_provider),
1669
+ })
1655
1670
 
1656
1671
  worker = threading.Thread(target=blocking_prepare, daemon=True)
1657
1672
  worker.start()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ltcai",
3
- "version": "4.4.0",
3
+ "version": "4.5.1",
4
4
  "description": "Lattice AI — local-first Digital Brain Platform (knowledge graph, durable memory, hybrid search, agents, portable encrypted brain archives)",
5
5
  "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
6
6
  "repository": {
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { existsSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
3
3
  import { join } from "node:path";
4
4
 
5
5
  const repo = join(import.meta.dirname, "..");
@@ -8,6 +8,17 @@ const nestedViteManifest = join(appDir, ".vite", "asset-manifest.json");
8
8
  const publicManifest = join(appDir, "asset-manifest.json");
9
9
  const pkg = JSON.parse(readFileSync(join(repo, "package.json"), "utf8"));
10
10
 
11
+ const assetsDir = join(appDir, "assets");
12
+ if (existsSync(assetsDir)) {
13
+ for (const name of readdirSync(assetsDir)) {
14
+ if (!/\.(?:css|js)$/.test(name)) continue;
15
+ const file = join(assetsDir, name);
16
+ const text = readFileSync(file, "utf8");
17
+ const normalized = text.replace(/[ \t]+$/gm, "");
18
+ if (normalized !== text) writeFileSync(file, normalized, "utf8");
19
+ }
20
+ }
21
+
11
22
  const viteManifest = existsSync(nestedViteManifest) ? nestedViteManifest : publicManifest;
12
23
  if (!existsSync(viteManifest)) {
13
24
  console.error("Vite manifest missing. Run `vite build` before build_frontend_assets.mjs.");
@@ -1654,7 +1654,7 @@ dependencies = [
1654
1654
 
1655
1655
  [[package]]
1656
1656
  name = "lattice-ai-desktop"
1657
- version = "4.4.0"
1657
+ version = "4.5.1"
1658
1658
  dependencies = [
1659
1659
  "plist",
1660
1660
  "serde",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "lattice-ai-desktop"
3
- version = "4.4.0"
3
+ version = "4.5.1"
4
4
  description = "Lattice AI Digital Brain desktop shell"
5
5
  authors = ["TaeSoo Park"]
6
6
  edition = "2021"
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://schema.tauri.app/config/2",
3
3
  "productName": "Lattice AI",
4
- "version": "4.4.0",
4
+ "version": "4.5.1",
5
5
  "identifier": "ai.lattice.desktop",
6
6
  "build": {
7
7
  "beforeDevCommand": "npm run frontend:dev",
@@ -1,13 +1,13 @@
1
1
  {
2
- "version": "4.4.0",
2
+ "version": "4.5.1",
3
3
  "generated_at": "vite",
4
4
  "entrypoints": {
5
5
  "app": "/static/app/index.html"
6
6
  },
7
7
  "assets": {
8
8
  "../node_modules/@tauri-apps/api/core.js": "/static/app/assets/core-CwxXejkd.js",
9
- "index.html": "/static/app/assets/index-pdzil9ac.js",
10
- "assets/index-CHHal8Zl.css": "/static/app/assets/index-CHHal8Zl.css"
9
+ "index.html": "/static/app/assets/index-3G8qcrIS.js",
10
+ "assets/index-C0wYZp7k.css": "/static/app/assets/index-C0wYZp7k.css"
11
11
  },
12
12
  "vite": {
13
13
  "../node_modules/@tauri-apps/api/core.js": {
@@ -17,7 +17,7 @@
17
17
  "isDynamicEntry": true
18
18
  },
19
19
  "index.html": {
20
- "file": "assets/index-pdzil9ac.js",
20
+ "file": "assets/index-3G8qcrIS.js",
21
21
  "name": "index",
22
22
  "src": "index.html",
23
23
  "isEntry": true,
@@ -25,7 +25,7 @@
25
25
  "../node_modules/@tauri-apps/api/core.js"
26
26
  ],
27
27
  "css": [
28
- "assets/index-CHHal8Zl.css"
28
+ "assets/index-C0wYZp7k.css"
29
29
  ]
30
30
  }
31
31
  }