ltcai 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,14 +23,23 @@ from pathlib import Path
23
23
  from typing import Any, Dict, Iterable, List, Optional, Tuple
24
24
 
25
25
  try:
26
- from kg_schema import KGStoreV2
26
+ from kg_schema import KGStoreV2, NodeType, EdgeType, _exec_script
27
27
  except Exception: # pragma: no cover - v2 schema is optional at import time
28
28
  KGStoreV2 = None # type: ignore[assignment]
29
+ NodeType = None # type: ignore[assignment]
30
+ EdgeType = None # type: ignore[assignment]
31
+ _exec_script = None # type: ignore[assignment]
29
32
 
30
33
  # Default read source for the graph queries: v2 reconstruction views.
31
34
  # Override with LATTICEAI_KG_READ_V2=0 to fall back to the legacy tables.
32
35
  _READ_FROM_V2_DEFAULT = os.getenv("LATTICEAI_KG_READ_V2", "1") != "0"
33
36
 
37
+ # Bump when the v2 projection layout changes (columns, normalization rules).
38
+ # On init, a stale projection is dropped and rebuilt from the authoritative
39
+ # legacy tables — safe because nodes_v2/edges_v2 only ever hold a derived view.
40
+ # v4: summary nullable + verbatim (byte-faithful) projection of legacy values.
41
+ _PROJECTION_VERSION = 4
42
+
34
43
  _llm_router_ref = None
35
44
 
36
45
  def set_llm_router(router_instance):
@@ -916,133 +925,192 @@ class KnowledgeGraphStore:
916
925
  )
917
926
  self._init_v2_schema()
918
927
 
919
- # SQL views that reconstruct the *exact* legacy row shape on top of the v2
920
- # tables, so the read methods can run unchanged against either source. The
921
- # projection (see _v2_project_node/_edge) stashes summary + the original
922
- # metadata_json + (via the type column) the legacy type string, so these
923
- # views are byte-faithful to the legacy nodes/edges tables.
928
+ # SQL views that reconstruct the *exact* legacy row shape on top of the
929
+ # normalized v2 tables, so the read methods run unchanged against either
930
+ # source. The projection stores the raw legacy type string in ``legacy_type``
931
+ # and promotes summary + metadata to first-class columns (no more
932
+ # ``attrs._kg`` passthrough / ``evidence`` abuse), so these views are
933
+ # byte-faithful to the legacy nodes/edges tables.
924
934
  _V2_VIEWS_SQL = """
925
935
  CREATE VIEW IF NOT EXISTS kgv2_nodes AS
926
- SELECT id, type,
936
+ SELECT id,
937
+ COALESCE(legacy_type, type) AS type,
927
938
  label AS title,
928
- COALESCE(json_extract(attrs, '$._kg.summary'), '') AS summary,
929
- COALESCE(json_extract(attrs, '$._kg.metadata_json'), '{}') AS metadata_json,
939
+ summary,
940
+ attrs AS metadata_json,
930
941
  created_at, updated_at
931
942
  FROM nodes_v2;
932
943
  CREATE VIEW IF NOT EXISTS kgv2_edges AS
933
- SELECT id, source AS from_node, target AS to_node, type, weight,
934
- COALESCE(evidence, '{}') AS metadata_json, created_at
944
+ SELECT id, source AS from_node, target AS to_node,
945
+ COALESCE(legacy_type, type) AS type,
946
+ weight,
947
+ metadata AS metadata_json,
948
+ created_at
935
949
  FROM edges_v2;
936
950
  """
937
951
 
938
952
  def _init_v2_schema(self) -> None:
939
- """Initialize the v2 tables + reconstruction views and backfill from legacy.
940
-
941
- Completes the v2 migration: both write (dual-write projection in
942
- _upsert_node/_upsert_edge) and read (read methods route to the kgv2_*
943
- views when ``_READ_FROM_V2`` is on) flow through the v2 tables. Legacy
944
- nodes/edges are retained as the durable source until the v2 path bakes in.
953
+ """Initialize the normalized v2 tables + reconstruction views, migrating
954
+ the projection layout when it is stale — **atomically**.
955
+
956
+ The entire DROP → CREATE → VIEWS → BACKFILL → version-stamp sequence runs
957
+ in a single transaction on one connection: on any failure it rolls back,
958
+ leaving the prior projection untouched and the version unchanged, so the
959
+ next startup simply retries. The migration only ever touches the v2
960
+ tables/views and the ``projection_version`` key — never the authoritative
961
+ legacy ``nodes``/``edges`` — so legacy data cannot be corrupted even if
962
+ the rebuild fails midway.
945
963
  """
946
- if KGStoreV2 is None:
964
+ if KGStoreV2 is None or _exec_script is None:
947
965
  return
948
966
  try:
949
- KGStoreV2(self.db_path).init_schema()
950
967
  with self._connect() as conn:
951
- conn.executescript(self._V2_VIEWS_SQL)
952
- self._backfill_v2_if_needed()
968
+ conn.execute("BEGIN")
969
+ stale = self._projection_version(conn) != _PROJECTION_VERSION
970
+ if stale:
971
+ # The projection is non-authoritative; drop it so init_schema
972
+ # recreates the tables with the current normalized columns.
973
+ for stmt in (
974
+ "DROP VIEW IF EXISTS kgv2_edges",
975
+ "DROP VIEW IF EXISTS kgv2_nodes",
976
+ "DROP TABLE IF EXISTS edges_v2",
977
+ "DROP TABLE IF EXISTS nodes_v2",
978
+ ):
979
+ conn.execute(stmt)
980
+ # init_schema(conn=...) joins this transaction (no implicit commit)
981
+ KGStoreV2(self.db_path).init_schema(conn=conn)
982
+ _exec_script(conn, self._V2_VIEWS_SQL)
983
+ self._backfill_v2_on(conn, force=stale)
984
+ # version stamp commits together with the backfill — never stranded
985
+ conn.execute(
986
+ "INSERT OR REPLACE INTO kg_meta(key, value) VALUES ('projection_version', ?)",
987
+ (str(_PROJECTION_VERSION),),
988
+ )
953
989
  except Exception as e:
954
990
  logging.warning("knowledge_graph: v2 schema init/backfill skipped: %s", e)
955
991
 
956
- def _backfill_v2_if_needed(self) -> None:
957
- """Project legacy nodes/edges into the v2 tables when v2 is empty or stale.
992
+ def _projection_version(self, conn: sqlite3.Connection) -> int:
993
+ """Return the stored v2 projection layout version (0 if unknown).
994
+
995
+ A fresh DB (kg_meta absent) raises ``sqlite3.OperationalError`` here and
996
+ is correctly treated as version 0 → rebuild. Only sqlite errors are
997
+ swallowed so a real bug doesn't masquerade as a stale projection.
998
+ """
999
+ try:
1000
+ row = conn.execute(
1001
+ "SELECT value FROM kg_meta WHERE key='projection_version'"
1002
+ ).fetchone()
1003
+ return int(row["value"]) if row and row["value"] is not None else 0
1004
+ except sqlite3.Error:
1005
+ return 0
1006
+
1007
+ def _backfill_v2_if_needed(self, *, force: bool = False) -> None:
1008
+ """Project legacy nodes/edges into v2 on a fresh transaction.
958
1009
 
959
- Non-destructive to legacy. Reprojects when the v2 rows predate the
960
- ``_kg`` reconstruction blob (older enum-only backfill), so the views
961
- stay faithful. Idempotent: no-ops once v2 carries the current projection.
1010
+ Thin wrapper around :meth:`_backfill_v2_on` for callers (tests, ad-hoc
1011
+ re-sync) that aren't already inside the migration transaction.
962
1012
  """
963
1013
  try:
964
1014
  with self._connect() as conn:
965
- v2_nodes = conn.execute("SELECT COUNT(*) FROM nodes_v2").fetchone()[0]
966
- legacy_nodes = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
967
- if legacy_nodes == 0:
968
- return
969
- if v2_nodes > 0:
970
- has_kg = conn.execute(
971
- "SELECT COUNT(*) FROM nodes_v2 WHERE json_extract(attrs,'$._kg') IS NOT NULL"
972
- ).fetchone()[0]
973
- if has_kg > 0:
974
- return # current projection already present
975
- # (re)project: clear v2 graph (not authoritative) and rebuild
976
- conn.execute("DELETE FROM edges_v2")
977
- conn.execute("DELETE FROM nodes_v2")
978
- n = e = 0
979
- for r in conn.execute(
980
- "SELECT id, type, title, summary, metadata_json, created_at, updated_at FROM nodes"
981
- ).fetchall():
982
- self._v2_project_node(
983
- conn, r["id"], r["type"], r["title"] or "", r["summary"] or "",
984
- _safe_loads(r["metadata_json"]),
985
- created_at=r["created_at"], updated_at=r["updated_at"],
986
- )
987
- n += 1
988
- for r in conn.execute(
989
- "SELECT id, from_node, to_node, type, weight, metadata_json, created_at FROM edges"
990
- ).fetchall():
991
- self._v2_project_edge(
992
- conn, r["from_node"], r["to_node"], r["type"], float(r["weight"] or 1.0),
993
- _safe_loads(r["metadata_json"]), edge_id=r["id"], created_at=r["created_at"],
994
- )
995
- e += 1
996
- logging.info("knowledge_graph: projected legacy → v2 (%d nodes, %d edges)", n, e)
1015
+ self._backfill_v2_on(conn, force=force)
997
1016
  except Exception as ex:
998
1017
  logging.warning("knowledge_graph: v2 backfill skipped: %s", ex)
999
1018
 
1000
- # ── v2 dual-write projection (legacy types + summary/metadata in attrs._kg) ──
1019
+ def _backfill_v2_on(self, conn: sqlite3.Connection, *, force: bool = False) -> None:
1020
+ """Project legacy nodes/edges into the normalized v2 tables on ``conn``.
1021
+
1022
+ Non-destructive to legacy. ``force`` rebuilds unconditionally (used after
1023
+ a layout migration); otherwise it only projects when v2 is empty. The v2
1024
+ graph is a derived projection, so clearing + rebuilding it is always safe.
1025
+ Idempotent: no-ops once v2 carries the current projection. Copies the
1026
+ legacy column values **verbatim** so the kgv2_* views are byte-faithful.
1027
+ """
1028
+ legacy_nodes = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
1029
+ if legacy_nodes == 0:
1030
+ return
1031
+ v2_nodes = conn.execute("SELECT COUNT(*) FROM nodes_v2").fetchone()[0]
1032
+ if v2_nodes > 0 and not force:
1033
+ return # current projection already present
1034
+ # (re)project: clear v2 graph (not authoritative) and rebuild
1035
+ conn.execute("DELETE FROM edges_v2")
1036
+ conn.execute("DELETE FROM nodes_v2")
1037
+ n = e = 0
1038
+ for r in conn.execute(
1039
+ "SELECT id, type, title, summary, metadata_json, created_at, updated_at FROM nodes"
1040
+ ).fetchall():
1041
+ self._v2_project_node(
1042
+ conn, r["id"], r["type"], r["title"], r["summary"], r["metadata_json"],
1043
+ created_at=r["created_at"], updated_at=r["updated_at"],
1044
+ )
1045
+ n += 1
1046
+ for r in conn.execute(
1047
+ "SELECT id, from_node, to_node, type, weight, metadata_json, created_at FROM edges"
1048
+ ).fetchall():
1049
+ self._v2_project_edge(
1050
+ conn, r["from_node"], r["to_node"], r["type"], float(r["weight"] or 1.0),
1051
+ r["metadata_json"], edge_id=r["id"], created_at=r["created_at"],
1052
+ )
1053
+ e += 1
1054
+ logging.info("knowledge_graph: projected legacy → v2 (%d nodes, %d edges)", n, e)
1055
+
1056
+ # ── v2 dual-write projection (normalized type, byte-faithful legacy values) ──
1057
+ # The projection stores the legacy ``title``/``summary``/``metadata_json``
1058
+ # values it is handed VERBATIM (no truncation or JSON re-encoding) so the
1059
+ # kgv2_* views reproduce the legacy rows exactly. Callers (_upsert_* and the
1060
+ # backfill) pass the already-canonical legacy column values.
1001
1061
  def _v2_project_node(
1002
1062
  self, conn: sqlite3.Connection, node_id: str, node_type: str, title: str,
1003
- summary: str, metadata: Optional[Dict[str, Any]],
1063
+ summary: Optional[str], metadata_json: Optional[str],
1004
1064
  *, created_at: Optional[str] = None, updated_at: Optional[str] = None,
1005
1065
  ) -> None:
1006
1066
  if KGStoreV2 is None:
1007
1067
  return
1008
1068
  ts = updated_at or _now()
1009
- attrs = _json({"_kg": {"summary": (summary or "")[:1000], "metadata_json": _json(metadata)}})
1069
+ norm_type = NodeType.from_legacy(node_type).value if NodeType is not None else node_type
1010
1070
  try:
1011
1071
  conn.execute(
1012
1072
  """
1013
- INSERT INTO nodes_v2(id, type, label, attrs, owner_id, visibility,
1014
- created_at, updated_at, importance_score)
1015
- VALUES (?, ?, ?, ?, NULL, 'private', ?, ?, 0.0)
1073
+ INSERT INTO nodes_v2(id, type, legacy_type, label, summary, attrs,
1074
+ owner_id, visibility, created_at, updated_at,
1075
+ importance_score)
1076
+ VALUES (?, ?, ?, ?, ?, ?, NULL, 'private', ?, ?, 0.0)
1016
1077
  ON CONFLICT(id) DO UPDATE SET
1017
- type=excluded.type, label=excluded.label,
1078
+ type=excluded.type, legacy_type=excluded.legacy_type,
1079
+ label=excluded.label, summary=excluded.summary,
1018
1080
  attrs=excluded.attrs, updated_at=excluded.updated_at
1019
1081
  """,
1020
- (node_id, node_type, (title or "")[:240], attrs, created_at or ts, ts),
1082
+ (node_id, norm_type, node_type, title, summary,
1083
+ metadata_json if metadata_json is not None else "{}",
1084
+ created_at or ts, ts),
1021
1085
  )
1022
1086
  except Exception as ex:
1023
1087
  logging.debug("knowledge_graph: v2 node projection skipped (%s): %s", node_id, ex)
1024
1088
 
1025
1089
  def _v2_project_edge(
1026
1090
  self, conn: sqlite3.Connection, from_node: str, to_node: str, edge_type: str,
1027
- weight: float, metadata: Optional[Dict[str, Any]],
1091
+ weight: float, metadata_json: Optional[str],
1028
1092
  *, edge_id: Optional[str] = None, created_at: Optional[str] = None,
1029
1093
  ) -> None:
1030
1094
  if KGStoreV2 is None:
1031
1095
  return
1032
- meta = metadata or {}
1033
1096
  eid = edge_id or f"edge:{_sha256_text(f'{from_node}|{edge_type}|{to_node}')[:24]}"
1097
+ norm_type = EdgeType.from_legacy(edge_type).value if EdgeType is not None else edge_type
1098
+ meta_str = metadata_json if metadata_json is not None else "{}"
1099
+ confidence = float(_safe_loads(meta_str).get("confidence", 1.0))
1034
1100
  try:
1035
1101
  conn.execute(
1036
1102
  """
1037
- INSERT INTO edges_v2(id, source, target, type, weight, confidence,
1038
- evidence, created_by, created_at)
1039
- VALUES (?, ?, ?, ?, ?, ?, ?, 'legacy', ?)
1040
- ON CONFLICT(source, target, type) DO UPDATE SET
1103
+ INSERT INTO edges_v2(id, source, target, type, legacy_type, weight,
1104
+ confidence, evidence, metadata, created_by, created_at)
1105
+ VALUES (?, ?, ?, ?, ?, ?, ?, '[]', ?, 'legacy', ?)
1106
+ ON CONFLICT(source, target, legacy_type) DO UPDATE SET
1107
+ type=excluded.type,
1041
1108
  weight=max(edges_v2.weight, excluded.weight),
1042
- evidence=excluded.evidence
1109
+ confidence=excluded.confidence,
1110
+ metadata=excluded.metadata
1043
1111
  """,
1044
- (eid, from_node, to_node, edge_type, float(weight),
1045
- float(meta.get("confidence", 1.0)), _json(meta), created_at or _now()),
1112
+ (eid, from_node, to_node, norm_type, edge_type, float(weight),
1113
+ confidence, meta_str, created_at or _now()),
1046
1114
  )
1047
1115
  except Exception as ex:
1048
1116
  logging.debug("knowledge_graph: v2 edge projection skipped (%s->%s): %s", from_node, to_node, ex)
@@ -1069,6 +1137,35 @@ class KnowledgeGraphStore:
1069
1137
  except Exception as ex:
1070
1138
  logging.debug("knowledge_graph: v2 edge delete mirror skipped: %s", ex)
1071
1139
 
1140
+ def _v2_sync_report(self) -> Dict[str, Any]:
1141
+ """Diagnose the dual-write invariant: legacy node/edge id sets must equal
1142
+ the v2 projection's. Returns counts + any drift (ids missing from / extra
1143
+ in v2). ``in_sync`` is True only when both id sets match exactly.
1144
+
1145
+ All legacy writes go through _upsert_node/_upsert_edge (which dual-write)
1146
+ and every legacy delete is mirrored, so a non-empty drift signals a
1147
+ bypassed write path — this is the runtime guard for that invariant.
1148
+ """
1149
+ if KGStoreV2 is None:
1150
+ return {"available": False, "in_sync": True}
1151
+ with self._connect() as conn:
1152
+ legacy_nodes = {r[0] for r in conn.execute("SELECT id FROM nodes")}
1153
+ v2_nodes = {r[0] for r in conn.execute("SELECT id FROM nodes_v2")}
1154
+ legacy_edges = {r[0] for r in conn.execute("SELECT id FROM edges")}
1155
+ v2_edges = {r[0] for r in conn.execute("SELECT id FROM edges_v2")}
1156
+ return {
1157
+ "available": True,
1158
+ "in_sync": legacy_nodes == v2_nodes and legacy_edges == v2_edges,
1159
+ "nodes_legacy": len(legacy_nodes),
1160
+ "nodes_v2": len(v2_nodes),
1161
+ "edges_legacy": len(legacy_edges),
1162
+ "edges_v2": len(v2_edges),
1163
+ "nodes_missing_from_v2": sorted(legacy_nodes - v2_nodes),
1164
+ "nodes_extra_in_v2": sorted(v2_nodes - legacy_nodes),
1165
+ "edges_missing_from_v2": sorted(legacy_edges - v2_edges),
1166
+ "edges_extra_in_v2": sorted(v2_edges - legacy_edges),
1167
+ }
1168
+
1072
1169
  def _upsert_node(
1073
1170
  self,
1074
1171
  conn: sqlite3.Connection,
@@ -1080,6 +1177,11 @@ class KnowledgeGraphStore:
1080
1177
  raw: Optional[Dict[str, Any]] = None,
1081
1178
  ) -> str:
1082
1179
  now = _now()
1180
+ # Canonical stored values, computed once and shared with the v2
1181
+ # projection so legacy and v2 hold byte-identical strings.
1182
+ title_s = title[:240]
1183
+ summary_s = summary[:1000]
1184
+ meta_json = _json(metadata)
1083
1185
  conn.execute(
1084
1186
  """
1085
1187
  INSERT INTO nodes(id, type, title, summary, metadata_json, raw_json, created_at, updated_at)
@@ -1091,10 +1193,10 @@ class KnowledgeGraphStore:
1091
1193
  raw_json=excluded.raw_json,
1092
1194
  updated_at=excluded.updated_at
1093
1195
  """,
1094
- (node_id, node_type, title[:240], summary[:1000], _json(metadata), _json(raw), now, now),
1196
+ (node_id, node_type, title_s, summary_s, meta_json, _json(raw), now, now),
1095
1197
  )
1096
1198
  # dual-write: project into the v2 graph on the same transaction
1097
- self._v2_project_node(conn, node_id, node_type, title, summary, metadata,
1199
+ self._v2_project_node(conn, node_id, node_type, title_s, summary_s, meta_json,
1098
1200
  created_at=now, updated_at=now)
1099
1201
  return node_id
1100
1202
 
@@ -1109,6 +1211,7 @@ class KnowledgeGraphStore:
1109
1211
  ) -> str:
1110
1212
  edge_id = f"edge:{_sha256_text(f'{from_node}|{edge_type}|{to_node}')[:24]}"
1111
1213
  now = _now()
1214
+ meta_json = _json(metadata) # canonical string shared with the projection
1112
1215
  conn.execute(
1113
1216
  """
1114
1217
  INSERT INTO edges(id, from_node, to_node, type, weight, metadata_json, created_at)
@@ -1117,10 +1220,10 @@ class KnowledgeGraphStore:
1117
1220
  weight=max(edges.weight, excluded.weight),
1118
1221
  metadata_json=excluded.metadata_json
1119
1222
  """,
1120
- (edge_id, from_node, to_node, edge_type, float(weight), _json(metadata), now),
1223
+ (edge_id, from_node, to_node, edge_type, float(weight), meta_json, now),
1121
1224
  )
1122
1225
  # dual-write: project into the v2 graph on the same transaction
1123
- self._v2_project_edge(conn, from_node, to_node, edge_type, float(weight), metadata,
1226
+ self._v2_project_edge(conn, from_node, to_node, edge_type, float(weight), meta_json,
1124
1227
  edge_id=edge_id, created_at=now)
1125
1228
  return edge_id
1126
1229
 
@@ -3072,7 +3175,7 @@ class KnowledgeGraphStore:
3072
3175
  conn.execute(
3073
3176
  """
3074
3177
  DELETE FROM nodes_v2
3075
- WHERE type='Topic'
3178
+ WHERE legacy_type='Topic'
3076
3179
  AND id NOT IN (SELECT target FROM edges_v2)
3077
3180
  AND id NOT IN (SELECT source FROM edges_v2)
3078
3181
  """
package/llm_router.py CHANGED
@@ -227,6 +227,18 @@ def ensure_mlx_runtime() -> None:
227
227
  except Exception as e:
228
228
  raise RuntimeError(f"MLX runtime is not available after install: {e}") from e
229
229
 
230
+ def _mlx_sampler(temperature: float):
231
+ """Build an MLX sampler callable for the given temperature.
232
+
233
+ mlx_lm >= 0.20 removed the ``temp`` keyword from generate_step in favour of a
234
+ ``sampler`` callable, and mlx_vlm follows the same convention. Passing
235
+ ``temp=`` to generate/stream_generate now raises
236
+ ``generate_step() got an unexpected keyword argument 'temp'``. Both libraries
237
+ accept ``sampler=`` and share make_sampler from mlx_lm.sample_utils.
238
+ """
239
+ from mlx_lm.sample_utils import make_sampler
240
+ return make_sampler(temp=temperature)
241
+
230
242
  class LLMRouter:
231
243
  def __init__(self):
232
244
  self._cache: Dict[str, Tuple] = {}
@@ -514,10 +526,10 @@ class LLMRouter:
514
526
  is_gemma4 = "gemma-4" in self._current.lower() or "gemma4" in self._current.lower()
515
527
  if is_gemma4 and VLM_AVAILABLE:
516
528
  from mlx_vlm import generate as vlm_gen
517
- return vlm_gen(model, tokenizer, prompt=prompt, image=self._prep_image(image_data), max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
529
+ return vlm_gen(model, tokenizer, prompt=prompt, image=self._prep_image(image_data), max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
518
530
  else:
519
531
  from mlx_lm import generate as lm_gen
520
- return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
532
+ return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
521
533
  result = await loop.run_in_executor(executor, _gen)
522
534
  # mlx-vlm might return a GenerationResult object; extract the text
523
535
  if hasattr(result, "text"):
@@ -571,10 +583,10 @@ class LLMRouter:
571
583
  is_gemma4 = "gemma-4" in self._current.lower() or "gemma4" in self._current.lower()
572
584
  if is_gemma4 and VLM_AVAILABLE:
573
585
  from mlx_vlm import stream_generate as vlm_stream
574
- gen = vlm_stream(model, tokenizer, prompt=prompt, image=self._prep_image(image_data), max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
586
+ gen = vlm_stream(model, tokenizer, prompt=prompt, image=self._prep_image(image_data), max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
575
587
  else:
576
588
  from mlx_lm import stream_generate as lm_stream
577
- gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
589
+ gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
578
590
 
579
591
  for chunk in gen:
580
592
  text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
@@ -666,10 +678,10 @@ class LLMRouter:
666
678
  is_gemma4 = "gemma-4" in self._current.lower() or "gemma4" in self._current.lower()
667
679
  if is_gemma4 and VLM_AVAILABLE:
668
680
  from mlx_vlm import generate as vlm_gen
669
- return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
681
+ return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
670
682
  else:
671
683
  from mlx_lm import generate as lm_gen
672
- return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
684
+ return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
673
685
  result = await loop.run_in_executor(executor, _gen)
674
686
  if hasattr(result, "text"):
675
687
  return normalize_branding(result.text)
@@ -733,10 +745,10 @@ class LLMRouter:
733
745
  is_gemma4 = "gemma-4" in self._current.lower() or "gemma4" in self._current.lower()
734
746
  if is_gemma4 and VLM_AVAILABLE:
735
747
  from mlx_vlm import stream_generate as vlm_stream
736
- gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
748
+ gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model, draft_kind="mtp")
737
749
  else:
738
750
  from mlx_lm import stream_generate as lm_stream
739
- gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
751
+ gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, sampler=_mlx_sampler(temperature), draft_model=draft_model)
740
752
  for chunk in gen:
741
753
  text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
742
754
  loop.call_soon_threadsafe(queue.put_nowait, text)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ltcai",
3
- "version": "0.4.0",
3
+ "version": "0.5.1",
4
4
  "description": "Lattice AI local MLX/cloud LLM workspace server",
5
5
  "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
6
6
  "repository": {
package/server.py CHANGED
@@ -1121,7 +1121,7 @@ async def lifespan(app: FastAPI):
1121
1121
  except Exception:
1122
1122
  pass
1123
1123
 
1124
- app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.4.0", lifespan=lifespan)
1124
+ app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.5.1", lifespan=lifespan)
1125
1125
 
1126
1126
  CORS_ALLOWED_ORIGINS = [
1127
1127
  f"http://localhost:{DEFAULT_PORT}",
@@ -3466,7 +3466,7 @@ async def verify_cloud_models(force: bool = False, provider_filter: Optional[str
3466
3466
 
3467
3467
  @app.get("/health")
3468
3468
  async def health(request: Request):
3469
- base = {"status": "ok", "version": "0.4.0", "mode": APP_MODE}
3469
+ base = {"status": "ok", "version": "0.5.1", "mode": APP_MODE}
3470
3470
  if not get_current_user(request) and REQUIRE_AUTH:
3471
3471
  return base
3472
3472
  engines = await asyncio.to_thread(engine_status)