ltcai 2.2.7 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +72 -34
  2. package/docs/CHANGELOG.md +119 -0
  3. package/docs/V3_BACKEND_ARCHITECTURE.md +138 -0
  4. package/docs/V3_FRONTEND.md +139 -0
  5. package/knowledge_graph.py +649 -21
  6. package/latticeai/__init__.py +1 -1
  7. package/latticeai/api/admin.py +47 -0
  8. package/latticeai/api/agents.py +54 -31
  9. package/latticeai/api/auth.py +5 -2
  10. package/latticeai/api/chat.py +10 -2
  11. package/latticeai/api/search.py +240 -0
  12. package/latticeai/api/static_routes.py +11 -2
  13. package/latticeai/core/config.py +18 -0
  14. package/latticeai/core/embedding_providers.py +625 -0
  15. package/latticeai/core/local_embeddings.py +86 -0
  16. package/latticeai/core/workspace_os.py +1 -1
  17. package/latticeai/server_app.py +65 -1
  18. package/latticeai/services/agent_runtime.py +245 -0
  19. package/latticeai/services/search_service.py +346 -0
  20. package/package.json +13 -6
  21. package/scripts/build_v3_assets.mjs +164 -0
  22. package/scripts/capture/README.md +28 -0
  23. package/scripts/capture/capture_enterprise.js +8 -0
  24. package/scripts/capture/capture_graph.js +8 -0
  25. package/scripts/capture/capture_onboarding.js +8 -0
  26. package/scripts/capture/capture_page.js +43 -0
  27. package/scripts/capture/capture_release_media.js +125 -0
  28. package/scripts/capture/capture_skills.js +8 -0
  29. package/scripts/capture/capture_workspace.js +8 -0
  30. package/scripts/generate_diagrams.py +513 -0
  31. package/scripts/lint_v3.mjs +33 -0
  32. package/scripts/release-0.3.1.sh +105 -0
  33. package/scripts/take_screenshots.js +69 -0
  34. package/scripts/validate_release_artifacts.py +167 -0
  35. package/static/account.html +9 -9
  36. package/static/activity.html +4 -4
  37. package/static/admin.html +8 -8
  38. package/static/agents.html +4 -4
  39. package/static/chat.html +10 -10
  40. package/static/css/reference/account.css +137 -1
  41. package/static/css/reference/chat.css +31 -37
  42. package/static/css/responsive.css +42 -0
  43. package/static/css/tokens.5a595671.css +260 -0
  44. package/static/css/tokens.css +125 -130
  45. package/static/graph.html +9 -9
  46. package/static/manifest.json +3 -3
  47. package/static/plugins.html +4 -4
  48. package/static/scripts/account.js +4 -4
  49. package/static/scripts/chat.js +40 -8
  50. package/static/scripts/workspace.js +78 -0
  51. package/static/sw.js +3 -1
  52. package/static/v3/asset-manifest.json +47 -0
  53. package/static/v3/css/lattice.base.css +128 -0
  54. package/static/v3/css/lattice.base.e4cdd05d.css +128 -0
  55. package/static/v3/css/lattice.components.011e988b.css +447 -0
  56. package/static/v3/css/lattice.components.css +447 -0
  57. package/static/v3/css/lattice.shell.4920f42d.css +407 -0
  58. package/static/v3/css/lattice.shell.css +407 -0
  59. package/static/v3/css/lattice.tokens.c597ff81.css +132 -0
  60. package/static/v3/css/lattice.tokens.css +132 -0
  61. package/static/v3/css/lattice.views.3ee19d4e.css +277 -0
  62. package/static/v3/css/lattice.views.css +277 -0
  63. package/static/v3/index.html +69 -0
  64. package/static/v3/js/app.46fb61d9.js +26 -0
  65. package/static/v3/js/app.js +26 -0
  66. package/static/v3/js/core/api.22a41d42.js +344 -0
  67. package/static/v3/js/core/api.js +344 -0
  68. package/static/v3/js/core/components.4c83e0a9.js +222 -0
  69. package/static/v3/js/core/components.js +222 -0
  70. package/static/v3/js/core/dom.a2773eb0.js +148 -0
  71. package/static/v3/js/core/dom.js +148 -0
  72. package/static/v3/js/core/router.584570f2.js +37 -0
  73. package/static/v3/js/core/router.js +37 -0
  74. package/static/v3/js/core/routes.f935dd50.js +78 -0
  75. package/static/v3/js/core/routes.js +78 -0
  76. package/static/v3/js/core/shell.1b6199d6.js +363 -0
  77. package/static/v3/js/core/shell.js +363 -0
  78. package/static/v3/js/core/store.34ebd5e6.js +113 -0
  79. package/static/v3/js/core/store.js +113 -0
  80. package/static/v3/js/views/admin-audit.660a1fb1.js +185 -0
  81. package/static/v3/js/views/admin-audit.js +185 -0
  82. package/static/v3/js/views/admin-permissions.a7ae5f09.js +177 -0
  83. package/static/v3/js/views/admin-permissions.js +177 -0
  84. package/static/v3/js/views/admin-policies.3658fd86.js +102 -0
  85. package/static/v3/js/views/admin-policies.js +102 -0
  86. package/static/v3/js/views/admin-private-vpc.7d342d36.js +135 -0
  87. package/static/v3/js/views/admin-private-vpc.js +135 -0
  88. package/static/v3/js/views/admin-security.07c66b72.js +180 -0
  89. package/static/v3/js/views/admin-security.js +180 -0
  90. package/static/v3/js/views/admin-users.03bac88c.js +168 -0
  91. package/static/v3/js/views/admin-users.js +168 -0
  92. package/static/v3/js/views/agents.14e48bdd.js +193 -0
  93. package/static/v3/js/views/agents.js +193 -0
  94. package/static/v3/js/views/chat.718144ce.js +449 -0
  95. package/static/v3/js/views/chat.js +449 -0
  96. package/static/v3/js/views/files.4935197e.js +186 -0
  97. package/static/v3/js/views/files.js +186 -0
  98. package/static/v3/js/views/home.cdde3b32.js +119 -0
  99. package/static/v3/js/views/home.js +119 -0
  100. package/static/v3/js/views/hybrid-search.b22b97e0.js +195 -0
  101. package/static/v3/js/views/hybrid-search.js +195 -0
  102. package/static/v3/js/views/knowledge-graph.a14ea7e7.js +237 -0
  103. package/static/v3/js/views/knowledge-graph.js +237 -0
  104. package/static/v3/js/views/models.a1ffa147.js +256 -0
  105. package/static/v3/js/views/models.js +256 -0
  106. package/static/v3/js/views/my-computer.1b2ff621.js +237 -0
  107. package/static/v3/js/views/my-computer.js +237 -0
  108. package/static/v3/js/views/pipeline.c522f1ce.js +157 -0
  109. package/static/v3/js/views/pipeline.js +157 -0
  110. package/static/v3/js/views/settings.4f777210.js +250 -0
  111. package/static/v3/js/views/settings.js +250 -0
  112. package/static/workflows.html +4 -4
  113. package/static/workspace.css +340 -2
  114. package/static/workspace.html +43 -24
  115. package/docs/images/tmp_frames/frame_00.png +0 -0
  116. package/docs/images/tmp_frames/frame_01.png +0 -0
  117. package/docs/images/tmp_frames/frame_02.png +0 -0
  118. package/docs/images/tmp_frames/frame_03.png +0 -0
  119. package/docs/images/tmp_frames/hero_00.png +0 -0
  120. package/docs/images/tmp_frames/hero_01.png +0 -0
  121. package/docs/images/tmp_frames/hero_02.png +0 -0
  122. package/docs/images/tmp_frames/hero_03.png +0 -0
@@ -0,0 +1,625 @@
1
+ """Provider-backed embeddings for Lattice AI retrieval.
2
+
3
+ The knowledge graph stores dense vectors keyed by ``(embedding_model,
4
+ embedding_dim)`` and only ever compares vectors that share those keys
5
+ (``knowledge_graph.vector_search``). That contract means the *embedder* can be
6
+ swapped behind a single interface as long as every implementation agrees on:
7
+
8
+ * ``model_id`` / ``dim`` — the index identity (a change forces a re-index, which
9
+ ``index_status`` already reports as ``stale``/``needs_reindex``);
10
+ * ``encode`` / ``decode`` — the on-disk float32 codec (shared by all providers);
11
+ * ``embed`` returns an **L2-normalized** vector, so ``similarity`` is a plain dot
12
+ product and equals cosine similarity regardless of provider.
13
+
14
+ This module defines that :class:`EmbeddingProvider` interface and five concrete
15
+ implementations:
16
+
17
+ 1. :class:`HashEmbeddingProvider` — deterministic, offline, always-available
18
+ fallback (wraps the legacy :class:`~latticeai.core.local_embeddings.LocalEmbeddingModel`).
19
+ 2. :class:`MLXEmbeddingProvider` — local Apple-Silicon embedding models.
20
+ 3. :class:`OllamaEmbeddingProvider` — a local/remote Ollama server.
21
+ 4. :class:`OpenAICompatibleEmbeddingProvider` — any ``/v1/embeddings`` endpoint
22
+ (OpenAI, LM Studio, vLLM, llama.cpp, Together, …).
23
+ 5. :class:`CustomEmbeddingProvider` — a user-supplied dotted callable.
24
+
25
+ :func:`resolve_embedder` builds the configured provider and, when that provider
26
+ is unavailable, degrades to the hash fallback while *reporting* the requested
27
+ vs. active provider — nothing is silently faked.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import importlib
33
+ import math
34
+ import os
35
+ import struct
36
+ from dataclasses import dataclass, field
37
+ from typing import Any, Dict, Iterable, List, Optional, Sequence
38
+
39
+ from latticeai.core.local_embeddings import DEFAULT_EMBEDDING_DIM, LocalEmbeddingModel
40
+
41
+
42
class EmbeddingUnavailable(RuntimeError):
    """Signal that the configured embedding provider failed to embed.

    Hot-path callers such as ``vector_search`` turn this into an explicit
    503 / "provider unavailable" response instead of handing back a
    misleading empty result.
    """
48
+
49
+
50
+ # Best-known output dimensionality for common embedding models, so the index
51
+ # identity is stable before the first (possibly remote) call. A configured
52
+ # ``dim`` always wins; an unknown model falls back to a one-time live probe.
53
# Lookup table: bare model name (lower-case, no namespace or tag) -> vector size.
# Built from ordered pairs so the insertion order matches the listing below.
_KNOWN_DIMS = dict(
    (
        ("bge-m3", 1024),
        ("nomic-embed-text", 768),
        ("mxbai-embed-large", 1024),
        ("all-minilm", 384),
        ("all-minilm-l6-v2", 384),
        ("bge-small-en", 384),
        ("bge-base-en", 768),
        ("bge-large-en", 1024),
        ("gte-small", 384),
        ("gte-base", 768),
        ("gte-large", 1024),
        ("e5-large", 1024),
        ("multilingual-e5-large", 1024),
        ("text-embedding-3-small", 1536),
        ("text-embedding-3-large", 3072),
        ("text-embedding-ada-002", 1536),
    )
)
71
+
72
+
73
def _production_profile(
    profile_id: str,
    provider: str,
    model: str,
    dimensions: int,
    family: str,
    label: str,
    detail: str,
) -> Dict[str, Any]:
    """Assemble one production-grade profile record with a fixed key order."""
    return {
        "id": profile_id,
        "provider": provider,
        "model": model,
        "dimensions": dimensions,
        "grade": "production",
        "family": family,
        "label": label,
        "detail": detail,
    }


# Named, ready-to-use provider/model pairings, keyed by their own ``id``.
PRODUCTION_PROVIDER_PROFILES: Dict[str, Dict[str, Any]] = {
    entry["id"]: entry
    for entry in (
        _production_profile(
            "local:bge-m3", "mlx", "bge-m3", 1024, "local",
            "BGE-M3 local",
            "Multilingual semantic embeddings for local retrieval.",
        ),
        _production_profile(
            "local:nomic-embed-text", "ollama", "nomic-embed-text", 768, "local",
            "Nomic Embed Text local",
            "General-purpose local semantic embeddings.",
        ),
        _production_profile(
            "local:e5-large", "mlx", "e5-large", 1024, "local",
            "E5 Large local",
            "High-recall local retrieval profile.",
        ),
        _production_profile(
            "local:gte-large", "mlx", "gte-large", 1024, "local",
            "GTE Large local",
            "Large local semantic embedding profile.",
        ),
        _production_profile(
            "ollama:nomic-embed-text", "ollama", "nomic-embed-text", 768, "ollama",
            "Ollama Nomic Embed Text",
            "Production semantic embeddings through Ollama.",
        ),
        _production_profile(
            "ollama:mxbai-embed-large", "ollama", "mxbai-embed-large", 1024, "ollama",
            "Ollama MXBAI Embed Large",
            "High-quality local semantic embeddings through Ollama.",
        ),
        _production_profile(
            "ollama:bge-m3", "ollama", "bge-m3", 1024, "ollama",
            "Ollama BGE-M3-compatible",
            "BGE-M3-compatible providers exposed through Ollama.",
        ),
        _production_profile(
            "mlx:bge-m3", "mlx", "bge-m3", 1024, "mlx",
            "MLX BGE-M3",
            "Apple Silicon optimized local embeddings.",
        ),
        _production_profile(
            "openai:text-embedding-3-small", "openai", "text-embedding-3-small", 1536,
            "openai-compatible",
            "OpenAI-compatible small",
            "OpenAI-compatible /v1/embeddings endpoint.",
        ),
        _production_profile(
            "openai:text-embedding-3-large", "openai", "text-embedding-3-large", 3072,
            "openai-compatible",
            "OpenAI-compatible large",
            "Highest-dimensional OpenAI-compatible embedding profile.",
        ),
    )
}
175
+
176
+
177
def embedding_provider_profiles() -> List[Dict[str, Any]]:
    """Return a shallow copy of every production profile, ordered by profile id."""
    ordered = sorted(PRODUCTION_PROVIDER_PROFILES.items(), key=lambda pair: pair[0])
    return [dict(record) for _profile_id, record in ordered]
179
+
180
+
181
def resolve_embedding_profile(profile: str) -> Dict[str, Any]:
    """Look up a production profile by id, ignoring case and outer whitespace.

    Returns an empty dict when ``profile`` is falsy, otherwise a shallow copy
    of the matching record.

    Raises:
        ValueError: if ``profile`` is non-empty but names no known profile.
    """
    if not profile:
        return {}
    normalized = str(profile).strip().lower()
    record = PRODUCTION_PROVIDER_PROFILES.get(normalized)
    if record is None:
        raise ValueError(f"unknown embedding profile: {profile!r}")
    return dict(record)
188
+
189
+
190
def _guess_dim(model: str, default: int) -> int:
    """Return the known dimensionality for ``model``, or ``default`` if unknown.

    Only the part after the last ``/`` is considered, and anything from the
    first ``:`` onward (e.g. a tag) is dropped before the table lookup.
    """
    tail = str(model or "").rpartition("/")[2]
    bare_name = tail.strip().lower().partition(":")[0]
    return _KNOWN_DIMS.get(bare_name, default)
194
+
195
+
196
def _l2_normalize(vector: Sequence[float]) -> List[float]:
    """Scale ``vector`` to unit Euclidean length and return it as floats.

    A vector whose norm is not positive is returned unscaled (as floats), so
    an all-zero input stays all-zero instead of dividing by zero.
    """
    components = [float(value) for value in vector]
    length = math.sqrt(sum(c * c for c in components))
    if length <= 0:
        return components
    return [c / length for c in components]
201
+
202
+
203
class EmbeddingProvider:
    """Common contract shared by every embedding backend.

    A concrete backend assigns ``model_id`` and ``dim`` and overrides
    :meth:`embed_batch`. Single-text embedding, the float32 byte codec, the
    similarity score and the health/metadata reporting are inherited.
    """

    # Identity persisted next to each stored vector; a change means re-index.
    model_id: str = ""
    # Number of components per vector.
    dim: int = DEFAULT_EMBEDDING_DIM
    # Backend kind: "hash" | "mlx" | "ollama" | "openai" | "custom".
    provider: str = "hash"
    # "fallback" for the hash embedder, "production" for a real semantic model.
    grade: str = "production"

    # ── must be overridden ────────────────────────────────────────────────
    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed several texts at once; concrete providers implement this."""
        raise NotImplementedError

    # ── shared behaviour ──────────────────────────────────────────────────
    def embed(self, text: str) -> List[float]:
        """Embed one text; yields a zero vector of ``dim`` if the batch is empty."""
        vectors = self.embed_batch([text])
        if vectors:
            return vectors[0]
        return [0.0] * self.dim

    def encode(self, vector: Iterable[float]) -> bytes:
        """Pack ``vector`` as little-endian float32 bytes."""
        floats = [float(component) for component in vector]
        layout = f"<{len(floats)}f"
        return struct.pack(layout, *floats)

    def decode(self, payload: bytes, dim: Optional[int] = None) -> List[float]:
        """Unpack little-endian float32 bytes produced by :meth:`encode`.

        The expected component count is ``dim`` (or ``self.dim``). When the
        payload size disagrees with it, as many whole 4-byte floats as the
        payload holds are decoded instead. An empty payload gives ``[]``.
        """
        if not payload:
            return []
        expected = int(dim or self.dim)
        if len(payload) == expected * 4:
            count = expected
        else:
            count = len(payload) // 4
        usable = payload[: count * 4]
        return list(struct.unpack(f"<{count}f", usable))

    def similarity(self, left: Iterable[float], right: Iterable[float]) -> float:
        """Dot product of the two vectors, paired component by component."""
        total = 0
        for a, b in zip(left, right):
            total += a * b
        return float(total)

    # ── reporting ─────────────────────────────────────────────────────────
    def health(self) -> Dict[str, Any]:
        """Return ``{status, detail}``; status ∈ ok | unavailable."""
        return dict(status="ok", detail="ready")

    def metadata(self) -> Dict[str, Any]:
        """Describe this provider: kind, model identity, dimensionality, grade."""
        return dict(
            provider=self.provider,
            model=self.model_id,
            model_id=self.model_id,
            dim=self.dim,
            grade=self.grade,
        )
256
+
257
+
258
+ # ── 1. Hash (offline fallback) ────────────────────────────────────────────────
259
class HashEmbeddingProvider(EmbeddingProvider):
    """Offline fallback embedder backed by ``LocalEmbeddingModel``.

    It never touches the network, so it is always available.
    """

    provider = "hash"
    grade = "fallback"

    def __init__(self, dim: int = DEFAULT_EMBEDDING_DIM):
        hasher = LocalEmbeddingModel(dim=dim)
        self._model = hasher
        # Identity and size are whatever the wrapped model reports.
        self.dim = hasher.dim
        self.model_id = hasher.model_id

    def embed(self, text: str) -> List[float]:
        # The wrapped model already returns an L2-normalized vector.
        vector = self._model.embed(text)
        return vector

    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        vectors: List[List[float]] = []
        for item in texts:
            vectors.append(self._model.embed(item))
        return vectors

    def health(self) -> Dict[str, Any]:
        return dict(status="ok", detail="deterministic local fallback")
278
+
279
+
280
+ # ── shared base for remote/model-backed providers ─────────────────────────────
281
@dataclass
class _RemoteConfig:
    """Settings bundle handed to every model- or server-backed provider."""

    # Model name passed to the backend.
    model: str
    # Server root; each provider substitutes its own default when empty.
    base_url: str = ""
    # Bearer token; only sent when non-empty.
    api_key: str = ""
    # Configured vector size; a falsy value lets the provider pick one.
    dim: int = DEFAULT_EMBEDDING_DIM
    # Request timeout passed to the HTTP client.
    timeout: float = 30.0
    # Provider-specific options (e.g. ``target`` for the custom provider).
    extra: Dict[str, Any] = field(default_factory=dict)
289
+
290
+
291
class _NetworkEmbeddingProvider(EmbeddingProvider):
    """Common machinery for providers that call a model/server to embed.

    Subclasses implement :meth:`_embed_raw`; :meth:`embed_batch` sanitizes the
    inputs, validates the response shape and L2-normalizes every vector.
    """

    def __init__(self, cfg: _RemoteConfig):
        self._cfg = cfg
        self.dim = int(cfg.dim or DEFAULT_EMBEDDING_DIM)

    # subclasses implement the raw call
    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Return one raw (un-normalized) vector per text, in input order."""
        raise NotImplementedError

    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed ``texts`` and return one L2-normalized vector per input.

        Each text is coerced to ``str`` (``None``/falsy becomes ``""``) and
        truncated to 50,000 characters. A row the backend returns empty or as
        ``None`` becomes a zero vector of the current ``dim``.

        Raises:
            EmbeddingUnavailable: if the backend call fails (raised by
                ``_embed_raw``) or it returns a different number of vectors
                than inputs, which would misalign vectors with their texts.
        """
        clean = [str(t or "")[:50_000] for t in texts]
        if not clean:
            return []
        vectors = list(self._embed_raw(clean))
        if len(vectors) != len(clean):
            raise EmbeddingUnavailable(
                f"{self.provider} embedding returned {len(vectors)} vectors "
                f"for {len(clean)} inputs"
            )
        out: List[List[float]] = []
        for raw in vectors:
            # Test against None explicitly: truth-testing an array-like row
            # (e.g. a numpy array from a custom callable) raises ValueError.
            vec = [] if raw is None else [float(x) for x in raw]
            if vec:
                # lock the index identity to the true model dimensionality
                # NOTE: only ``dim`` is refreshed here; ``model_id`` keeps the
                # dimensionality it was given at construction time.
                self.dim = len(vec)
            out.append(_l2_normalize(vec) if vec else [0.0] * self.dim)
        return out
315
+
316
+
317
+ # ── 2. MLX (local Apple-Silicon model) ────────────────────────────────────────
318
class MLXEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embeds text with a model loaded through the optional ``mlx_embeddings`` package."""

    provider = "mlx"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        # Same rule as the Ollama/OpenAI providers: with no configured dim,
        # use the known size for the model so ``model_id`` does not carry the
        # hash default dimensionality.
        if not cfg.dim:
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"mlx:{cfg.model}:{self.dim}"
        self._encoder = None

    def _load(self):
        """Load the model/tokenizer pair once and cache it on the instance.

        Raises:
            EmbeddingUnavailable: if the import or the model load fails.
        """
        if self._encoder is not None:
            return self._encoder
        try:  # optional dependency; only imported when this provider is used
            from mlx_embeddings.utils import load as mlx_load  # type: ignore

            model, tokenizer = mlx_load(self._cfg.model)
            self._encoder = ("mlx_embeddings", model, tokenizer)
            return self._encoder
        except Exception as exc:  # pragma: no cover - environment dependent
            raise EmbeddingUnavailable(f"MLX embedding model unavailable: {exc}") from exc

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Run each text through the model and return one raw vector per text.

        Raises:
            EmbeddingUnavailable: if loading or inference fails.
        """
        _kind, model, tokenizer = self._load()
        try:
            import mlx.core as mx  # type: ignore

            out: List[List[float]] = []
            for text in texts:
                ids = tokenizer.encode(text)
                tokens = mx.array([ids])
                result = model(tokens)
                # Some models return a tuple/list; the first element is used.
                pooled = result[0] if isinstance(result, (tuple, list)) else result
                # Rank-3 output is averaged over axis 1 (presumably the token
                # axis; confirm against the model); otherwise row 0 is taken.
                vec = mx.mean(pooled, axis=1)[0] if pooled.ndim == 3 else pooled[0]
                out.append([float(x) for x in vec.tolist()])
            return out
        except EmbeddingUnavailable:
            raise
        except Exception as exc:  # pragma: no cover - environment dependent
            raise EmbeddingUnavailable(f"MLX embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` when the model loads, ``unavailable`` with the error otherwise."""
        try:
            self._load()
            return {"status": "ok", "detail": f"MLX model {self._cfg.model} loaded"}
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
363
+
364
+
365
+ # ── 3. Ollama ─────────────────────────────────────────────────────────────────
366
class OllamaEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embeds text through an Ollama server (default ``http://127.0.0.1:11434``)."""

    provider = "ollama"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        server = cfg.base_url or "http://127.0.0.1:11434"
        self._base = server.rstrip("/")
        # No configured size: use the known size for the model, if any.
        if not cfg.dim:
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"ollama:{cfg.model}:{self.dim}"

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Request embeddings, trying the batch endpoint before the per-text one.

        Raises:
            EmbeddingUnavailable: on any failure while importing ``httpx``,
                talking to the server, or reading the response.
        """
        per_text: List[List[float]] = []
        try:
            import httpx

            with httpx.Client(timeout=self._cfg.timeout) as client:
                # /api/embed takes the whole batch in one request.
                batch = client.post(
                    f"{self._base}/api/embed",
                    json={"model": self._cfg.model, "input": list(texts)},
                )
                if batch.status_code != 404:
                    batch.raise_for_status()
                    body = batch.json()
                    return body.get("embeddings") or [body.get("embedding") or []]
                # A 404 on /api/embed: fall back to one /api/embeddings call per text.
                for text in texts:
                    single = client.post(
                        f"{self._base}/api/embeddings",
                        json={"model": self._cfg.model, "prompt": text},
                    )
                    single.raise_for_status()
                    per_text.append(single.json().get("embedding") or [])
                return per_text
        except Exception as exc:
            raise EmbeddingUnavailable(f"Ollama embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Probe ``/api/tags`` with a timeout capped at five seconds."""
        try:
            import httpx

            probe_timeout = min(self._cfg.timeout, 5.0)
            with httpx.Client(timeout=probe_timeout) as client:
                reply = client.get(f"{self._base}/api/tags")
                reply.raise_for_status()
            return {"status": "ok", "detail": f"Ollama reachable at {self._base}"}
        except Exception as exc:
            return {"status": "unavailable", "detail": f"Ollama unreachable: {exc}"}
412
+
413
+
414
+ # ── 4. OpenAI-compatible (/v1/embeddings) ─────────────────────────────────────
415
class OpenAICompatibleEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embeds text through an ``/embeddings`` endpoint (default ``https://api.openai.com/v1``)."""

    provider = "openai"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        endpoint = cfg.base_url or "https://api.openai.com/v1"
        self._base = endpoint.rstrip("/")
        # No configured size: use the known size for the model, if any.
        if not cfg.dim:
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"openai:{cfg.model}:{self.dim}"

    def _headers(self) -> Dict[str, str]:
        """Build the request headers; the bearer token is added only when set."""
        headers = {"Content-Type": "application/json"}
        token = self._cfg.api_key
        if not token:
            return headers
        headers["Authorization"] = f"Bearer {token}"
        return headers

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """POST the batch and return the embeddings ordered by their ``index``.

        Raises:
            EmbeddingUnavailable: on any failure while importing ``httpx``,
                calling the endpoint, or reading the response.
        """

        def position(row):
            # Rows without an explicit index sort as index 0.
            return row.get("index", 0)

        try:
            import httpx

            with httpx.Client(timeout=self._cfg.timeout) as client:
                response = client.post(
                    f"{self._base}/embeddings",
                    headers=self._headers(),
                    json={"model": self._cfg.model, "input": list(texts)},
                )
                response.raise_for_status()
                rows = response.json().get("data", [])
                return [row.get("embedding") or [] for row in sorted(rows, key=position)]
        except Exception as exc:
            raise EmbeddingUnavailable(f"OpenAI-compatible embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Check reachability by sending a real embedding request for ``"ping"``."""
        try:
            self._embed_raw(["ping"])
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
        return {"status": "ok", "detail": f"{self._base} reachable"}
453
+
454
+
455
+ # ── 5. Custom (user-supplied callable) ────────────────────────────────────────
456
class CustomEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embeds text with a user-supplied callable.

    The target is written ``module:callable`` or ``module.callable``. It comes
    from ``extra["target"]`` or, failing that, the
    ``LATTICEAI_EMBEDDING_CUSTOM_TARGET`` environment variable. The callable
    is given a ``List[str]`` and must return a ``List[List[float]]``.
    """

    provider = "custom"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        configured = cfg.extra.get("target") or os.getenv("LATTICEAI_EMBEDDING_CUSTOM_TARGET", "")
        self._target_ref = str(configured)
        label = cfg.model or self._target_ref or "callable"
        self.model_id = f"custom:{label}:{self.dim}"
        self._fn = None

    def _load(self):
        """Import the target once and cache the resolved attribute.

        Raises:
            EmbeddingUnavailable: if no target is configured, the reference
                has no module part, or the import/attribute lookup fails.
        """
        cached = self._fn
        if cached is not None:
            return cached
        ref = self._target_ref
        if not ref:
            raise EmbeddingUnavailable("custom embedding target not configured (LATTICEAI_EMBEDDING_CUSTOM_TARGET)")
        # Treat ":" like "." and split on the last dot: module path, then attribute.
        dotted = ref.replace(":", ".")
        module_name, _sep, attr = dotted.rpartition(".")
        if not module_name:
            raise EmbeddingUnavailable(f"invalid custom embedding target: {ref}")
        try:
            module = importlib.import_module(module_name)
            self._fn = getattr(module, attr)
        except Exception as exc:
            raise EmbeddingUnavailable(f"custom embedding target unavailable: {exc}") from exc
        return self._fn

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Call the target with a list copy of ``texts`` and materialize its result.

        Raises:
            EmbeddingUnavailable: if the target cannot be loaded or it raises.
        """
        target = self._load()
        try:
            produced = target(list(texts))
            return list(produced)
        except Exception as exc:
            raise EmbeddingUnavailable(f"custom embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` when the target resolves, ``unavailable`` with the error otherwise."""
        try:
            self._load()
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
        return {"status": "ok", "detail": f"custom target {self._target_ref} loaded"}
500
+
501
+
502
+ # ── factory + resolution ──────────────────────────────────────────────────────
503
PROVIDER_TYPES = ("hash", "mlx", "ollama", "openai", "custom")


def build_embedding_provider(
    provider: str,
    *,
    model: str = "",
    base_url: str = "",
    api_key: str = "",
    dim: int = 0,
    timeout: float = 30.0,
    extra: Optional[Dict[str, Any]] = None,
) -> EmbeddingProvider:
    """Construct a provider by name. Never makes a network call.

    ``provider`` is matched case-insensitively after stripping whitespace. An
    empty name, ``hash``, ``local`` and ``fallback`` all give the hash
    provider; ``openai-compatible`` and ``openai_compatible`` are accepted as
    aliases of ``openai``.

    Raises:
        ValueError: if the name matches no provider kind.
    """
    kind = str(provider or "hash").strip().lower()
    if kind in ("", "hash", "local", "fallback"):
        return HashEmbeddingProvider(dim=int(dim or DEFAULT_EMBEDDING_DIM))

    # The config is assembled before the kind is validated, as before.
    settings = _RemoteConfig(
        model=model,
        base_url=base_url,
        api_key=api_key,
        dim=int(dim or 0),
        timeout=float(timeout or 30.0),
        extra=dict(extra or {}),
    )
    constructors = {
        "mlx": MLXEmbeddingProvider,
        "ollama": OllamaEmbeddingProvider,
        "openai": OpenAICompatibleEmbeddingProvider,
        "openai-compatible": OpenAICompatibleEmbeddingProvider,
        "openai_compatible": OpenAICompatibleEmbeddingProvider,
        "custom": CustomEmbeddingProvider,
    }
    constructor = constructors.get(kind)
    if constructor is None:
        raise ValueError(f"unknown embedding provider: {provider!r} (expected one of {PROVIDER_TYPES})")
    return constructor(settings)
537
+
538
+
539
@dataclass
class ResolvedEmbedder:
    """Outcome of provider resolution: the embedder in use plus how it was chosen."""

    # The embedder callers should actually use.
    provider: EmbeddingProvider
    # Provider kind that was asked for.
    requested: str
    # Provider kind that is really serving embeddings.
    active: str
    # True when the requested provider was replaced by the hash fallback.
    fell_back: bool
    # ``{status, detail}`` describing the requested provider.
    health: Dict[str, Any]
    # Human-readable explanation of the resolution.
    detail: str = ""

    def as_dict(self) -> Dict[str, Any]:
        """Flatten the resolution report, followed by the active provider's metadata."""
        report: Dict[str, Any] = {
            "requested_provider": self.requested,
            "active_provider": self.active,
            "fell_back": self.fell_back,
            "health": self.health,
            "detail": self.detail,
        }
        # Metadata keys are applied last, so they win on any name clash.
        report.update(self.provider.metadata())
        return report
557
+
558
+
559
def resolve_embedder(
    provider: str = "",
    *,
    model: str = "",
    base_url: str = "",
    api_key: str = "",
    dim: int = 0,
    timeout: float = 30.0,
    extra: Optional[Dict[str, Any]] = None,
    probe: bool = True,
) -> ResolvedEmbedder:
    """Return a usable embedder, substituting the hash fallback when needed.

    The requested provider is built and, when ``probe`` is true, asked for its
    health. If construction fails or the health status is not ``ok``, the hash
    provider is returned instead with ``fell_back=True`` and the failing
    health detail, so a down provider is reported as such rather than hidden.
    With ``probe`` false the provider is returned unchecked and its health is
    reported as ``unknown``.
    """

    def hash_provider() -> HashEmbeddingProvider:
        # Built on demand so each path constructs it only when it is needed.
        return HashEmbeddingProvider(dim=int(dim or DEFAULT_EMBEDDING_DIM))

    requested = str(provider or "hash").strip().lower() or "hash"

    # Explicit request for the hash embedder: nothing to probe or fall back from.
    if requested in ("hash", "local", "fallback", ""):
        prov = hash_provider()
        return ResolvedEmbedder(prov, "hash", "hash", False, prov.health(), "deterministic local fallback")

    try:
        prov = build_embedding_provider(
            requested,
            model=model,
            base_url=base_url,
            api_key=api_key,
            dim=dim,
            timeout=timeout,
            extra=extra,
        )
    except Exception as exc:
        return ResolvedEmbedder(
            hash_provider(),
            requested,
            "hash",
            True,
            {"status": "unavailable", "detail": str(exc)},
            f"could not construct {requested}; using hash fallback",
        )

    if not probe:
        unprobed = {"status": "unknown", "detail": "not probed"}
        return ResolvedEmbedder(prov, requested, prov.provider, False, unprobed, "")

    try:
        health = prov.health()
    except Exception as exc:  # provider health must never crash startup
        health = {"status": "unavailable", "detail": str(exc)}

    if health.get("status") == "ok":
        return ResolvedEmbedder(prov, requested, prov.provider, False, health, "")

    return ResolvedEmbedder(
        hash_provider(),
        requested,
        "hash",
        True,
        health,
        f"{requested} unavailable ({health.get('detail', '')}); using hash fallback",
    )
608
+
609
+
610
# Public names of this module, in their original order.
__all__ = [
    # base interface and error
    "EmbeddingProvider", "EmbeddingUnavailable",
    # concrete providers
    "HashEmbeddingProvider", "MLXEmbeddingProvider", "OllamaEmbeddingProvider",
    "OpenAICompatibleEmbeddingProvider", "CustomEmbeddingProvider",
    # resolution
    "ResolvedEmbedder", "build_embedding_provider", "resolve_embedder",
    # profiles and constants
    "resolve_embedding_profile", "embedding_provider_profiles",
    "PRODUCTION_PROVIDER_PROFILES", "PROVIDER_TYPES",
]