ltcai 2.2.7 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +63 -32
  2. package/docs/CHANGELOG.md +82 -0
  3. package/docs/V3_BACKEND_ARCHITECTURE.md +138 -0
  4. package/docs/V3_FRONTEND.md +136 -0
  5. package/knowledge_graph.py +649 -21
  6. package/latticeai/__init__.py +1 -1
  7. package/latticeai/api/admin.py +47 -0
  8. package/latticeai/api/agents.py +54 -31
  9. package/latticeai/api/auth.py +1 -1
  10. package/latticeai/api/chat.py +10 -2
  11. package/latticeai/api/search.py +236 -0
  12. package/latticeai/api/static_routes.py +11 -2
  13. package/latticeai/core/config.py +16 -0
  14. package/latticeai/core/embedding_providers.py +502 -0
  15. package/latticeai/core/local_embeddings.py +86 -0
  16. package/latticeai/core/workspace_os.py +1 -1
  17. package/latticeai/server_app.py +49 -1
  18. package/latticeai/services/agent_runtime.py +245 -0
  19. package/latticeai/services/search_service.py +346 -0
  20. package/package.json +6 -4
  21. package/static/account.html +9 -9
  22. package/static/activity.html +4 -4
  23. package/static/admin.html +8 -8
  24. package/static/agents.html +4 -4
  25. package/static/chat.html +10 -10
  26. package/static/css/reference/account.css +137 -1
  27. package/static/css/reference/chat.css +31 -37
  28. package/static/css/responsive.css +42 -0
  29. package/static/css/tokens.css +125 -130
  30. package/static/graph.html +9 -9
  31. package/static/manifest.json +3 -3
  32. package/static/plugins.html +4 -4
  33. package/static/scripts/account.js +4 -4
  34. package/static/scripts/chat.js +40 -8
  35. package/static/scripts/workspace.js +78 -0
  36. package/static/v3/css/lattice.base.css +128 -0
  37. package/static/v3/css/lattice.components.css +447 -0
  38. package/static/v3/css/lattice.shell.css +407 -0
  39. package/static/v3/css/lattice.tokens.css +132 -0
  40. package/static/v3/css/lattice.views.css +277 -0
  41. package/static/v3/index.html +40 -0
  42. package/static/v3/js/app.js +26 -0
  43. package/static/v3/js/core/api.js +327 -0
  44. package/static/v3/js/core/components.js +215 -0
  45. package/static/v3/js/core/dom.js +148 -0
  46. package/static/v3/js/core/fixtures.js +171 -0
  47. package/static/v3/js/core/router.js +37 -0
  48. package/static/v3/js/core/routes.js +73 -0
  49. package/static/v3/js/core/shell.js +363 -0
  50. package/static/v3/js/core/store.js +113 -0
  51. package/static/v3/js/views/admin-audit.js +185 -0
  52. package/static/v3/js/views/admin-permissions.js +178 -0
  53. package/static/v3/js/views/admin-policies.js +103 -0
  54. package/static/v3/js/views/admin-private-vpc.js +138 -0
  55. package/static/v3/js/views/admin-security.js +181 -0
  56. package/static/v3/js/views/admin-users.js +168 -0
  57. package/static/v3/js/views/agents.js +194 -0
  58. package/static/v3/js/views/chat.js +450 -0
  59. package/static/v3/js/views/files.js +180 -0
  60. package/static/v3/js/views/home.js +119 -0
  61. package/static/v3/js/views/hybrid-search.js +195 -0
  62. package/static/v3/js/views/knowledge-graph.js +238 -0
  63. package/static/v3/js/views/models.js +247 -0
  64. package/static/v3/js/views/my-computer.js +237 -0
  65. package/static/v3/js/views/pipeline.js +161 -0
  66. package/static/v3/js/views/settings.js +258 -0
  67. package/static/workflows.html +4 -4
  68. package/static/workspace.css +340 -2
  69. package/static/workspace.html +43 -24
@@ -0,0 +1,502 @@
1
+ """Provider-backed embeddings for Lattice AI retrieval.
2
+
3
+ The knowledge graph stores dense vectors keyed by ``(embedding_model,
4
+ embedding_dim)`` and only ever compares vectors that share those keys
5
+ (``knowledge_graph.vector_search``). That contract means the *embedder* can be
6
+ swapped behind a single interface as long as every implementation agrees on:
7
+
8
+ * ``model_id`` / ``dim`` — the index identity (a change forces a re-index, which
9
+ ``index_status`` already reports as ``stale``/``needs_reindex``);
10
+ * ``encode`` / ``decode`` — the on-disk float32 codec (shared by all providers);
11
+ * ``embed`` returns an **L2-normalized** vector, so ``similarity`` is a plain dot
12
+ product and equals cosine similarity regardless of provider.
13
+
14
+ This module defines that :class:`EmbeddingProvider` interface and five concrete
15
+ implementations:
16
+
17
+ 1. :class:`HashEmbeddingProvider` — deterministic, offline, always-available
18
+ fallback (wraps the legacy :class:`~latticeai.core.local_embeddings.LocalEmbeddingModel`).
19
+ 2. :class:`MLXEmbeddingProvider` — local Apple-Silicon embedding models.
20
+ 3. :class:`OllamaEmbeddingProvider` — a local/remote Ollama server.
21
+ 4. :class:`OpenAICompatibleEmbeddingProvider` — any ``/v1/embeddings`` endpoint
22
+ (OpenAI, LM Studio, vLLM, llama.cpp, Together, …).
23
+ 5. :class:`CustomEmbeddingProvider` — a user-supplied dotted callable.
24
+
25
+ :func:`resolve_embedder` builds the configured provider and, when that provider
26
+ is unavailable, degrades to the hash fallback while *reporting* the requested
27
+ vs. active provider — nothing is silently faked.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import importlib
33
+ import math
34
+ import os
35
+ import struct
36
+ from dataclasses import dataclass, field
37
+ from typing import Any, Dict, Iterable, List, Optional, Sequence
38
+
39
+ from latticeai.core.local_embeddings import DEFAULT_EMBEDDING_DIM, LocalEmbeddingModel
40
+
41
+
42
class EmbeddingUnavailable(RuntimeError):
    """Signal that a configured provider could not produce an embedding.

    Hot-path callers (``vector_search``) are expected to turn this into an
    explicit 503/"provider unavailable" response instead of returning a
    misleading empty result.
    """
48
+
49
+
50
+ # Best-known output dimensionality for common embedding models, so the index
51
+ # identity is stable before the first (possibly remote) call. A configured
52
+ # ``dim`` always wins; an unknown model falls back to a one-time live probe.
53
+ _KNOWN_DIMS = {
54
+ "nomic-embed-text": 768,
55
+ "mxbai-embed-large": 1024,
56
+ "all-minilm": 384,
57
+ "all-minilm-l6-v2": 384,
58
+ "bge-small-en": 384,
59
+ "bge-base-en": 768,
60
+ "bge-large-en": 1024,
61
+ "gte-small": 384,
62
+ "gte-base": 768,
63
+ "gte-large": 1024,
64
+ "text-embedding-3-small": 1536,
65
+ "text-embedding-3-large": 3072,
66
+ "text-embedding-ada-002": 1536,
67
+ }
68
+
69
+
70
+ def _guess_dim(model: str, default: int) -> int:
71
+ key = str(model or "").split("/")[-1].strip().lower()
72
+ key = key.split(":")[0]
73
+ return _KNOWN_DIMS.get(key, default)
74
+
75
+
76
+ def _l2_normalize(vector: Sequence[float]) -> List[float]:
77
+ norm = math.sqrt(sum(float(v) * float(v) for v in vector))
78
+ if norm <= 0:
79
+ return [float(v) for v in vector]
80
+ return [float(v) / norm for v in vector]
81
+
82
+
83
class EmbeddingProvider:
    """Common contract shared by every embedding backend.

    A concrete provider assigns ``model_id`` and ``dim`` and overrides
    :meth:`embed_batch`. Single-text embedding, the float32 byte codec and the
    dot-product similarity are inherited from this base class.
    """

    #: stable identity stored alongside every vector — change ⇒ re-index
    model_id: str = ""
    #: vector dimensionality
    dim: int = DEFAULT_EMBEDDING_DIM
    #: short provider kind ("hash" | "mlx" | "ollama" | "openai" | "custom")
    provider: str = "hash"
    #: "fallback" (hash) | "production" (real semantic model)
    grade: str = "production"

    # ── required ──────────────────────────────────────────────────────────
    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed every text in *texts*; must be overridden by subclasses."""
        raise NotImplementedError

    # ── derived (shared) ──────────────────────────────────────────────────
    def embed(self, text: str) -> List[float]:
        """Embed one text via :meth:`embed_batch`.

        Falls back to an all-zero vector of length ``dim`` when the batch call
        yields nothing.
        """
        vectors = self.embed_batch([text])
        if vectors:
            return vectors[0]
        return [0.0] * self.dim

    def encode(self, vector: Iterable[float]) -> bytes:
        """Pack *vector* as consecutive little-endian float32 values."""
        floats = list(map(float, vector))
        return struct.pack(f"<{len(floats)}f", *floats)

    def decode(self, payload: bytes, dim: Optional[int] = None) -> List[float]:
        """Unpack little-endian float32 values from *payload*.

        The expected count is *dim* (or ``self.dim``). When the payload length
        does not match that count, as many whole 4-byte floats as the payload
        holds are decoded instead. An empty payload decodes to ``[]``.
        """
        if not payload:
            return []
        expected = int(dim or self.dim)
        size_matches = len(payload) == expected * 4
        count = expected if size_matches else len(payload) // 4
        return list(struct.unpack(f"<{count}f", payload[: count * 4]))

    def similarity(self, left: Iterable[float], right: Iterable[float]) -> float:
        """Return the dot product of *left* and *right* (paired with ``zip``)."""
        total = 0
        for a, b in zip(left, right):
            total += a * b
        return float(total)

    # ── observability ─────────────────────────────────────────────────────
    def health(self) -> Dict[str, Any]:
        """Return ``{status, detail}``; status ∈ ok | unavailable."""
        return {"status": "ok", "detail": "ready"}

    def metadata(self) -> Dict[str, Any]:
        """Describe this provider (kind, model identity, width, grade)."""
        identity = self.model_id
        return {
            "provider": self.provider,
            "model": identity,
            "model_id": identity,
            "dim": self.dim,
            "grade": self.grade,
        }
136
+
137
+
138
+ # ── 1. Hash (offline fallback) ────────────────────────────────────────────────
139
class HashEmbeddingProvider(EmbeddingProvider):
    """Offline feature-hashing embedder: deterministic and needs no network."""

    provider = "hash"
    grade = "fallback"

    def __init__(self, dim: int = DEFAULT_EMBEDDING_DIM):
        backend = LocalEmbeddingModel(dim=dim)
        self._model = backend
        # NOTE(review): ``model_id`` is copied from the local model, whose
        # default id string embeds DEFAULT_EMBEDDING_DIM. With a non-default
        # ``dim`` the id text still names the default width — confirm that
        # this is intended.
        self.model_id = backend.model_id
        self.dim = backend.dim

    def embed(self, text: str) -> List[float]:
        """Embed one text; the local model's output is already L2-normalized."""
        return self._model.embed(text)

    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed each text independently with the local model."""
        embed_one = self._model.embed
        return [embed_one(text) for text in texts]

    def health(self) -> Dict[str, Any]:
        """Always report ``ok``: nothing external is required."""
        return {"status": "ok", "detail": "deterministic local fallback"}
158
+
159
+
160
+ # ── shared base for remote/model-backed providers ─────────────────────────────
161
@dataclass
class _RemoteConfig:
    """Connection and model settings handed to model/server-backed providers."""

    # Model name passed to the backend.
    model: str
    # Server root; an empty value lets each provider use its own default URL.
    base_url: str = ""
    # Bearer token; only sent by providers that use it, and only when non-empty.
    api_key: str = ""
    # Configured vector width.
    dim: int = DEFAULT_EMBEDDING_DIM
    # Timeout handed to the HTTP client by the providers that make requests.
    timeout: float = 30.0
    # Provider-specific options (the custom provider reads ``extra["target"]``).
    extra: Dict[str, Any] = field(default_factory=dict)
169
+
170
+
171
class _NetworkEmbeddingProvider(EmbeddingProvider):
    """Common machinery for providers that call a model/server to embed.

    Subclasses implement :meth:`_embed_raw`; this class sanitizes the input,
    validates the response shape and L2-normalizes every returned vector.
    """

    def __init__(self, cfg: _RemoteConfig):
        self._cfg = cfg
        self.dim = int(cfg.dim or DEFAULT_EMBEDDING_DIM)

    # subclasses implement the raw call
    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Return one raw (un-normalized) vector per text, in input order."""
        raise NotImplementedError

    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed *texts* and return one L2-normalized vector per input.

        Each text is coerced to ``str`` (falsy values become ``""``) and
        truncated to 50,000 characters. An empty vector from the backend is
        replaced by an all-zero vector of the current ``dim``.

        Raises:
            EmbeddingUnavailable: if the backend does not return exactly one
                vector per input text. Without this check a short response
                would silently pair vectors with the wrong texts.
        """
        clean = [str(t or "")[:50_000] for t in texts]
        if not clean:
            return []
        vectors = list(self._embed_raw(clean) or [])
        if len(vectors) != len(clean):
            raise EmbeddingUnavailable(
                f"{self.provider} embedding returned {len(vectors)} vectors for {len(clean)} inputs"
            )
        out: List[List[float]] = []
        for raw in vectors:
            vec = [float(x) for x in (raw or [])]
            if vec:
                # lock the index identity to the true model dimensionality
                self.dim = len(vec)
                out.append(_l2_normalize(vec))
            else:
                out.append([0.0] * self.dim)
        return out
195
+
196
+
197
+ # ── 2. MLX (local Apple-Silicon model) ────────────────────────────────────────
198
class MLXEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embedder backed by a local model loaded through ``mlx_embeddings``."""

    provider = "mlx"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        self._encoder = None
        self.model_id = f"mlx:{cfg.model}:{self.dim}"

    def _load(self):
        """Load the model/tokenizer pair once and cache it.

        Raises:
            EmbeddingUnavailable: if the optional dependency cannot be
                imported or the model fails to load.
        """
        cached = self._encoder
        if cached is not None:
            return cached
        try:  # optional dependency; only imported when this provider is used
            from mlx_embeddings.utils import load as mlx_load  # type: ignore

            model, tokenizer = mlx_load(self._cfg.model)
        except Exception as exc:  # pragma: no cover - environment dependent
            raise EmbeddingUnavailable(f"MLX embedding model unavailable: {exc}") from exc
        self._encoder = ("mlx_embeddings", model, tokenizer)
        return self._encoder

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Run the model on each text and return one raw vector per text."""
        _, model, tokenizer = self._load()
        try:
            import mlx.core as mx  # type: ignore

            vectors: List[List[float]] = []
            for text in texts:
                batch = mx.array([tokenizer.encode(text)])
                output = model(batch)
                if isinstance(output, (tuple, list)):
                    output = output[0]
                if output.ndim == 3:
                    # 3-D output is averaged over axis 1 (presumably the
                    # token axis — TODO confirm) before taking the first row.
                    row = mx.mean(output, axis=1)[0]
                else:
                    row = output[0]
                vectors.append([float(x) for x in row.tolist()])
            return vectors
        except EmbeddingUnavailable:
            raise
        except Exception as exc:  # pragma: no cover - environment dependent
            raise EmbeddingUnavailable(f"MLX embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` when the model loads, ``unavailable`` otherwise."""
        try:
            self._load()
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
        return {"status": "ok", "detail": f"MLX model {self._cfg.model} loaded"}
243
+
244
+
245
+ # ── 3. Ollama ─────────────────────────────────────────────────────────────────
246
class OllamaEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embedder that talks to an Ollama server over HTTP."""

    provider = "ollama"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        server = cfg.base_url or "http://127.0.0.1:11434"
        self._base = server.rstrip("/")
        if not cfg.dim:
            # No configured width: use the known width for this model if any.
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"ollama:{cfg.model}:{self.dim}"

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Request embeddings for *texts*.

        Tries the batched ``/api/embed`` endpoint first. If that endpoint
        answers 404, each text is sent individually to ``/api/embeddings``.

        Raises:
            EmbeddingUnavailable: on any failure (import, transport, HTTP
                status, or response decoding).
        """
        try:
            import httpx

            model_name = self._cfg.model
            with httpx.Client(timeout=self._cfg.timeout) as client:
                # /api/embed supports batching; fall back to /api/embeddings.
                batched = client.post(
                    f"{self._base}/api/embed",
                    json={"model": model_name, "input": list(texts)},
                )
                if batched.status_code == 404:
                    legacy: List[List[float]] = []
                    for text in texts:
                        single = client.post(
                            f"{self._base}/api/embeddings",
                            json={"model": model_name, "prompt": text},
                        )
                        single.raise_for_status()
                        legacy.append(single.json().get("embedding") or [])
                    return legacy
                batched.raise_for_status()
                payload = batched.json()
                many = payload.get("embeddings")
                if many:
                    return many
                return [payload.get("embedding") or []]
        except Exception as exc:
            raise EmbeddingUnavailable(f"Ollama embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Probe ``/api/tags`` with a timeout capped at five seconds."""
        try:
            import httpx

            probe_timeout = min(self._cfg.timeout, 5.0)
            with httpx.Client(timeout=probe_timeout) as client:
                client.get(f"{self._base}/api/tags").raise_for_status()
        except Exception as exc:
            return {"status": "unavailable", "detail": f"Ollama unreachable: {exc}"}
        return {"status": "ok", "detail": f"Ollama reachable at {self._base}"}
292
+
293
+
294
+ # ── 4. OpenAI-compatible (/v1/embeddings) ─────────────────────────────────────
295
class OpenAICompatibleEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embedder for any server exposing an OpenAI-style ``/embeddings`` route."""

    provider = "openai"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        root = cfg.base_url or "https://api.openai.com/v1"
        self._base = root.rstrip("/")
        if not cfg.dim:
            # No configured width: use the known width for this model if any.
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"openai:{cfg.model}:{self.dim}"

    def _headers(self) -> Dict[str, str]:
        """Build request headers; the bearer token is added only when set."""
        token = self._cfg.api_key
        if not token:
            return {"Content-Type": "application/json"}
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        }

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """POST *texts* to ``{base}/embeddings`` and return the vectors.

        Response rows are ordered by their ``index`` field (missing index
        counts as 0) before the embeddings are extracted.

        Raises:
            EmbeddingUnavailable: on any failure (import, transport, HTTP
                status, or response decoding).
        """
        try:
            import httpx

            body = {"model": self._cfg.model, "input": list(texts)}
            with httpx.Client(timeout=self._cfg.timeout) as client:
                response = client.post(
                    f"{self._base}/embeddings",
                    headers=self._headers(),
                    json=body,
                )
                response.raise_for_status()
                ordered = sorted(
                    response.json().get("data", []),
                    key=lambda item: item.get("index", 0),
                )
                return [item.get("embedding") or [] for item in ordered]
        except Exception as exc:
            raise EmbeddingUnavailable(f"OpenAI-compatible embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Check reachability by embedding the single text ``"ping"``."""
        try:
            self._embed_raw(["ping"])
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
        return {"status": "ok", "detail": f"{self._base} reachable"}
333
+
334
+
335
+ # ── 5. Custom (user-supplied callable) ────────────────────────────────────────
336
class CustomEmbeddingProvider(_NetworkEmbeddingProvider):
    """Loads a dotted ``module:callable`` (or ``module.callable``).

    The callable receives ``List[str]`` and returns ``List[List[float]]``.
    Configured via ``LATTICEAI_EMBEDDING_CUSTOM_TARGET``.

    The target is operator-supplied configuration: importing it executes that
    module's code, so it must never be taken from untrusted input.
    """

    provider = "custom"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        self._target_ref = str(cfg.extra.get("target") or os.getenv("LATTICEAI_EMBEDDING_CUSTOM_TARGET", ""))
        self.model_id = f"custom:{cfg.model or self._target_ref or 'callable'}:{self.dim}"
        self._fn = None

    @staticmethod
    def _import_target(module_name: str, attr_path: str):
        """Import *module_name* and walk the dotted *attr_path* on it."""
        target = importlib.import_module(module_name)
        for part in attr_path.split("."):
            target = getattr(target, part)
        return target

    def _load(self):
        """Resolve, validate and cache the configured callable.

        Resolution first treats everything before the last separator as the
        module (``pkg.mod.fn`` / ``pkg.mod:fn``). When the reference contains
        ``:`` and that attempt fails, the part before the first ``:`` is tried
        as the module and the remainder as a dotted attribute path, so
        ``pkg.mod:Class.method`` also resolves.

        Raises:
            EmbeddingUnavailable: if no target is configured, the reference is
                malformed, it cannot be imported, or it is not callable.
        """
        if self._fn is not None:
            return self._fn
        ref = self._target_ref
        if not ref:
            raise EmbeddingUnavailable("custom embedding target not configured (LATTICEAI_EMBEDDING_CUSTOM_TARGET)")
        legacy_module, _, legacy_attr = ref.replace(":", ".").rpartition(".")
        if not legacy_module:
            raise EmbeddingUnavailable(f"invalid custom embedding target: {ref}")
        candidates = [(legacy_module, legacy_attr)]
        if ":" in ref:
            module_name, _, attr_path = ref.partition(":")
            if module_name and attr_path and (module_name, attr_path) != candidates[0]:
                candidates.append((module_name, attr_path))
        failure: Optional[BaseException] = None
        for module_name, attr_path in candidates:
            try:
                target = self._import_target(module_name, attr_path)
            except Exception as exc:
                failure = exc
                continue
            if not callable(target):
                # A non-callable would only fail later, at embed time, while
                # health() kept reporting the provider as loaded.
                failure = TypeError(f"{ref!r} does not resolve to a callable")
                continue
            self._fn = target
            return target
        raise EmbeddingUnavailable(f"custom embedding target unavailable: {failure}") from failure

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Call the user callable with a list copy of *texts*.

        Raises:
            EmbeddingUnavailable: if the target cannot be loaded or the call
                itself raises.
        """
        fn = self._load()
        try:
            return list(fn(list(texts)))
        except Exception as exc:
            raise EmbeddingUnavailable(f"custom embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` when the target resolves to a callable."""
        try:
            self._load()
            return {"status": "ok", "detail": f"custom target {self._target_ref} loaded"}
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
380
+
381
+
382
+ # ── factory + resolution ──────────────────────────────────────────────────────
383
PROVIDER_TYPES = ("hash", "mlx", "ollama", "openai", "custom")


def build_embedding_provider(
    provider: str,
    *,
    model: str = "",
    base_url: str = "",
    api_key: str = "",
    dim: int = 0,
    timeout: float = 30.0,
    extra: Optional[Dict[str, Any]] = None,
) -> EmbeddingProvider:
    """Construct a provider by name. Never makes a network call.

    The name is stripped and lower-cased. An empty name, ``hash``, ``local``
    and ``fallback`` all select the hash provider; ``openai-compatible`` and
    ``openai_compatible`` are aliases of ``openai``.

    Raises:
        ValueError: if the name matches no known provider.
    """
    kind = str(provider or "hash").strip().lower()
    if kind in {"", "hash", "local", "fallback"}:
        return HashEmbeddingProvider(dim=int(dim or DEFAULT_EMBEDDING_DIM))

    # The shared settings are built before the name lookup below.
    cfg = _RemoteConfig(
        model=model,
        base_url=base_url,
        api_key=api_key,
        dim=int(dim or 0),
        timeout=float(timeout or 30.0),
        extra=dict(extra or {}),
    )
    factories = {
        "mlx": MLXEmbeddingProvider,
        "ollama": OllamaEmbeddingProvider,
        "openai": OpenAICompatibleEmbeddingProvider,
        "openai-compatible": OpenAICompatibleEmbeddingProvider,
        "openai_compatible": OpenAICompatibleEmbeddingProvider,
        "custom": CustomEmbeddingProvider,
    }
    factory = factories.get(kind)
    if factory is None:
        raise ValueError(f"unknown embedding provider: {provider!r} (expected one of {PROVIDER_TYPES})")
    return factory(cfg)
417
+
418
+
419
@dataclass
class ResolvedEmbedder:
    """Outcome of provider resolution: the embedder in use plus how it was chosen."""

    # Embedder that will actually be used.
    provider: EmbeddingProvider
    # Provider kind that was asked for.
    requested: str
    # Provider kind that is actually in use.
    active: str
    # True when the requested provider was replaced by the hash fallback.
    fell_back: bool
    # Health report recorded during resolution.
    health: Dict[str, Any]
    # Human-readable explanation; empty when nothing needs explaining.
    detail: str = ""

    def as_dict(self) -> Dict[str, Any]:
        """Flatten the resolution outcome and the provider metadata into one dict.

        Provider metadata is merged last, so its keys win on any collision.
        """
        summary: Dict[str, Any] = {
            "requested_provider": self.requested,
            "active_provider": self.active,
            "fell_back": self.fell_back,
            "health": self.health,
            "detail": self.detail,
        }
        summary.update(self.provider.metadata())
        return summary
437
+
438
+
439
def resolve_embedder(
    provider: str = "",
    *,
    model: str = "",
    base_url: str = "",
    api_key: str = "",
    dim: int = 0,
    timeout: float = 30.0,
    extra: Optional[Dict[str, Any]] = None,
    probe: bool = True,
) -> ResolvedEmbedder:
    """Build the requested provider, degrading to hash if it is unavailable.

    The caller always receives a usable embedder. If the requested provider
    cannot be constructed, or (with ``probe=True``) its health check does not
    report ``ok``, the hash fallback is returned with ``fell_back=True`` and
    the failing health detail, so a down provider is never presented as live.
    With ``probe=False`` the provider is returned unchecked and its health is
    reported as ``unknown``.
    """
    requested = str(provider or "hash").strip().lower() or "hash"

    def _hash_fallback():
        # Every fallback path uses the same width rule.
        return HashEmbeddingProvider(dim=int(dim or DEFAULT_EMBEDDING_DIM))

    if requested in {"hash", "local", "fallback", ""}:
        local = _hash_fallback()
        return ResolvedEmbedder(local, "hash", "hash", False, local.health(), "deterministic local fallback")

    try:
        candidate = build_embedding_provider(
            requested, model=model, base_url=base_url, api_key=api_key, dim=dim, timeout=timeout, extra=extra
        )
    except Exception as exc:
        return ResolvedEmbedder(
            _hash_fallback(), requested, "hash", True,
            {"status": "unavailable", "detail": str(exc)},
            f"could not construct {requested}; using hash fallback",
        )

    if not probe:
        unprobed = {"status": "unknown", "detail": "not probed"}
        return ResolvedEmbedder(candidate, requested, candidate.provider, False, unprobed, "")

    try:
        health = candidate.health()
    except Exception as exc:  # provider health must never crash startup
        health = {"status": "unavailable", "detail": str(exc)}

    if health.get("status") == "ok":
        return ResolvedEmbedder(candidate, requested, candidate.provider, False, health, "")
    return ResolvedEmbedder(
        _hash_fallback(), requested, "hash", True, health,
        f"{requested} unavailable ({health.get('detail', '')}); using hash fallback",
    )
488
+
489
+
490
+ __all__ = [
491
+ "EmbeddingProvider",
492
+ "EmbeddingUnavailable",
493
+ "HashEmbeddingProvider",
494
+ "MLXEmbeddingProvider",
495
+ "OllamaEmbeddingProvider",
496
+ "OpenAICompatibleEmbeddingProvider",
497
+ "CustomEmbeddingProvider",
498
+ "ResolvedEmbedder",
499
+ "build_embedding_provider",
500
+ "resolve_embedder",
501
+ "PROVIDER_TYPES",
502
+ ]
@@ -0,0 +1,86 @@
1
+ """Local deterministic embeddings for Lattice AI search.
2
+
3
+ The v3 backend needs a local-first vector signal without introducing a cloud
4
+ runtime requirement. This module provides a small feature-hashing embedder that
5
+ is deterministic, cheap to run, and good enough for indexing/search tests. A
6
+ future runtime can swap the implementation behind the same interface when a
7
+ local model server is available.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import math
14
+ import os
15
+ import re
16
+ import struct
17
+ from dataclasses import dataclass
18
+ from typing import Iterable, List
19
+
20
+
21
+ DEFAULT_EMBEDDING_DIM = int(os.getenv("LATTICEAI_VECTOR_DIM", "384"))
22
+ EMBEDDING_MODEL_ID = f"lattice-local-hash-v1:{DEFAULT_EMBEDDING_DIM}"
23
+
24
+
25
+ def _tokenize(text: str) -> List[str]:
26
+ raw = str(text or "").lower()
27
+ tokens = re.findall(r"[a-z0-9][a-z0-9_.:/+-]{1,}|[가-힣]{2,}", raw)
28
+ features: List[str] = []
29
+ for token in tokens:
30
+ features.append(f"tok:{token}")
31
+ if len(token) >= 5 and re.search(r"[a-z]", token):
32
+ for i in range(0, len(token) - 2):
33
+ features.append(f"tri:{token[i:i+3]}")
34
+ if re.search(r"[가-힣]", token) and len(token) >= 3:
35
+ for i in range(0, len(token) - 1):
36
+ features.append(f"ko:{token[i:i+2]}")
37
+ return features
38
+
39
+
40
+ def _hash_to_index(feature: str, dim: int) -> tuple[int, float]:
41
+ digest = hashlib.blake2b(feature.encode("utf-8"), digest_size=8).digest()
42
+ value = int.from_bytes(digest, "big", signed=False)
43
+ sign = 1.0 if (value & 1) == 0 else -1.0
44
+ return value % dim, sign
45
+
46
+
47
@dataclass(frozen=True)
class LocalEmbeddingModel:
    """Feature-hashing embedder with fully deterministic output.

    Vectors are L2-normalized, so a dot product equals cosine similarity.
    Embedding needs no network access and no model download.
    """

    dim: int = DEFAULT_EMBEDDING_DIM
    model_id: str = EMBEDDING_MODEL_ID

    def embed(self, text: str) -> List[float]:
        """Hash the features of *text* into a unit-length vector of ``dim`` floats.

        Text that yields no features produces an all-zero vector.
        """
        width = self.dim
        buckets = [0.0] * width
        for feature in _tokenize(text):
            slot, sign = _hash_to_index(feature, width)
            buckets[slot] += sign
        magnitude = math.sqrt(sum(x * x for x in buckets))
        if magnitude <= 0:
            return buckets
        return [x / magnitude for x in buckets]

    def similarity(self, left: Iterable[float], right: Iterable[float]) -> float:
        """Return the dot product of *left* and *right* (paired with ``zip``)."""
        total = 0
        for a, b in zip(left, right):
            total += a * b
        return float(total)

    def encode(self, vector: Iterable[float]) -> bytes:
        """Pack *vector* as consecutive little-endian float32 values."""
        items = list(vector)
        layout = f"<{len(items)}f"
        return struct.pack(layout, *items)

    def decode(self, payload: bytes, dim: int | None = None) -> List[float]:
        """Unpack little-endian float32 values from *payload*.

        The expected count is *dim* (or ``self.dim``). When the payload length
        does not match, as many whole 4-byte floats as it holds are decoded.
        An empty payload decodes to ``[]``.
        """
        if not payload:
            return []
        wanted = int(dim or self.dim)
        count = wanted if len(payload) == wanted * 4 else len(payload) // 4
        return list(struct.unpack(f"<{count}f", payload[: count * 4]))
@@ -18,7 +18,7 @@ from pathlib import Path
18
18
  from typing import Any, Callable, Dict, Iterable, List, Optional
19
19
 
20
20
 
21
- WORKSPACE_OS_VERSION = "2.2.7"
21
+ WORKSPACE_OS_VERSION = "3.0.1"
22
22
 
23
23
  # Workspace types separate single-user Personal workspaces from shared
24
24
  # Organization workspaces. Both keep the same local-first JSON store; the type
@@ -72,6 +72,9 @@ from latticeai.core.enterprise import (
72
72
  from latticeai.services.workspace_service import WorkspaceService
73
73
  from latticeai.services.model_service import ModelService
74
74
  from latticeai.services.chat_service import ChatService
75
+ from latticeai.services.search_service import SearchService
76
+ from latticeai.core.embedding_providers import resolve_embedder
77
+ from latticeai.services.agent_runtime import AgentRuntime
75
78
  from latticeai.services.model_runtime import (
76
79
  CLOUD_VERIFY_TTL_SECONDS,
77
80
  ENGINE_MODEL_CATALOG,
@@ -105,6 +108,7 @@ from latticeai.api.realtime import create_realtime_router
105
108
  from latticeai.api.marketplace import create_marketplace_router
106
109
  from latticeai.api.models import create_models_router
107
110
  from latticeai.api.chat import create_chat_router
111
+ from latticeai.api.search import create_search_router
108
112
  from latticeai.api.tools import create_tools_router
109
113
  from latticeai.api.static_routes import create_static_routes_router
110
114
  from latticeai.api.garden import create_garden_router
@@ -244,7 +248,26 @@ VPC_FILE = DATA_DIR / "vpc_config.json"
244
248
  MCP_FILE = DATA_DIR / "mcp_installs.json"
245
249
  AUDIT_FILE = DATA_DIR / "audit_log.json"
246
250
  SSO_FILE = DATA_DIR / "sso_config.json"
247
- KNOWLEDGE_GRAPH = KnowledgeGraphStore(DATA_DIR / "knowledge_graph.sqlite", DATA_DIR / "knowledge_graph_blobs") if ENABLE_GRAPH else None
251
+ # Resolve the configured embedding provider once at startup. Degrades to the
252
+ # offline hash fallback when the requested provider is unavailable, while
253
+ # recording the requested-vs-active provider for the Embeddings status surface.
254
+ EMBEDDER = resolve_embedder(
255
+ CONFIG.embedding_provider,
256
+ model=CONFIG.embedding_model,
257
+ base_url=CONFIG.embedding_base_url,
258
+ api_key=CONFIG.embedding_api_key,
259
+ dim=CONFIG.embedding_dim,
260
+ timeout=CONFIG.embedding_timeout,
261
+ extra={"target": CONFIG.embedding_custom_target},
262
+ probe=CONFIG.embedding_provider not in {"", "hash", "local", "fallback"},
263
+ )
264
+ if EMBEDDER.fell_back:
265
+ logging.warning("Embedding provider %s unavailable: %s", EMBEDDER.requested, EMBEDDER.detail)
266
+ KNOWLEDGE_GRAPH = KnowledgeGraphStore(
267
+ DATA_DIR / "knowledge_graph.sqlite",
268
+ DATA_DIR / "knowledge_graph_blobs",
269
+ embedder=EMBEDDER.provider,
270
+ ) if ENABLE_GRAPH else None
248
271
  LOCAL_KG_WATCHER = LocalKnowledgeWatcher(lambda: KNOWLEDGE_GRAPH) if ENABLE_GRAPH else None
249
272
  # ── v2 Realtime bus: constructed first so the store can fan every timeline
250
273
  # event into the realtime feed via a single additive sink (no per-call wiring).
@@ -1171,6 +1194,9 @@ def _workspace_graph():
1171
1194
  return KNOWLEDGE_GRAPH if (ENABLE_GRAPH and KNOWLEDGE_GRAPH) else None
1172
1195
 
1173
1196
 
1197
+ SEARCH_SERVICE = SearchService(graph_store=_workspace_graph())
1198
+
1199
+
1174
1200
  # ── Workspace OS + Organization router (latticeai.api.workspace, v1.2.0) ──────
1175
1201
  app.include_router(create_workspace_router(
1176
1202
  service=WORKSPACE_SERVICE,
@@ -1217,6 +1243,14 @@ PLATFORM = PlatformRuntime(
1217
1243
  get_tool_permission=get_tool_permission,
1218
1244
  )
1219
1245
 
1246
+ # Single AgentRuntime boundary over the orchestrator + run store.
1247
+ AGENT_RUNTIME = AgentRuntime(
1248
+ store=WORKSPACE_OS,
1249
+ orchestrator_factory=PLATFORM.build_orchestrator,
1250
+ workspace_graph=_workspace_graph,
1251
+ append_audit_event=append_audit_event,
1252
+ )
1253
+
1220
1254
  app.include_router(create_plugins_router(
1221
1255
  registry=PLUGIN_REGISTRY,
1222
1256
  require_user=require_user,
@@ -1252,6 +1286,7 @@ app.include_router(create_agents_router(
1252
1286
  append_audit_event=append_audit_event,
1253
1287
  ui_file_response=ui_file_response,
1254
1288
  static_dir=STATIC_DIR,
1289
+ agent_runtime=AGENT_RUNTIME,
1255
1290
  ))
1256
1291
 
1257
1292
  app.include_router(create_marketplace_router(
@@ -1351,6 +1386,19 @@ app.include_router(create_chat_router(
1351
1386
  base_dir=BASE_DIR,
1352
1387
  ))
1353
1388
 
1389
def _embedding_info() -> dict:
    """Return the resolved embedder's status dict plus the known provider kinds."""
    from latticeai.core.embedding_providers import PROVIDER_TYPES

    return {**EMBEDDER.as_dict(), "available_providers": list(PROVIDER_TYPES)}
1394
+
1395
+
1396
+ app.include_router(create_search_router(
1397
+ service=SEARCH_SERVICE,
1398
+ require_user=require_user,
1399
+ embedding_info=_embedding_info,
1400
+ ))
1401
+
1354
1402
  app.include_router(create_tools_router(
1355
1403
  config=CONFIG,
1356
1404
  data_dir=DATA_DIR,