ltcai 2.2.7 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -32
- package/docs/CHANGELOG.md +82 -0
- package/docs/V3_BACKEND_ARCHITECTURE.md +138 -0
- package/docs/V3_FRONTEND.md +136 -0
- package/knowledge_graph.py +649 -21
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/admin.py +47 -0
- package/latticeai/api/agents.py +54 -31
- package/latticeai/api/auth.py +1 -1
- package/latticeai/api/chat.py +10 -2
- package/latticeai/api/search.py +236 -0
- package/latticeai/api/static_routes.py +11 -2
- package/latticeai/core/config.py +16 -0
- package/latticeai/core/embedding_providers.py +502 -0
- package/latticeai/core/local_embeddings.py +86 -0
- package/latticeai/core/workspace_os.py +1 -1
- package/latticeai/server_app.py +49 -1
- package/latticeai/services/agent_runtime.py +245 -0
- package/latticeai/services/search_service.py +346 -0
- package/package.json +6 -4
- package/static/account.html +9 -9
- package/static/activity.html +4 -4
- package/static/admin.html +8 -8
- package/static/agents.html +4 -4
- package/static/chat.html +10 -10
- package/static/css/reference/account.css +137 -1
- package/static/css/reference/chat.css +31 -37
- package/static/css/responsive.css +42 -0
- package/static/css/tokens.css +125 -130
- package/static/graph.html +9 -9
- package/static/manifest.json +3 -3
- package/static/plugins.html +4 -4
- package/static/scripts/account.js +4 -4
- package/static/scripts/chat.js +40 -8
- package/static/scripts/workspace.js +78 -0
- package/static/v3/css/lattice.base.css +128 -0
- package/static/v3/css/lattice.components.css +447 -0
- package/static/v3/css/lattice.shell.css +407 -0
- package/static/v3/css/lattice.tokens.css +132 -0
- package/static/v3/css/lattice.views.css +277 -0
- package/static/v3/index.html +40 -0
- package/static/v3/js/app.js +26 -0
- package/static/v3/js/core/api.js +327 -0
- package/static/v3/js/core/components.js +215 -0
- package/static/v3/js/core/dom.js +148 -0
- package/static/v3/js/core/fixtures.js +171 -0
- package/static/v3/js/core/router.js +37 -0
- package/static/v3/js/core/routes.js +73 -0
- package/static/v3/js/core/shell.js +363 -0
- package/static/v3/js/core/store.js +113 -0
- package/static/v3/js/views/admin-audit.js +185 -0
- package/static/v3/js/views/admin-permissions.js +178 -0
- package/static/v3/js/views/admin-policies.js +103 -0
- package/static/v3/js/views/admin-private-vpc.js +138 -0
- package/static/v3/js/views/admin-security.js +181 -0
- package/static/v3/js/views/admin-users.js +168 -0
- package/static/v3/js/views/agents.js +194 -0
- package/static/v3/js/views/chat.js +450 -0
- package/static/v3/js/views/files.js +180 -0
- package/static/v3/js/views/home.js +119 -0
- package/static/v3/js/views/hybrid-search.js +195 -0
- package/static/v3/js/views/knowledge-graph.js +238 -0
- package/static/v3/js/views/models.js +247 -0
- package/static/v3/js/views/my-computer.js +237 -0
- package/static/v3/js/views/pipeline.js +161 -0
- package/static/v3/js/views/settings.js +258 -0
- package/static/workflows.html +4 -4
- package/static/workspace.css +340 -2
- package/static/workspace.html +43 -24
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
"""Provider-backed embeddings for Lattice AI retrieval.
|
|
2
|
+
|
|
3
|
+
The knowledge graph stores dense vectors keyed by ``(embedding_model,
|
|
4
|
+
embedding_dim)`` and only ever compares vectors that share those keys
|
|
5
|
+
(``knowledge_graph.vector_search``). That contract means the *embedder* can be
|
|
6
|
+
swapped behind a single interface as long as every implementation agrees on:
|
|
7
|
+
|
|
8
|
+
* ``model_id`` / ``dim`` — the index identity (a change forces a re-index, which
|
|
9
|
+
``index_status`` already reports as ``stale``/``needs_reindex``);
|
|
10
|
+
* ``encode`` / ``decode`` — the on-disk float32 codec (shared by all providers);
|
|
11
|
+
* ``embed`` returns an **L2-normalized** vector, so ``similarity`` is a plain dot
|
|
12
|
+
product and equals cosine similarity regardless of provider.
|
|
13
|
+
|
|
14
|
+
This module defines that :class:`EmbeddingProvider` interface and five concrete
|
|
15
|
+
implementations:
|
|
16
|
+
|
|
17
|
+
1. :class:`HashEmbeddingProvider` — deterministic, offline, always-available
|
|
18
|
+
fallback (wraps the legacy :class:`~latticeai.core.local_embeddings.LocalEmbeddingModel`).
|
|
19
|
+
2. :class:`MLXEmbeddingProvider` — local Apple-Silicon embedding models.
|
|
20
|
+
3. :class:`OllamaEmbeddingProvider` — a local/remote Ollama server.
|
|
21
|
+
4. :class:`OpenAICompatibleEmbeddingProvider` — any ``/v1/embeddings`` endpoint
|
|
22
|
+
(OpenAI, LM Studio, vLLM, llama.cpp, Together, …).
|
|
23
|
+
5. :class:`CustomEmbeddingProvider` — a user-supplied dotted callable.
|
|
24
|
+
|
|
25
|
+
:func:`resolve_embedder` builds the configured provider and, when that provider
|
|
26
|
+
is unavailable, degrades to the hash fallback while *reporting* the requested
|
|
27
|
+
vs. active provider — nothing is silently faked.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import importlib
|
|
33
|
+
import math
|
|
34
|
+
import os
|
|
35
|
+
import struct
|
|
36
|
+
from dataclasses import dataclass, field
|
|
37
|
+
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
|
38
|
+
|
|
39
|
+
from latticeai.core.local_embeddings import DEFAULT_EMBEDDING_DIM, LocalEmbeddingModel
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EmbeddingUnavailable(RuntimeError):
    """Signal that a configured embedding provider could not produce a vector.

    Hot-path callers such as ``vector_search`` are expected to surface this as
    an explicit 503 / "provider unavailable" response instead of returning a
    misleading empty result.
    """
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Best-known output dimensionality for common embedding models, so the index
|
|
51
|
+
# identity is stable before the first (possibly remote) call. A configured
|
|
52
|
+
# ``dim`` always wins; an unknown model falls back to a one-time live probe.
|
|
53
|
+
_KNOWN_DIMS = {
|
|
54
|
+
"nomic-embed-text": 768,
|
|
55
|
+
"mxbai-embed-large": 1024,
|
|
56
|
+
"all-minilm": 384,
|
|
57
|
+
"all-minilm-l6-v2": 384,
|
|
58
|
+
"bge-small-en": 384,
|
|
59
|
+
"bge-base-en": 768,
|
|
60
|
+
"bge-large-en": 1024,
|
|
61
|
+
"gte-small": 384,
|
|
62
|
+
"gte-base": 768,
|
|
63
|
+
"gte-large": 1024,
|
|
64
|
+
"text-embedding-3-small": 1536,
|
|
65
|
+
"text-embedding-3-large": 3072,
|
|
66
|
+
"text-embedding-ada-002": 1536,
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _guess_dim(model: str, default: int) -> int:
|
|
71
|
+
key = str(model or "").split("/")[-1].strip().lower()
|
|
72
|
+
key = key.split(":")[0]
|
|
73
|
+
return _KNOWN_DIMS.get(key, default)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _l2_normalize(vector: Sequence[float]) -> List[float]:
|
|
77
|
+
norm = math.sqrt(sum(float(v) * float(v) for v in vector))
|
|
78
|
+
if norm <= 0:
|
|
79
|
+
return [float(v) for v in vector]
|
|
80
|
+
return [float(v) / norm for v in vector]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class EmbeddingProvider:
    """Base contract shared by every embedder.

    A concrete provider sets ``model_id`` and ``dim`` and implements
    :meth:`embed_batch`. Single-text embedding, the float32 codec, similarity
    and the observability helpers are inherited from this class.
    """

    # Stable identity stored alongside every vector — a change means re-index.
    model_id: str = ""
    # Vector dimensionality.
    dim: int = DEFAULT_EMBEDDING_DIM
    # Short provider kind ("hash" | "mlx" | "ollama" | "openai" | "custom").
    provider: str = "hash"
    # "fallback" (hash) | "production" (real semantic model).
    grade: str = "production"

    # ── required ──────────────────────────────────────────────────────────
    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed every text in *texts*; subclasses must override this."""
        raise NotImplementedError

    # ── derived (shared) ──────────────────────────────────────────────────
    def embed(self, text: str) -> List[float]:
        """Embed one text, or return a zero vector if the batch came back empty."""
        vectors = self.embed_batch([text])
        if vectors:
            return vectors[0]
        return [0.0] * self.dim

    def encode(self, vector: Iterable[float]) -> bytes:
        """Pack *vector* as little-endian float32 bytes."""
        floats = [float(component) for component in vector]
        return struct.pack(f"<{len(floats)}f", *floats)

    def decode(self, payload: bytes, dim: Optional[int] = None) -> List[float]:
        """Unpack little-endian float32 bytes produced by :meth:`encode`.

        An empty payload yields ``[]``. When the payload length does not match
        the expected ``dim`` (argument, else ``self.dim``), the number of whole
        floats actually present in the payload is used instead.
        """
        if not payload:
            return []
        expected = int(dim or self.dim)
        count = expected if len(payload) == expected * 4 else len(payload) // 4
        return list(struct.unpack(f"<{count}f", payload[: count * 4]))

    def similarity(self, left: Iterable[float], right: Iterable[float]) -> float:
        """Return the dot product of *left* and *right* (paired via ``zip``)."""
        total = 0
        for a, b in zip(left, right):
            total += a * b
        return float(total)

    # ── observability ─────────────────────────────────────────────────────
    def health(self) -> Dict[str, Any]:
        """Return ``{status, detail}``; status ∈ ok | unavailable."""
        return {"status": "ok", "detail": "ready"}

    def metadata(self) -> Dict[str, Any]:
        """Describe this provider's identity as a plain dict."""
        return dict(
            provider=self.provider,
            model=self.model_id,
            model_id=self.model_id,
            dim=self.dim,
            grade=self.grade,
        )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ── 1. Hash (offline fallback) ────────────────────────────────────────────────
|
|
139
|
+
class HashEmbeddingProvider(EmbeddingProvider):
    """Offline feature-hashing embedder backed by ``LocalEmbeddingModel``.

    Needs no network and is used as the fallback provider.
    """

    provider = "hash"
    grade = "fallback"

    def __init__(self, dim: int = DEFAULT_EMBEDDING_DIM):
        hasher = LocalEmbeddingModel(dim=dim)
        self._model = hasher
        self.dim = hasher.dim
        # NOTE(review): model_id is LocalEmbeddingModel's default, which is
        # derived from DEFAULT_EMBEDDING_DIM rather than from the ``dim``
        # passed here — confirm that is the intended index identity.
        self.model_id = hasher.model_id

    def embed(self, text: str) -> List[float]:
        """Embed one text directly with the wrapped model (already L2-normalized)."""
        return self._model.embed(text)

    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed each text in order with the wrapped model."""
        embed_one = self._model.embed
        return list(map(embed_one, texts))

    def health(self) -> Dict[str, Any]:
        """Always report ``ok``."""
        return {"status": "ok", "detail": "deterministic local fallback"}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ── shared base for remote/model-backed providers ─────────────────────────────
|
|
161
|
+
@dataclass
class _RemoteConfig:
    # Settings bundle handed to every model/server-backed provider.

    # Model name sent to (or loaded by) the backend.
    model: str
    # Server root; each provider substitutes its own default when empty.
    base_url: str = ""
    # Sent as a Bearer token by the OpenAI-compatible provider when non-empty.
    api_key: str = ""
    # Configured dimensionality; a falsy value lets providers pick a default.
    dim: int = DEFAULT_EMBEDDING_DIM
    # Passed as the httpx client timeout by the HTTP-based providers.
    timeout: float = 30.0
    # Provider-specific options (the custom provider reads ``extra["target"]``).
    extra: Dict[str, Any] = field(default_factory=dict)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class _NetworkEmbeddingProvider(EmbeddingProvider):
    """Common machinery for providers that call a model/server to embed.

    Subclasses implement :meth:`_embed_raw`; :meth:`embed_batch` sanitizes the
    inputs, validates the result shape and L2-normalizes every vector.
    """

    def __init__(self, cfg: _RemoteConfig):
        self._cfg = cfg
        self.dim = int(cfg.dim or DEFAULT_EMBEDDING_DIM)

    # subclasses implement the raw call
    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Return one raw (un-normalized) vector per text, in input order."""
        raise NotImplementedError

    def embed_batch(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed *texts* and return L2-normalized vectors in input order.

        Each text is coerced to ``str`` (``None``/falsy becomes ``""``) and
        truncated to 50,000 characters. An empty or ``None`` raw vector is
        replaced by a zero vector of the current ``dim``.

        Raises:
            EmbeddingUnavailable: when the backend fails, or when it returns a
                different number of vectors than inputs (pairing them up
                would silently attach embeddings to the wrong texts).
        """
        clean = [str(t or "")[:50_000] for t in texts]
        if not clean:
            return []
        vectors = list(self._embed_raw(clean))
        if len(vectors) != len(clean):
            raise EmbeddingUnavailable(
                f"{self.provider} embedding returned {len(vectors)} vectors for {len(clean)} inputs"
            )
        out: List[List[float]] = []
        for vec in vectors:
            # Compare against None instead of relying on truthiness: array-like
            # rows (e.g. numpy) raise ValueError when evaluated as a boolean.
            vec = [] if vec is None else [float(x) for x in vec]
            if vec:
                # lock the index identity to the true model dimensionality
                # NOTE(review): the subclasses in this file bake ``dim`` into
                # ``model_id`` at construction; ``model_id`` is not refreshed
                # here when ``dim`` changes — confirm that is intended.
                self.dim = len(vec)
            out.append(_l2_normalize(vec) if vec else [0.0] * self.dim)
        return out
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ── 2. MLX (local Apple-Silicon model) ────────────────────────────────────────
|
|
198
|
+
class MLXEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embedder that runs a model loaded through ``mlx_embeddings``.

    The optional ``mlx_embeddings`` / ``mlx`` packages are imported lazily, on
    first use, so the module imports cleanly without them.
    """

    provider = "mlx"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        self.model_id = f"mlx:{cfg.model}:{self.dim}"
        self._encoder = None

    def _load(self):
        """Load (once) and return the cached ``(kind, model, tokenizer)`` tuple.

        Raises:
            EmbeddingUnavailable: if the import or the model load fails.
        """
        cached = self._encoder
        if cached is not None:
            return cached
        try:  # optional dependency; only imported when this provider is used
            from mlx_embeddings.utils import load as mlx_load  # type: ignore

            model, tokenizer = mlx_load(self._cfg.model)
        except Exception as exc:  # pragma: no cover - environment dependent
            raise EmbeddingUnavailable(f"MLX embedding model unavailable: {exc}") from exc
        self._encoder = ("mlx_embeddings", model, tokenizer)
        return self._encoder

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed each text separately and return the raw vectors."""
        _, model, tokenizer = self._load()
        try:
            import mlx.core as mx  # type: ignore

            def _embed_one(text: str) -> List[float]:
                result = model(mx.array([tokenizer.encode(text)]))
                pooled = result[0] if isinstance(result, (tuple, list)) else result
                if pooled.ndim == 3:
                    # rank-3 output: average over axis 1, then take row 0
                    vec = mx.mean(pooled, axis=1)[0]
                else:
                    vec = pooled[0]
                return [float(x) for x in vec.tolist()]

            return [_embed_one(text) for text in texts]
        except EmbeddingUnavailable:
            raise
        except Exception as exc:  # pragma: no cover - environment dependent
            raise EmbeddingUnavailable(f"MLX embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` when the model loads, otherwise ``unavailable``."""
        try:
            self._load()
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
        return {"status": "ok", "detail": f"MLX model {self._cfg.model} loaded"}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# ── 3. Ollama ─────────────────────────────────────────────────────────────────
|
|
246
|
+
class OllamaEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embedder that talks to an Ollama server over HTTP."""

    provider = "ollama"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        self._base = (cfg.base_url or "http://127.0.0.1:11434").rstrip("/")
        if not cfg.dim:
            # no configured dim: use the known size for this model if listed
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"ollama:{cfg.model}:{self.dim}"

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """POST the batch to ``/api/embed``; on 404 use ``/api/embeddings`` per text.

        Raises:
            EmbeddingUnavailable: on any failure (import, transport, HTTP, JSON).
        """
        model = self._cfg.model
        try:
            import httpx

            with httpx.Client(timeout=self._cfg.timeout) as client:
                # /api/embed supports batching; fall back to /api/embeddings.
                batch = client.post(
                    f"{self._base}/api/embed",
                    json={"model": model, "input": list(texts)},
                )
                if batch.status_code != 404:
                    batch.raise_for_status()
                    payload = batch.json()
                    return payload.get("embeddings") or [payload.get("embedding") or []]

                per_text: List[List[float]] = []
                for text in texts:
                    single = client.post(
                        f"{self._base}/api/embeddings",
                        json={"model": model, "prompt": text},
                    )
                    single.raise_for_status()
                    per_text.append(single.json().get("embedding") or [])
                return per_text
        except Exception as exc:
            raise EmbeddingUnavailable(f"Ollama embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Probe ``/api/tags`` with a timeout capped at five seconds."""
        try:
            import httpx

            probe_timeout = min(self._cfg.timeout, 5.0)
            with httpx.Client(timeout=probe_timeout) as client:
                client.get(f"{self._base}/api/tags").raise_for_status()
            return {"status": "ok", "detail": f"Ollama reachable at {self._base}"}
        except Exception as exc:
            return {"status": "unavailable", "detail": f"Ollama unreachable: {exc}"}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# ── 4. OpenAI-compatible (/v1/embeddings) ─────────────────────────────────────
|
|
295
|
+
class OpenAICompatibleEmbeddingProvider(_NetworkEmbeddingProvider):
    """Embedder for any server exposing an OpenAI-style ``/embeddings`` route."""

    provider = "openai"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        self._base = (cfg.base_url or "https://api.openai.com/v1").rstrip("/")
        if not cfg.dim:
            # no configured dim: use the known size for this model if listed
            self.dim = _guess_dim(cfg.model, DEFAULT_EMBEDDING_DIM)
        self.model_id = f"openai:{cfg.model}:{self.dim}"

    def _headers(self) -> Dict[str, str]:
        """Build request headers; ``Authorization`` is added only with an API key."""
        token = self._cfg.api_key
        auth = {"Authorization": f"Bearer {token}"} if token else {}
        return {"Content-Type": "application/json", **auth}

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """POST the batch and return embeddings ordered by each row's ``index``.

        Raises:
            EmbeddingUnavailable: on any failure (import, transport, HTTP, JSON).
        """
        try:
            import httpx

            with httpx.Client(timeout=self._cfg.timeout) as client:
                response = client.post(
                    f"{self._base}/embeddings",
                    headers=self._headers(),
                    json={"model": self._cfg.model, "input": list(texts)},
                )
                response.raise_for_status()
                rows = list(response.json().get("data", []))
                rows.sort(key=lambda d: d.get("index", 0))
                return [row.get("embedding") or [] for row in rows]
        except Exception as exc:
            raise EmbeddingUnavailable(f"OpenAI-compatible embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` if a one-item embedding request (``"ping"``) succeeds."""
        try:
            self._embed_raw(["ping"])
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
        return {"status": "ok", "detail": f"{self._base} reachable"}
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
# ── 5. Custom (user-supplied callable) ────────────────────────────────────────
|
|
336
|
+
class CustomEmbeddingProvider(_NetworkEmbeddingProvider):
    """Loads a dotted ``module:callable`` (or ``module.callable``).

    The callable receives ``List[str]`` and returns ``List[List[float]]``.
    Configured via ``LATTICEAI_EMBEDDING_CUSTOM_TARGET``.
    """

    provider = "custom"

    def __init__(self, cfg: _RemoteConfig):
        super().__init__(cfg)
        # An explicit ``extra["target"]`` wins over the environment variable.
        self._target_ref = str(cfg.extra.get("target") or os.getenv("LATTICEAI_EMBEDDING_CUSTOM_TARGET", ""))
        self.model_id = f"custom:{cfg.model or self._target_ref or 'callable'}:{self.dim}"
        self._fn = None

    def _load(self):
        """Import (once) and return the configured callable.

        Raises:
            EmbeddingUnavailable: when no target is configured, the reference
                has no module part, the import/attribute lookup fails, or the
                resolved attribute is not callable.
        """
        if self._fn is not None:
            return self._fn
        ref = self._target_ref
        if not ref:
            raise EmbeddingUnavailable("custom embedding target not configured (LATTICEAI_EMBEDDING_CUSTOM_TARGET)")
        module_name, _, attr = ref.replace(":", ".").rpartition(".")
        if not module_name:
            raise EmbeddingUnavailable(f"invalid custom embedding target: {ref}")
        try:
            module = importlib.import_module(module_name)
            fn = getattr(module, attr)
        except Exception as exc:
            raise EmbeddingUnavailable(f"custom embedding target unavailable: {exc}") from exc
        # Reject non-callables here so health() cannot report "ok" for a
        # target that would fail on the first embed call.
        if not callable(fn):
            raise EmbeddingUnavailable(f"custom embedding target is not callable: {ref}")
        self._fn = fn
        return fn

    def _embed_raw(self, texts: Sequence[str]) -> List[List[float]]:
        """Call the user callable and return its rows as plain float lists.

        Rows are converted eagerly so array-like results (whose truth value is
        ambiguous) are safe for the shared post-processing, and so malformed
        output is reported as ``EmbeddingUnavailable``.
        """
        fn = self._load()
        try:
            rows = fn(list(texts))
            return [[] if row is None else [float(x) for x in row] for row in rows]
        except Exception as exc:
            raise EmbeddingUnavailable(f"custom embedding failed: {exc}") from exc

    def health(self) -> Dict[str, Any]:
        """Report ``ok`` when the target resolves to a callable."""
        try:
            self._load()
            return {"status": "ok", "detail": f"custom target {self._target_ref} loaded"}
        except Exception as exc:
            return {"status": "unavailable", "detail": str(exc)}
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
# ── factory + resolution ──────────────────────────────────────────────────────
|
|
383
|
+
PROVIDER_TYPES = ("hash", "mlx", "ollama", "openai", "custom")


def build_embedding_provider(
    provider: str,
    *,
    model: str = "",
    base_url: str = "",
    api_key: str = "",
    dim: int = 0,
    timeout: float = 30.0,
    extra: Optional[Dict[str, Any]] = None,
) -> EmbeddingProvider:
    """Construct a provider by name. Never makes a network call.

    The name is stripped and lower-cased. ``""``, ``"hash"``, ``"local"`` and
    ``"fallback"`` select the hash provider; ``"openai-compatible"`` and
    ``"openai_compatible"`` are accepted as aliases of ``"openai"``.

    Raises:
        ValueError: for a provider name that is not recognized.
    """
    kind = str(provider or "hash").strip().lower()
    if kind in {"", "hash", "local", "fallback"}:
        return HashEmbeddingProvider(dim=int(dim or DEFAULT_EMBEDDING_DIM))

    cfg = _RemoteConfig(
        model=model,
        base_url=base_url,
        api_key=api_key,
        dim=int(dim or 0),
        timeout=float(timeout or 30.0),
        extra=dict(extra or {}),
    )
    remote_classes = {
        "mlx": MLXEmbeddingProvider,
        "ollama": OllamaEmbeddingProvider,
        "openai": OpenAICompatibleEmbeddingProvider,
        "openai-compatible": OpenAICompatibleEmbeddingProvider,
        "openai_compatible": OpenAICompatibleEmbeddingProvider,
        "custom": CustomEmbeddingProvider,
    }
    provider_cls = remote_classes.get(kind)
    if provider_cls is None:
        raise ValueError(f"unknown embedding provider: {provider!r} (expected one of {PROVIDER_TYPES})")
    return provider_cls(cfg)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
@dataclass
class ResolvedEmbedder:
    # Outcome of resolving the configured embedder.

    # The provider instance the application should actually use.
    provider: EmbeddingProvider
    # Provider kind that was asked for.
    requested: str
    # Provider kind that is in use.
    active: str
    # True when ``provider`` is the hash fallback standing in for ``requested``.
    fell_back: bool
    # Health dict recorded during resolution.
    health: Dict[str, Any]
    # Human-readable explanation.
    detail: str = ""

    def as_dict(self) -> Dict[str, Any]:
        """Flatten the resolution result and the provider metadata into one dict."""
        summary: Dict[str, Any] = {
            "requested_provider": self.requested,
            "active_provider": self.active,
            "fell_back": self.fell_back,
            "health": self.health,
            "detail": self.detail,
        }
        summary.update(self.provider.metadata())
        return summary
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def resolve_embedder(
    provider: str = "",
    *,
    model: str = "",
    base_url: str = "",
    api_key: str = "",
    dim: int = 0,
    timeout: float = 30.0,
    extra: Optional[Dict[str, Any]] = None,
    probe: bool = True,
) -> ResolvedEmbedder:
    """Build the requested provider, degrading to hash if it is unavailable.

    The caller always receives a usable embedder. If the requested provider
    cannot be constructed, or (with ``probe`` enabled) its health check does
    not report ``ok``, the hash provider is returned with ``fell_back=True``
    and the failing health detail. With ``probe`` disabled the provider is
    returned unchecked with health status ``unknown``.
    """
    requested = str(provider or "hash").strip().lower() or "hash"

    def _new_hash() -> HashEmbeddingProvider:
        return HashEmbeddingProvider(dim=int(dim or DEFAULT_EMBEDDING_DIM))

    def _degraded(health: Dict[str, Any], detail: str) -> ResolvedEmbedder:
        return ResolvedEmbedder(_new_hash(), requested, "hash", True, health, detail)

    if requested in {"hash", "local", "fallback", ""}:
        local = _new_hash()
        return ResolvedEmbedder(local, "hash", "hash", False, local.health(), "deterministic local fallback")

    try:
        prov = build_embedding_provider(
            requested, model=model, base_url=base_url, api_key=api_key, dim=dim, timeout=timeout, extra=extra
        )
    except Exception as exc:
        return _degraded(
            {"status": "unavailable", "detail": str(exc)},
            f"could not construct {requested}; using hash fallback",
        )

    if not probe:
        return ResolvedEmbedder(
            prov, requested, prov.provider, False, {"status": "unknown", "detail": "not probed"}, ""
        )

    try:
        health = prov.health()
    except Exception as exc:  # provider health must never crash startup
        health = {"status": "unavailable", "detail": str(exc)}

    if health.get("status") != "ok":
        return _degraded(
            health,
            f"{requested} unavailable ({health.get('detail', '')}); using hash fallback",
        )
    return ResolvedEmbedder(prov, requested, prov.provider, False, health, "")
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
__all__ = [
|
|
491
|
+
"EmbeddingProvider",
|
|
492
|
+
"EmbeddingUnavailable",
|
|
493
|
+
"HashEmbeddingProvider",
|
|
494
|
+
"MLXEmbeddingProvider",
|
|
495
|
+
"OllamaEmbeddingProvider",
|
|
496
|
+
"OpenAICompatibleEmbeddingProvider",
|
|
497
|
+
"CustomEmbeddingProvider",
|
|
498
|
+
"ResolvedEmbedder",
|
|
499
|
+
"build_embedding_provider",
|
|
500
|
+
"resolve_embedder",
|
|
501
|
+
"PROVIDER_TYPES",
|
|
502
|
+
]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Local deterministic embeddings for Lattice AI search.
|
|
2
|
+
|
|
3
|
+
The v3 backend needs a local-first vector signal without introducing a cloud
|
|
4
|
+
runtime requirement. This module provides a small feature-hashing embedder that
|
|
5
|
+
is deterministic, cheap to run, and good enough for indexing/search tests. A
|
|
6
|
+
future runtime can swap the implementation behind the same interface when a
|
|
7
|
+
local model server is available.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import math
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
import struct
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from typing import Iterable, List
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
DEFAULT_EMBEDDING_DIM = int(os.getenv("LATTICEAI_VECTOR_DIM", "384"))
|
|
22
|
+
EMBEDDING_MODEL_ID = f"lattice-local-hash-v1:{DEFAULT_EMBEDDING_DIM}"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _tokenize(text: str) -> List[str]:
|
|
26
|
+
raw = str(text or "").lower()
|
|
27
|
+
tokens = re.findall(r"[a-z0-9][a-z0-9_.:/+-]{1,}|[가-힣]{2,}", raw)
|
|
28
|
+
features: List[str] = []
|
|
29
|
+
for token in tokens:
|
|
30
|
+
features.append(f"tok:{token}")
|
|
31
|
+
if len(token) >= 5 and re.search(r"[a-z]", token):
|
|
32
|
+
for i in range(0, len(token) - 2):
|
|
33
|
+
features.append(f"tri:{token[i:i+3]}")
|
|
34
|
+
if re.search(r"[가-힣]", token) and len(token) >= 3:
|
|
35
|
+
for i in range(0, len(token) - 1):
|
|
36
|
+
features.append(f"ko:{token[i:i+2]}")
|
|
37
|
+
return features
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _hash_to_index(feature: str, dim: int) -> tuple[int, float]:
|
|
41
|
+
digest = hashlib.blake2b(feature.encode("utf-8"), digest_size=8).digest()
|
|
42
|
+
value = int.from_bytes(digest, "big", signed=False)
|
|
43
|
+
sign = 1.0 if (value & 1) == 0 else -1.0
|
|
44
|
+
return value % dim, sign
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class LocalEmbeddingModel:
    """Feature-hashing embedder with no network or model download.

    :meth:`embed` returns L2-normalized vectors, so the dot product computed
    by :meth:`similarity` is the cosine similarity of two embedded texts.
    """

    dim: int = DEFAULT_EMBEDDING_DIM
    # NOTE(review): the default id embeds DEFAULT_EMBEDDING_DIM, so it does not
    # follow a non-default ``dim`` passed to the constructor — confirm intent.
    model_id: str = EMBEDDING_MODEL_ID

    def embed(self, text: str) -> List[float]:
        """Hash the features of *text* into ``dim`` signed buckets and normalize.

        Text that yields no features produces an all-zero vector.
        """
        buckets = [0.0] * self.dim
        for feature in _tokenize(text):
            slot, sign = _hash_to_index(feature, self.dim)
            buckets[slot] += sign
        norm = math.sqrt(sum(weight * weight for weight in buckets))
        if norm > 0:
            return [weight / norm for weight in buckets]
        return buckets

    def similarity(self, left: Iterable[float], right: Iterable[float]) -> float:
        """Return the dot product of *left* and *right* (paired via ``zip``)."""
        total = 0
        for a, b in zip(left, right):
            total += a * b
        return float(total)

    def encode(self, vector: Iterable[float]) -> bytes:
        """Pack *vector* as little-endian float32 bytes."""
        components = list(vector)
        return struct.pack(f"<{len(components)}f", *components)

    def decode(self, payload: bytes, dim: int | None = None) -> List[float]:
        """Unpack little-endian float32 bytes produced by :meth:`encode`.

        An empty payload yields ``[]``. When the payload length does not match
        the expected ``dim`` (argument, else ``self.dim``), the number of whole
        floats actually present in the payload is used instead.
        """
        if not payload:
            return []
        expected = int(dim or self.dim)
        count = expected if len(payload) == expected * 4 else len(payload) // 4
        return list(struct.unpack(f"<{count}f", payload[: count * 4]))
|
|
@@ -18,7 +18,7 @@ from pathlib import Path
|
|
|
18
18
|
from typing import Any, Callable, Dict, Iterable, List, Optional
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
WORKSPACE_OS_VERSION = "
|
|
21
|
+
WORKSPACE_OS_VERSION = "3.0.1"
|
|
22
22
|
|
|
23
23
|
# Workspace types separate single-user Personal workspaces from shared
|
|
24
24
|
# Organization workspaces. Both keep the same local-first JSON store; the type
|
package/latticeai/server_app.py
CHANGED
|
@@ -72,6 +72,9 @@ from latticeai.core.enterprise import (
|
|
|
72
72
|
from latticeai.services.workspace_service import WorkspaceService
|
|
73
73
|
from latticeai.services.model_service import ModelService
|
|
74
74
|
from latticeai.services.chat_service import ChatService
|
|
75
|
+
from latticeai.services.search_service import SearchService
|
|
76
|
+
from latticeai.core.embedding_providers import resolve_embedder
|
|
77
|
+
from latticeai.services.agent_runtime import AgentRuntime
|
|
75
78
|
from latticeai.services.model_runtime import (
|
|
76
79
|
CLOUD_VERIFY_TTL_SECONDS,
|
|
77
80
|
ENGINE_MODEL_CATALOG,
|
|
@@ -105,6 +108,7 @@ from latticeai.api.realtime import create_realtime_router
|
|
|
105
108
|
from latticeai.api.marketplace import create_marketplace_router
|
|
106
109
|
from latticeai.api.models import create_models_router
|
|
107
110
|
from latticeai.api.chat import create_chat_router
|
|
111
|
+
from latticeai.api.search import create_search_router
|
|
108
112
|
from latticeai.api.tools import create_tools_router
|
|
109
113
|
from latticeai.api.static_routes import create_static_routes_router
|
|
110
114
|
from latticeai.api.garden import create_garden_router
|
|
@@ -244,7 +248,26 @@ VPC_FILE = DATA_DIR / "vpc_config.json"
|
|
|
244
248
|
MCP_FILE = DATA_DIR / "mcp_installs.json"
|
|
245
249
|
AUDIT_FILE = DATA_DIR / "audit_log.json"
|
|
246
250
|
SSO_FILE = DATA_DIR / "sso_config.json"
|
|
247
|
-
|
|
251
|
+
# Resolve the configured embedding provider once at startup. Degrades to the
|
|
252
|
+
# offline hash fallback when the requested provider is unavailable, while
|
|
253
|
+
# recording the requested-vs-active provider for the Embeddings status surface.
|
|
254
|
+
EMBEDDER = resolve_embedder(
|
|
255
|
+
CONFIG.embedding_provider,
|
|
256
|
+
model=CONFIG.embedding_model,
|
|
257
|
+
base_url=CONFIG.embedding_base_url,
|
|
258
|
+
api_key=CONFIG.embedding_api_key,
|
|
259
|
+
dim=CONFIG.embedding_dim,
|
|
260
|
+
timeout=CONFIG.embedding_timeout,
|
|
261
|
+
extra={"target": CONFIG.embedding_custom_target},
|
|
262
|
+
probe=CONFIG.embedding_provider not in {"", "hash", "local", "fallback"},
|
|
263
|
+
)
|
|
264
|
+
if EMBEDDER.fell_back:
|
|
265
|
+
logging.warning("Embedding provider %s unavailable: %s", EMBEDDER.requested, EMBEDDER.detail)
|
|
266
|
+
KNOWLEDGE_GRAPH = KnowledgeGraphStore(
|
|
267
|
+
DATA_DIR / "knowledge_graph.sqlite",
|
|
268
|
+
DATA_DIR / "knowledge_graph_blobs",
|
|
269
|
+
embedder=EMBEDDER.provider,
|
|
270
|
+
) if ENABLE_GRAPH else None
|
|
248
271
|
LOCAL_KG_WATCHER = LocalKnowledgeWatcher(lambda: KNOWLEDGE_GRAPH) if ENABLE_GRAPH else None
|
|
249
272
|
# ── v2 Realtime bus: constructed first so the store can fan every timeline
|
|
250
273
|
# event into the realtime feed via a single additive sink (no per-call wiring).
|
|
@@ -1171,6 +1194,9 @@ def _workspace_graph():
|
|
|
1171
1194
|
return KNOWLEDGE_GRAPH if (ENABLE_GRAPH and KNOWLEDGE_GRAPH) else None
|
|
1172
1195
|
|
|
1173
1196
|
|
|
1197
|
+
SEARCH_SERVICE = SearchService(graph_store=_workspace_graph())
|
|
1198
|
+
|
|
1199
|
+
|
|
1174
1200
|
# ── Workspace OS + Organization router (latticeai.api.workspace, v1.2.0) ──────
|
|
1175
1201
|
app.include_router(create_workspace_router(
|
|
1176
1202
|
service=WORKSPACE_SERVICE,
|
|
@@ -1217,6 +1243,14 @@ PLATFORM = PlatformRuntime(
|
|
|
1217
1243
|
get_tool_permission=get_tool_permission,
|
|
1218
1244
|
)
|
|
1219
1245
|
|
|
1246
|
+
# Single AgentRuntime boundary over the orchestrator + run store.
|
|
1247
|
+
AGENT_RUNTIME = AgentRuntime(
|
|
1248
|
+
store=WORKSPACE_OS,
|
|
1249
|
+
orchestrator_factory=PLATFORM.build_orchestrator,
|
|
1250
|
+
workspace_graph=_workspace_graph,
|
|
1251
|
+
append_audit_event=append_audit_event,
|
|
1252
|
+
)
|
|
1253
|
+
|
|
1220
1254
|
app.include_router(create_plugins_router(
|
|
1221
1255
|
registry=PLUGIN_REGISTRY,
|
|
1222
1256
|
require_user=require_user,
|
|
@@ -1252,6 +1286,7 @@ app.include_router(create_agents_router(
|
|
|
1252
1286
|
append_audit_event=append_audit_event,
|
|
1253
1287
|
ui_file_response=ui_file_response,
|
|
1254
1288
|
static_dir=STATIC_DIR,
|
|
1289
|
+
agent_runtime=AGENT_RUNTIME,
|
|
1255
1290
|
))
|
|
1256
1291
|
|
|
1257
1292
|
app.include_router(create_marketplace_router(
|
|
@@ -1351,6 +1386,19 @@ app.include_router(create_chat_router(
|
|
|
1351
1386
|
base_dir=BASE_DIR,
|
|
1352
1387
|
))
|
|
1353
1388
|
|
|
1389
|
+
def _embedding_info() -> dict:
    """Return the resolved embedder's status dict plus the known provider kinds."""
    from latticeai.core.embedding_providers import PROVIDER_TYPES

    return {**EMBEDDER.as_dict(), "available_providers": list(PROVIDER_TYPES)}
|
|
1394
|
+
|
|
1395
|
+
|
|
1396
|
+
app.include_router(create_search_router(
|
|
1397
|
+
service=SEARCH_SERVICE,
|
|
1398
|
+
require_user=require_user,
|
|
1399
|
+
embedding_info=_embedding_info,
|
|
1400
|
+
))
|
|
1401
|
+
|
|
1354
1402
|
app.include_router(create_tools_router(
|
|
1355
1403
|
config=CONFIG,
|
|
1356
1404
|
data_dir=DATA_DIR,
|