@pentatonic-ai/ai-agent-sdk 0.9.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory/package-lock.json +3 -3
- package/packages/memory-engine/compat/server.py +45 -67
- package/packages/memory-engine/docker-compose.test.yml +0 -7
- package/packages/memory-engine/docker-compose.yml +10 -36
- package/packages/memory-engine/engine/services/l2/Dockerfile +7 -0
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +233 -60
- package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +280 -0
- package/packages/memory-engine/engine/services/l4/Dockerfile +0 -19
- package/packages/memory-engine/engine/services/l4/server.py +0 -315
|
@@ -1,315 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
L4 sqlite-vec sidecar.
|
|
3
|
-
|
|
4
|
-
Vector index sidecar for the Pentatonic Memory Engine stack.
|
|
5
|
-
Exposes /health, /health/deep, /search, /index-batch, /refresh over HTTP.
|
|
6
|
-
|
|
7
|
-
Endpoints:
|
|
8
|
-
GET /health
|
|
9
|
-
POST /search body: {"query":"...", "limit":10}
|
|
10
|
-
POST /index-batch body: {"records":[{"id","text"}, ...]}
|
|
11
|
-
POST /refresh no-op (sqlite-vec writes are immediate)
|
|
12
|
-
|
|
13
|
-
Env:
|
|
14
|
-
L4_DB_PATH default /data/vec.db
L4_EMBED_DIM default 4096 (reported by /health)
|
|
15
|
-
L4_NV_EMBED_URL default http://nv-embed:8041/v1/embeddings
|
|
16
|
-
PORT default 8042
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
from __future__ import annotations
|
|
20
|
-
|
|
21
|
-
import argparse
|
|
22
|
-
import hashlib
|
|
23
|
-
import os
|
|
24
|
-
import sqlite3
|
|
25
|
-
import struct
|
|
26
|
-
import sys
|
|
27
|
-
import time
|
|
28
|
-
from pathlib import Path
|
|
29
|
-
from typing import Any
|
|
30
|
-
|
|
31
|
-
from fastapi import FastAPI, HTTPException
|
|
32
|
-
from pydantic import BaseModel
|
|
33
|
-
|
|
34
|
-
# Shared embedding client lives at engine/services/_shared/. Add the parent of
|
|
35
|
-
# the service dir to sys.path so `from _shared.embed_provider import ...` works
|
|
36
|
-
# regardless of how the service is launched (uvicorn, python server.py, etc.).
|
|
37
|
-
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
38
|
-
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
39
|
-
|
|
40
|
-
# ----------------------------------------------------------------------
|
|
41
|
-
# Config
|
|
42
|
-
# ----------------------------------------------------------------------
|
|
43
|
-
|
|
44
|
-
# Location of the SQLite database file; override with the L4_DB_PATH env var.
DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
# Embedding dimensionality surfaced by /health; override with L4_EMBED_DIM.
# NOTE(review): nothing in this file validates stored or incoming vectors
# against this value — it is informational only.
EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# ----------------------------------------------------------------------
|
|
50
|
-
# DB helpers
|
|
51
|
-
# ----------------------------------------------------------------------
|
|
52
|
-
|
|
53
|
-
def _vec_to_blob(vec: list[float]) -> bytes:
|
|
54
|
-
"""Pack a list of floats as little-endian f32 bytes for sqlite-vec."""
|
|
55
|
-
return struct.pack(f"<{len(vec)}f", *vec)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def _blob_to_vec(blob: bytes) -> list[float]:
|
|
59
|
-
n = len(blob) // 4
|
|
60
|
-
return list(struct.unpack(f"<{n}f", blob))
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def _cosine(a: list[float], b: list[float]) -> float:
|
|
64
|
-
import math
|
|
65
|
-
dot = sum(x * y for x, y in zip(a, b))
|
|
66
|
-
na = math.sqrt(sum(x * x for x in a))
|
|
67
|
-
nb = math.sqrt(sum(y * y for y in b))
|
|
68
|
-
if na == 0 or nb == 0:
|
|
69
|
-
return 0.0
|
|
70
|
-
return dot / (na * nb)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def _get_db() -> sqlite3.Connection:
    """Open the database at ``DB_PATH`` and make sure the schema exists.

    Embeddings live in a plain BLOB column rather than a sqlite-vec virtual
    table: sqlite-vec is an optional extension that may not be loadable in
    every container, and a plain BLOB lets similarity be computed with a
    Python-side cosine pass without losing correctness.

    The parent directory is created if missing, WAL journaling is enabled,
    and the ``chunks`` table is created on first use.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or initialised. The
            half-initialised connection is closed before the error propagates.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH, timeout=10)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                id TEXT PRIMARY KEY,
                text TEXT,
                embedding BLOB,
                indexed_at REAL
            )
        """)
    except Exception:
        # Callers never receive this connection, so nobody else can close it.
        conn.close()
        raise
    return conn
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
# ----------------------------------------------------------------------
|
|
94
|
-
# Embedding client
|
|
95
|
-
# ----------------------------------------------------------------------
|
|
96
|
-
|
|
97
|
-
# Module-level cache for the embed client; stays None until the first
# _embed_client() call builds it.
_embed: EmbedClient | None = None
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def _embed_client() -> EmbedClient:
    """Return the shared embed client, constructing it on the first call.

    Construction is deferred so ``EmbedClient.from_env`` runs at first use
    instead of at import time; later calls reuse the cached instance.
    """
    global _embed
    if _embed is not None:
        return _embed
    _embed = EmbedClient.from_env(
        prefix="L4_",
        default_url="http://nv-embed:8041/v1/embeddings",
    )
    return _embed
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
async def _embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed ``texts`` with the shared client and return what it produces."""
    client = _embed_client()
    vectors = await client.embed_batch_async(texts)
    return vectors
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
# ----------------------------------------------------------------------
|
|
117
|
-
# FastAPI
|
|
118
|
-
# ----------------------------------------------------------------------
|
|
119
|
-
|
|
120
|
-
class SearchRequest(BaseModel):
    # Request body for POST /search.
    # Text to embed and compare against the stored chunks. An empty string
    # makes the handler return an empty list without calling the embedder.
    query: str
    # Upper bound on the number of hits returned.
    limit: int = 10
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
class IndexBatchRequest(BaseModel):
    # Request body for POST /index-batch.
    # One dict per chunk. The handler reads "text" (falling back to
    # "content") for the body and "id" for the primary key.
    records: list[dict[str, Any]]
    # When supplied (parallel to `records`), skip the embed call and use
    # these vectors directly. Compat shim populates this when shared-embed
    # mode is on so we don't duplicate the embed work across layers.
    embeddings: list[list[float]] | None = None
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI(title="L4 sqlite-vec sidecar (OSS)")
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
@app.get("/health")
def health():
    """Shallow liveness probe: open the DB and count the stored vectors.

    Returns:
        On success, a dict with ``status="ok"``, the row count of ``chunks``
        (``n_vectors``), the configured ``dim``, ``db_path`` and ``backend``.
        On any failure, ``{"status": "degraded", "error": <message>}``; the
        handler never raises.
    """
    try:
        conn = _get_db()
        try:
            n = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
        finally:
            # Close even when the query fails so a failing probe that is
            # polled repeatedly does not accumulate open connections.
            conn.close()
    except Exception as exc:
        return {"status": "degraded", "error": str(exc)}
    return {"status": "ok", "loaded": True, "n_vectors": n,
            "dim": EMBED_DIM, "db_path": DB_PATH,
            # BLOB+Python-cosine is the intentional implementation path,
            # not a degraded fallback (see _get_db docstring). The previous
            # "sqlite-vec-fallback" label gave operators the wrong signal.
            "backend": "sqlite-vec"}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
@app.post("/search")
async def search(req: SearchRequest):
    """Return the stored chunks most similar to ``req.query``.

    The query is embedded, every row of ``chunks`` is loaded, and cosine
    similarity is computed in Python. Rows with no embedding, or whose
    vector length differs from the query vector, are skipped.

    Args:
        req: The query text and the maximum number of hits. A negative
            ``limit`` is treated as 0.

    Returns:
        A list of ``{"path", "text", "score", "source", "layer"}`` dicts,
        best match first. Empty when the query is empty.

    Raises:
        HTTPException: 502 when the embed call raises or yields no vector.
    """
    import heapq

    if not req.query:
        return []
    try:
        embs = await _embed_batch([req.query])
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"embed: {exc}") from exc
    # Checked outside the try block so this HTTPException is not caught and
    # re-wrapped by the broad handler above.
    if not embs or not embs[0]:
        raise HTTPException(status_code=502, detail="embed failed")
    q_vec = embs[0]

    conn = _get_db()
    try:
        rows = conn.execute("SELECT id, text, embedding FROM chunks").fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    # Cosine similarity in Python — fine for OSS / small corpora. For
    # large corpora: consider a dedicated vector DB.
    scored: list[tuple[float, str, str]] = []
    for rid, text, blob in rows:
        if not blob:
            continue
        v = _blob_to_vec(blob)
        if len(v) != len(q_vec):
            continue
        scored.append((_cosine(q_vec, v), rid, text))

    # nlargest gives the same ordering as a full descending sort followed by
    # a slice, without sorting the whole corpus. Clamping keeps a negative
    # limit from silently dropping results off the tail of the list.
    top = heapq.nlargest(max(req.limit, 0), scored)
    return [
        {"path": rid, "text": text, "score": float(s),
         "source": "L4-sqlite-vec", "layer": "L4"}
        for s, rid, text in top
    ]
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
@app.post("/index-batch")
async def index_batch(req: IndexBatchRequest):
    """Embed (or accept pre-computed vectors for) a batch and upsert it.

    Each record's text comes from ``"text"`` or, failing that, ``"content"``,
    truncated to 8192 characters. Records whose embedding is empty are
    skipped. A record without an ``"id"`` is keyed by the first 32 hex
    characters of the SHA-1 of its text.

    Args:
        req: The records to index and, optionally, one embedding per record.

    Returns:
        ``{"status": "ok", "inserted": <rows written>}``, plus ``embed_ms``
        and ``insert_ms`` timings when the batch is non-empty.
    """
    if not req.records:
        return {"status": "ok", "inserted": 0}
    texts = [(r.get("text") or r.get("content") or "")[:8192] for r in req.records]
    t0 = time.perf_counter()
    # Shared-embed shortcut: caller (compat shim) computed vectors once
    # and forwards them so we skip the embed RPC. Length must match
    # records — otherwise fall back to embedding here.
    if req.embeddings is not None and len(req.embeddings) == len(req.records):
        embs = req.embeddings
    else:
        embs = await _embed_batch(texts)
    embed_ms = (time.perf_counter() - t0) * 1000.0

    conn = _get_db()
    try:
        t1 = time.perf_counter()
        rows = []
        for r, emb, txt in zip(req.records, embs, texts):
            if not emb:
                continue
            rid = r.get("id") or hashlib.sha1(txt.encode("utf-8")).hexdigest()[:32]
            rows.append((rid, txt, _vec_to_blob(emb), time.time()))
        if rows:
            conn.executemany(
                "INSERT OR REPLACE INTO chunks(id, text, embedding, indexed_at) "
                "VALUES (?, ?, ?, ?)", rows,
            )
            conn.commit()
        insert_ms = (time.perf_counter() - t1) * 1000.0
    finally:
        # Packing a malformed vector or a failed insert/commit would
        # otherwise leave this connection open.
        conn.close()
    return {"status": "ok", "inserted": len(rows),
            "embed_ms": round(embed_ms, 1), "insert_ms": round(insert_ms, 1)}
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
@app.post("/refresh")
def refresh():
    """Do nothing and report success.

    Writes are visible immediately, so there is nothing to refresh; the
    route exists only for API parity.
    """
    response = {"status": "ok", "noop": True}
    return response
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
# ----------------------------------------------------------------------
|
|
230
|
-
# /health/deep — synthetic round-trip
|
|
231
|
-
# ----------------------------------------------------------------------
|
|
232
|
-
|
|
233
|
-
# Fixed sentinel id used by /health/deep. Upserted on every probe call,
|
|
234
|
-
# so the row is idempotent. Kept under id="__healthcheck__sentinel" so
|
|
235
|
-
# the L4 corpus has at most one healthcheck row regardless of probe rate.
|
|
236
|
-
_HEALTH_SENTINEL_ID = "__healthcheck__sentinel"
|
|
237
|
-
_HEALTH_SENTINEL_TEXT = (
|
|
238
|
-
"healthcheck sentinel — embed-write-search round-trip verifier"
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
@app.get("/health/deep")
async def health_deep():
    """Real functional probe: embed → write → read back the sentinel.

    Catches the class of failure that plain /health misses — broken
    embed paths, write 500s, query path bugs — i.e. exactly the bug
    shape that silently degraded L6 from v0.8.0 → v0.8.2.

    Returns:
        {status, embed_ms, write_ms, search_ms, hit, ok}, plus total_ms
        when the probe runs to completion. Early failure returns carry
        only the fields gathered up to that point.

    `hit` confirms the sentinel row could be read back by id (this is a
    primary-key lookup, not the cosine scan that /search performs); `ok`
    is the aggregate `embed_ok AND write_ok AND hit`. HTTP status stays
    200 regardless so callers can read the body for diagnostics; the
    `status` field carries the verdict.
    """
    t_total = time.perf_counter()
    out: dict[str, Any] = {"status": "ok", "ok": True}
    try:
        t0 = time.perf_counter()
        embs = await _embed_batch([_HEALTH_SENTINEL_TEXT])
        out["embed_ms"] = round((time.perf_counter() - t0) * 1000.0, 1)
        if not embs or not embs[0]:
            out["status"] = "embed_failed"
            out["ok"] = False
            return out
        vec = embs[0]
    except Exception as exc:
        out["status"] = f"embed_error: {type(exc).__name__}"
        out["ok"] = False
        return out

    try:
        conn = _get_db()
        try:
            t1 = time.perf_counter()
            conn.execute(
                "INSERT OR REPLACE INTO chunks(id, text, embedding, indexed_at) "
                "VALUES (?, ?, ?, ?)",
                (_HEALTH_SENTINEL_ID, _HEALTH_SENTINEL_TEXT, _vec_to_blob(vec), time.time()),
            )
            conn.commit()
            out["write_ms"] = round((time.perf_counter() - t1) * 1000.0, 1)

            t2 = time.perf_counter()
            row = conn.execute("SELECT id, embedding FROM chunks WHERE id = ?",
                               (_HEALTH_SENTINEL_ID,)).fetchone()
            out["search_ms"] = round((time.perf_counter() - t2) * 1000.0, 1)
        finally:
            # This endpoint is polled; without the finally, every failing
            # probe would leave a connection open.
            conn.close()
    except Exception as exc:
        out["status"] = f"db_error: {type(exc).__name__}"
        out["ok"] = False
        return out

    out["hit"] = row is not None
    if not out["hit"]:
        out["status"] = "sentinel_missing"
        out["ok"] = False
    out["total_ms"] = round((time.perf_counter() - t_total) * 1000.0, 1)
    return out
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
# ----------------------------------------------------------------------
|
|
304
|
-
# Entrypoint
|
|
305
|
-
# ----------------------------------------------------------------------
|
|
306
|
-
|
|
307
|
-
if __name__ == "__main__":
    # Script entrypoint: parse CLI options, then serve the app with uvicorn.
    default_port = int(os.environ.get("PORT", "8042"))
    cli = argparse.ArgumentParser()
    cli.add_argument("--port", type=int, default=default_port)
    cli.add_argument("--data-dir", default=None)
    opts = cli.parse_args()
    if opts.data_dir:
        # Exported through the environment rather than assigned to DB_PATH.
        # NOTE(review): presumably because uvicorn imports "server:app" as a
        # fresh module that re-reads L4_DB_PATH at import — confirm.
        os.environ["L4_DB_PATH"] = str(Path(opts.data_dir) / "vec.db")
    import uvicorn
    uvicorn.run("server:app", host="0.0.0.0", port=opts.port, log_level="info")
|