@pentatonic-ai/ai-agent-sdk 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,315 +0,0 @@
1
- """
2
- L4 sqlite-vec sidecar.
3
-
4
- Vector index sidecar for the Pentatonic Memory Engine stack.
5
- Exposes /health, /search, /index-batch, /refresh over HTTP.
6
-
7
- Endpoints:
8
- GET /health
- GET /health/deep synthetic embed -> write -> read-back probe
9
- POST /search body: {"query":"...", "limit":10}
10
- POST /index-batch body: {"records":[{"id","text"}, ...]}
11
- POST /refresh no-op (sqlite-vec writes are immediate)
12
-
13
- Env:
14
- L4_DB_PATH default /data/vec.db
- L4_EMBED_DIM default 4096
15
- L4_NV_EMBED_URL default http://nv-embed:8041/v1/embeddings
16
- PORT default 8042
17
- """
18
-
19
- from __future__ import annotations
20
-
21
- import argparse
22
- import hashlib
23
- import os
24
- import sqlite3
25
- import struct
26
- import sys
27
- import time
28
- from pathlib import Path
29
- from typing import Any
30
-
31
- from fastapi import FastAPI, HTTPException
32
- from pydantic import BaseModel
33
-
34
- # Shared embedding client lives at engine/services/_shared/. Add the parent of
35
- # the service dir to sys.path so `from _shared.embed_provider import ...` works
36
- # regardless of how the service is launched (uvicorn, python server.py, etc.).
37
- sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
38
- from _shared.embed_provider import EmbedClient # noqa: E402
39
-
40
- # ----------------------------------------------------------------------
41
- # Config
42
- # ----------------------------------------------------------------------
43
-
44
# Location of the SQLite database file; override with L4_DB_PATH.
DB_PATH = os.getenv("L4_DB_PATH", "/data/vec.db")
# Embedding dimension reported by /health; override with L4_EMBED_DIM.
EMBED_DIM = int(os.getenv("L4_EMBED_DIM", "4096"))
46
-
47
-
48
-
49
- # ----------------------------------------------------------------------
50
- # DB helpers
51
- # ----------------------------------------------------------------------
52
-
53
- def _vec_to_blob(vec: list[float]) -> bytes:
54
- """Pack a list of floats as little-endian f32 bytes for sqlite-vec."""
55
- return struct.pack(f"<{len(vec)}f", *vec)
56
-
57
-
58
- def _blob_to_vec(blob: bytes) -> list[float]:
59
- n = len(blob) // 4
60
- return list(struct.unpack(f"<{n}f", blob))
61
-
62
-
63
- def _cosine(a: list[float], b: list[float]) -> float:
64
- import math
65
- dot = sum(x * y for x, y in zip(a, b))
66
- na = math.sqrt(sum(x * x for x in a))
67
- nb = math.sqrt(sum(y * y for y in b))
68
- if na == 0 or nb == 0:
69
- return 0.0
70
- return dot / (na * nb)
71
-
72
-
73
def _get_db() -> sqlite3.Connection:
    """Open the database at ``DB_PATH`` and make sure the schema exists.

    Embeddings live in a plain BLOB column rather than a sqlite-vec virtual
    table: sqlite-vec is an optional extension that may not be loadable in
    every container, and plain BLOBs let the service score rows with a
    Python-side cosine pass without losing correctness.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        Exception: whatever ``mkdir``/``sqlite3`` raise when the directory
            or database cannot be prepared. If the connection was already
            opened it is closed before the error propagates.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH, timeout=10)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                id TEXT PRIMARY KEY,
                text TEXT,
                embedding BLOB,
                indexed_at REAL
            )
        """)
    except Exception:
        # Don't leak the handle when schema setup fails.
        conn.close()
        raise
    return conn
91
-
92
-
93
- # ----------------------------------------------------------------------
94
- # Embedding client
95
- # ----------------------------------------------------------------------
96
-
97
_embed: "EmbedClient | None" = None


def _embed_client() -> "EmbedClient":
    """Return the module-wide embed client, creating it on first call.

    Construction is deferred so that ``EmbedClient.from_env`` runs at first
    use rather than at import time.
    """
    global _embed
    if _embed is not None:
        return _embed
    _embed = EmbedClient.from_env(
        prefix="L4_",
        default_url="http://nv-embed:8041/v1/embeddings",
    )
    return _embed
109
-
110
-
111
async def _embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed *texts* in one call to the shared EmbedClient and return its result."""
    client = _embed_client()
    vectors = await client.embed_batch_async(texts)
    return vectors
114
-
115
-
116
- # ----------------------------------------------------------------------
117
- # FastAPI
118
- # ----------------------------------------------------------------------
119
-
120
class SearchRequest(BaseModel):
    """Request body for ``POST /search``."""

    # Free-text query; it is embedded and compared against stored chunks.
    query: str
    # Upper bound on the number of hits returned.
    limit: int = 10
123
-
124
-
125
class IndexBatchRequest(BaseModel):
    """Request body for ``POST /index-batch``."""

    # Records to index; the handler reads "id" and "text" (or "content").
    records: list[dict[str, Any]]
    # Optional precomputed vectors, parallel to `records`. When supplied,
    # the embed call is skipped and these vectors are used directly. The
    # compat shim populates this in shared-embed mode so embed work is not
    # duplicated across layers.
    embeddings: list[list[float]] | None = None
131
-
132
-
133
app = FastAPI(title="L4 sqlite-vec sidecar (OSS)")


@app.get("/health")
def health():
    """Cheap probe: open the DB and count the indexed rows.

    Both outcomes are returned as a normal response body. On success the
    body reports status "ok" with the row count, configured dimension and
    DB path; any exception is reported as status "degraded" with the
    error text.
    """
    try:
        conn = _get_db()
        try:
            n = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
        finally:
            # Close even when the query fails, so a failing probe does not
            # leak one connection per call.
            conn.close()
        return {"status": "ok", "loaded": True, "n_vectors": n,
                "dim": EMBED_DIM, "db_path": DB_PATH,
                # BLOB+Python-cosine is the intentional implementation path,
                # not a degraded fallback (see _get_db docstring). The previous
                # "sqlite-vec-fallback" label gave operators the wrong signal.
                "backend": "sqlite-vec"}
    except Exception as exc:
        return {"status": "degraded", "error": str(exc)}
150
-
151
-
152
@app.post("/search")
async def search(req: SearchRequest):
    """Embed the query and return the closest chunks by cosine similarity.

    Returns a list of ``{"path", "text", "score", "source", "layer"}``
    dicts, best match first, at most ``req.limit`` long (a negative limit
    is treated as 0). An empty query yields an empty list without calling
    the embedder.

    Raises:
        HTTPException: 502 when the embed call raises or returns no vector.
    """
    if not req.query:
        return []
    try:
        embs = await _embed_batch([req.query])
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"embed: {exc}") from exc
    # Checked outside the try so this HTTPException is not caught and
    # re-wrapped by the generic handler above.
    if not embs or not embs[0]:
        raise HTTPException(status_code=502, detail="embed failed")
    q_vec = embs[0]

    conn = _get_db()
    try:
        rows = conn.execute("SELECT id, text, embedding FROM chunks").fetchall()
    finally:
        conn.close()

    # Cosine similarity in Python — fine for OSS / small corpora. For
    # large corpora: consider a dedicated vector DB.
    expected_bytes = 4 * len(q_vec)  # f32 => 4 bytes per component
    scored: list[tuple[float, str, str]] = []
    for rid, text, blob in rows:
        # Skip rows with no embedding or with a different dimension. The
        # byte-length test also rejects truncated blobs before decoding.
        if not blob or len(blob) != expected_bytes:
            continue
        scored.append((_cosine(q_vec, _blob_to_vec(blob)), rid, text))
    scored.sort(reverse=True)
    # Clamp so a negative limit cannot turn into a slice from the end.
    top = scored[: max(req.limit, 0)]
    return [
        {"path": rid, "text": text, "score": float(s),
         "source": "L4-sqlite-vec", "layer": "L4"}
        for s, rid, text in top
    ]
186
-
187
-
188
@app.post("/index-batch")
async def index_batch(req: IndexBatchRequest):
    """Upsert a batch of records with their embeddings into ``chunks``.

    Each record's text is taken from "text" (or "content") and truncated
    to 8192 characters. Vectors come from ``req.embeddings`` when it is
    supplied and parallel to ``req.records``; otherwise the texts are
    embedded here. Records whose vector is empty are skipped. A record
    without an "id" gets one derived from the SHA-1 of its text.

    Returns:
        ``{"status", "inserted", "embed_ms", "insert_ms"}``; for an empty
        batch only ``{"status", "inserted"}``.

    Raises:
        HTTPException: 502 when the embed call raises, matching /search.
    """
    if not req.records:
        return {"status": "ok", "inserted": 0}
    texts = [(r.get("text") or r.get("content") or "")[:8192] for r in req.records]
    t0 = time.perf_counter()
    # Shared-embed shortcut: caller (compat shim) computed vectors once
    # and forwards them so we skip the embed RPC. Length must match
    # records — defensive bail to re-embedding if it doesn't.
    if req.embeddings is not None and len(req.embeddings) == len(req.records):
        embs = req.embeddings
    else:
        try:
            embs = await _embed_batch(texts)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"embed: {exc}") from exc
    embed_ms = (time.perf_counter() - t0) * 1000.0

    conn = _get_db()
    try:
        t1 = time.perf_counter()
        rows = []
        for r, emb, txt in zip(req.records, embs, texts):
            if not emb:
                continue
            rid = r.get("id") or hashlib.sha1(txt.encode("utf-8")).hexdigest()[:32]
            rows.append((rid, txt, _vec_to_blob(emb), time.time()))
        if rows:
            conn.executemany(
                "INSERT OR REPLACE INTO chunks(id, text, embedding, indexed_at) "
                "VALUES (?, ?, ?, ?)", rows,
            )
            conn.commit()
        insert_ms = (time.perf_counter() - t1) * 1000.0
    finally:
        # Release the handle even when packing or the insert raises.
        conn.close()
    return {"status": "ok", "inserted": len(rows),
            "embed_ms": round(embed_ms, 1), "insert_ms": round(insert_ms, 1)}
221
-
222
-
223
@app.post("/refresh")
def refresh():
    """Acknowledge a refresh request without doing any work.

    Writes are immediate for sqlite-vec, so there is nothing to rebuild;
    the endpoint is kept for API parity.
    """
    response = {"status": "ok", "noop": True}
    return response
227
-
228
-
229
- # ----------------------------------------------------------------------
230
- # /health/deep — synthetic round-trip
231
- # ----------------------------------------------------------------------
232
-
233
- # Fixed sentinel id used by /health/deep. Upserted on every probe call,
234
- # so the row is idempotent. Kept under id="__healthcheck__sentinel" so
235
- # the L4 corpus has at most one healthcheck row regardless of probe rate.
236
_HEALTH_SENTINEL_ID = "__healthcheck__sentinel"
_HEALTH_SENTINEL_TEXT = (
    "healthcheck sentinel — embed-write-search round-trip verifier"
)


@app.get("/health/deep")
async def health_deep():
    """Functional probe: embed the sentinel, upsert it, then read it back.

    Catches the class of failure that plain /health misses — broken
    embed paths, write 500s, query path bugs — i.e. exactly the bug
    shape that silently degraded L6 from v0.8.0 → v0.8.2.

    Returns:
        A dict with ``status``, ``ok`` and ``total_ms`` on every path, plus
        ``embed_ms``, ``write_ms``, ``search_ms`` and ``hit`` for the stages
        that were reached.

    ``hit`` confirms the sentinel row was read back by primary key (this
    is a point lookup, not a vector search). ``ok`` is False as soon as
    any stage fails. The handler returns the body normally in every case
    so callers can read it for diagnostics; the ``status`` field carries
    the verdict.

    NOTE: the sentinel is stored in the same ``chunks`` table that /search
    scans and /health counts, so it is included in both.
    """
    t_total = time.perf_counter()
    out: dict[str, Any] = {"status": "ok", "ok": True}

    def _done(status=None):
        # Record a failing verdict (if any) and the elapsed time, so every
        # exit path reports total_ms.
        if status is not None:
            out["status"] = status
            out["ok"] = False
        out["total_ms"] = round((time.perf_counter() - t_total) * 1000.0, 1)
        return out

    try:
        t0 = time.perf_counter()
        embs = await _embed_batch([_HEALTH_SENTINEL_TEXT])
        out["embed_ms"] = round((time.perf_counter() - t0) * 1000.0, 1)
        if not embs or not embs[0]:
            return _done("embed_failed")
        vec = embs[0]
    except Exception as exc:
        return _done(f"embed_error: {type(exc).__name__}")

    try:
        conn = _get_db()
        try:
            t1 = time.perf_counter()
            conn.execute(
                "INSERT OR REPLACE INTO chunks(id, text, embedding, indexed_at) "
                "VALUES (?, ?, ?, ?)",
                (_HEALTH_SENTINEL_ID, _HEALTH_SENTINEL_TEXT, _vec_to_blob(vec), time.time()),
            )
            conn.commit()
            out["write_ms"] = round((time.perf_counter() - t1) * 1000.0, 1)

            t2 = time.perf_counter()
            row = conn.execute("SELECT id, embedding FROM chunks WHERE id = ?",
                               (_HEALTH_SENTINEL_ID,)).fetchone()
            out["search_ms"] = round((time.perf_counter() - t2) * 1000.0, 1)
        finally:
            # Close on failure too; a failing probe must not leak handles.
            conn.close()
    except Exception as exc:
        return _done(f"db_error: {type(exc).__name__}")

    out["hit"] = row is not None
    return _done(None if out["hit"] else "sentinel_missing")
301
-
302
-
303
- # ----------------------------------------------------------------------
304
- # Entrypoint
305
- # ----------------------------------------------------------------------
306
-
307
def _main() -> None:
    """Parse CLI flags and serve the app with uvicorn."""
    default_port = int(os.environ.get("PORT", "8042"))
    cli = argparse.ArgumentParser()
    cli.add_argument("--port", type=int, default=default_port)
    cli.add_argument("--data-dir", default=None)
    opts = cli.parse_args()

    if opts.data_dir:
        # Passed through the environment: DB_PATH is read from L4_DB_PATH
        # when the module is imported.
        os.environ["L4_DB_PATH"] = str(Path(opts.data_dir) / "vec.db")

    import uvicorn

    uvicorn.run("server:app", host="0.0.0.0", port=opts.port, log_level="info")


if __name__ == "__main__":
    _main()