@pentatonic-ai/ai-agent-sdk 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +39 -72
- package/dist/index.js +36 -69
- package/package.json +9 -2
- package/packages/memory/package-lock.json +49 -33
- package/packages/memory/package.json +4 -1
- package/packages/memory/src/__tests__/engine.test.js +40 -5
- package/packages/memory/src/engine.js +38 -3
- package/packages/memory-engine/docker-compose.yml +24 -2
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +125 -31
- package/packages/memory-engine/engine/services/l2/Dockerfile +7 -0
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +233 -60
- package/packages/memory-engine/tests/test_embed_provider.py +201 -0
- package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +280 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""Tests for the sqlite-vec-backed QMD search path in l2-hybridrag-proxy.
|
|
2
|
+
|
|
3
|
+
Validates the migration from the legacy Python-cosine-over-JSON path
|
|
4
|
+
(which had a silent `ORDER BY id LIMIT 2000` correctness bug — only
|
|
5
|
+
the OLDEST 2000 chunks were ever considered) to native sqlite-vec
|
|
6
|
+
KNN MATCH over a vec0 virtual table.
|
|
7
|
+
|
|
8
|
+
Pure-Python tests — no Neo4j, no Milvus. The proxy module is loaded
|
|
9
|
+
via importlib so we can call helpers and handlers directly, and
|
|
10
|
+
QMD_DB_PATH is overridden to a tmp_path file per test.
|
|
11
|
+
|
|
12
|
+
Run:
|
|
13
|
+
|
|
14
|
+
cd packages/memory-engine
|
|
15
|
+
.venv/bin/python -m pytest tests/test_l2_qmd_vec_search.py -v
|
|
16
|
+
|
|
17
|
+
The tests skip cleanly when ``sqlite_vec`` is not importable — useful
|
|
18
|
+
for unit-only runs on machines that don't have the wheel installed.
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import importlib.util
|
|
23
|
+
import json
|
|
24
|
+
import struct
|
|
25
|
+
import sys
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
import pytest
|
|
29
|
+
|
|
30
|
+
# Probe once, at import time, for the optional sqlite-vec wheel so tests
# that need it can be skipped rather than error out.
try:
    import sqlite_vec  # noqa: F401
except ImportError:
    _SQLITE_VEC_OK = False
else:
    _SQLITE_VEC_OK = True

# Decorator for every test that requires sqlite-vec to be importable.
_skip_no_sqlite_vec = pytest.mark.skipif(
    not _SQLITE_VEC_OK,
    reason="sqlite_vec wheel not installed in this venv",
)


# Directory containing l2-hybridrag-proxy.py, located relative to this file.
ENGINE_ROOT = Path(__file__).resolve().parent.parent.joinpath(
    "engine", "services", "l2"
)
# Presumably needed so imports performed by the proxy module itself can
# resolve siblings in that directory — confirm against the proxy source.
sys.path.insert(0, str(ENGINE_ROOT))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@pytest.fixture(scope="module")
def proxy_module():
    """Load ``l2-hybridrag-proxy.py`` from ``ENGINE_ROOT`` and return it.

    The file name contains hyphens, so it cannot be imported with a plain
    ``import`` statement; it is executed through an importlib spec under
    the alias ``l2_proxy_module_qmd_vec`` instead. The fixture is
    module-scoped, so the proxy is executed once per test module.

    If executing the proxy raises ``ImportError`` (its dependencies are
    not installed), the requesting test is skipped instead of erroring.
    """
    proxy_file = ENGINE_ROOT / "l2-hybridrag-proxy.py"
    spec = importlib.util.spec_from_file_location(
        "l2_proxy_module_qmd_vec", proxy_file
    )
    assert spec and spec.loader
    try:
        loaded = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(loaded)
    except ImportError:
        # pytest.skip raises, so nothing below runs on this path.
        pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")
    else:
        return loaded
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@pytest.fixture
def qmd_db(tmp_path, proxy_module, monkeypatch):
    """Point the proxy module at a per-test ``qmd.sqlite`` path.

    ``proxy_module.QMD_DB_PATH`` is monkeypatched to a file name under
    pytest's ``tmp_path``; ``monkeypatch`` restores the previous value at
    teardown. The file is not created here. Returns the ``Path`` so tests
    can run their own asserting queries against it.
    """
    sqlite_file = tmp_path.joinpath("qmd.sqlite")
    monkeypatch.setattr(proxy_module, "QMD_DB_PATH", str(sqlite_file))
    return sqlite_file
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _make_vec(seed: int, dim: int) -> list[float]:
|
|
76
|
+
"""Deterministic synthetic embedding — small enough to test fast,
|
|
77
|
+
structured enough that nearest-neighbour relationships are stable
|
|
78
|
+
across runs. The first slot dominates the cosine direction so we
|
|
79
|
+
can build orthogonal-ish clusters by varying its sign + magnitude."""
|
|
80
|
+
import random as _r
|
|
81
|
+
rng = _r.Random(seed)
|
|
82
|
+
return [rng.gauss(0.0, 1.0) for _ in range(dim)]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# 1. vec_index MATCH semantics — sanity check the SDK glue against sqlite-vec.
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@_skip_no_sqlite_vec
def test_vec_index_match_returns_top_k(qmd_db, proxy_module) -> None:
    """Check that a KNN ``MATCH`` on ``vec_index`` ranks an exact match first.

    Inserts 20 noise vectors plus one planted "ringer" at id=999, queries
    with the ringer's own vector and ``k = 5``, then asserts that five
    rows come back, that id=999 is the first of them, and that its
    reported distance is effectively zero. This is the minimum signal
    that ``_ensure_vec_index`` plus native MATCH work end-to-end at the
    dimension the proxy is configured for (``EMBED_DIM``).
    """
    conn = proxy_module._open_qmd_conn()
    try:
        proxy_module._ensure_vec_index(conn)
        dim = proxy_module.EMBED_DIM
        # Noise rows get a large positive slot 0; the ringer gets a large
        # negative slot 0, so it points roughly the opposite way from
        # every noise vector. A ranking by row id would put ids 1..5
        # first, so id=999 on top shows ordering is by similarity.
        for i in range(20):
            v = _make_vec(seed=i + 1, dim=dim)
            v[0] = abs(v[0]) + 10.0  # bias positive
            conn.execute(
                "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
                (i + 1, struct.pack(f"{dim}f", *v)),
            )
        ringer = _make_vec(seed=999, dim=dim)
        ringer[0] = -abs(ringer[0]) - 10.0  # bias negative — opposite cluster
        conn.execute(
            "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
            (999, struct.pack(f"{dim}f", *ringer)),
        )
        conn.commit()
        qbytes = struct.pack(f"{dim}f", *ringer)
        rows = conn.execute(
            """
            SELECT id, distance
            FROM vec_index
            WHERE embedding MATCH ? AND k = ?
            ORDER BY distance
            """,
            (qbytes, 5),
        ).fetchall()
    finally:
        # Close even when an insert or the query raises, so a failing
        # test does not leak the connection.
        conn.close()
    assert len(rows) == 5
    top_id, top_dist = rows[0]
    assert top_id == 999, f"expected ringer id=999, got {top_id} ({rows!r})"
    # The query bytes are identical to the stored ringer bytes, so the
    # self-distance should be ~0 (true for cosine and for L2 alike);
    # the tolerance covers float32 round-trip slop.
    assert top_dist < 1e-3, f"ringer-vs-itself should be ~0, got {top_dist}"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# 2. search_qmd_informed uses vec_index, not the legacy JSON-cosine path.
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@_skip_no_sqlite_vec
def test_search_qmd_informed_uses_vec_index(qmd_db, proxy_module, monkeypatch) -> None:
    """Exercise ``search_qmd_informed`` against seeded chunks + vec_index.

    Seeds 20 noise rows (ids 1..20) and one ringer row (id=999) into both
    ``chunks`` and ``vec_index``, mocks ``get_embedding`` to return the
    ringer's vector for any query, and asserts that the top result
    carries the ringer's ``path`` / ``text`` from ``chunks``, has
    ``base_similarity`` above 0.9, and is tagged ``source == "vector"``.

    NOTE(review): this was written as a regression guard against the
    legacy ``ORDER BY id LIMIT 2000`` scan. With only 21 rows seeded, a
    2000-row prefix ordered by id would still contain id=999, so row
    position alone does not make that path fail here. Seed more than
    2000 lower-id rows if that guard is required.
    """
    conn = proxy_module._open_qmd_conn()
    try:
        proxy_module._ensure_vec_index(conn)
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS chunks (
                id INTEGER PRIMARY KEY,
                path TEXT,
                text TEXT,
                embedding TEXT,
                embedding_model TEXT,
                embedding_dim INTEGER,
                chunk_index INTEGER,
                created_at TEXT
            )
            """
        )
        dim = proxy_module.EMBED_DIM
        # Noise rows 1..20 get a large positive slot 0 and the ringer a
        # large negative one, so the ringer is the only vector close to
        # the (mocked) query embedding.
        for i in range(20):
            v = _make_vec(seed=i + 1, dim=dim)
            v[0] = abs(v[0]) + 10.0
            conn.execute(
                "INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
                (i + 1, f"noise/{i}.md", f"noise text {i}", json.dumps(v)),
            )
            conn.execute(
                "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
                (i + 1, struct.pack(f"{dim}f", *v)),
            )
        ringer = _make_vec(seed=999, dim=dim)
        ringer[0] = -abs(ringer[0]) - 10.0
        conn.execute(
            "INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
            (999, "ringer/needle.md", "needle in the haystack", json.dumps(ringer)),
        )
        conn.execute(
            "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
            (999, struct.pack(f"{dim}f", *ringer)),
        )
        conn.commit()
    finally:
        # Release the seeding connection even if an insert fails.
        conn.close()

    # Mock get_embedding to return the ringer's vector for any query.
    monkeypatch.setattr(proxy_module, "get_embedding", lambda *_a, **_kw: ringer)

    out = proxy_module.search_qmd_informed(
        "any query — get_embedding is mocked",
        {"graph_entities": []},
        limit=3,
    )
    assert out, "search returned empty; vec_index path must surface ringer"
    top = out[0]
    assert top["path"] == "ringer/needle.md", (
        f"top hit should be the ringer at row 999; got {top['path']}. "
        f"If this fails, the search may have reverted to the LIMIT 2000 "
        f"legacy path which never sees row 999."
    )
    assert top["text"] == "needle in the haystack"
    assert top["base_similarity"] > 0.9
    assert top["source"] == "vector"
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
# 3. Backfill is idempotent — second run on a populated vec_index is no-op.
|
|
219
|
+
# ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@_skip_no_sqlite_vec
def test_backfill_idempotent(qmd_db, proxy_module) -> None:
    """Run ``_backfill_vec_index`` twice and check the row count stays at N.

    Seeds ``chunks`` with N rows that have JSON embeddings and leaves
    ``vec_index`` empty, then asserts:

    1. After the first backfill, ``vec_index`` holds exactly N rows.
    2. After the second backfill, it still holds exactly N rows and no
       exception was raised.

    Guards against a backfill without an idempotency check re-inserting
    ids that already exist on the second invocation.
    """
    import asyncio

    def _vec_row_count() -> int:
        """Count vec_index rows on a short-lived connection, always closing it."""
        probe = proxy_module._open_qmd_conn()
        try:
            return probe.execute("SELECT count(*) FROM vec_index").fetchone()[0]
        finally:
            probe.close()

    N = 7
    conn = proxy_module._open_qmd_conn()
    try:
        proxy_module._ensure_vec_index(conn)
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS chunks (
                id INTEGER PRIMARY KEY,
                path TEXT,
                text TEXT,
                embedding TEXT,
                embedding_model TEXT,
                embedding_dim INTEGER,
                chunk_index INTEGER,
                created_at TEXT
            )
            """
        )
        dim = proxy_module.EMBED_DIM
        for i in range(N):
            v = _make_vec(seed=i + 100, dim=dim)
            conn.execute(
                "INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
                (i + 1, f"p/{i}.md", f"t{i}", json.dumps(v)),
            )
        conn.commit()
        pre_chunks = conn.execute("SELECT count(*) FROM chunks").fetchone()[0]
        pre_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
    finally:
        # Close the seeding connection even if setup fails part-way.
        conn.close()
    assert pre_chunks == N and pre_vec == 0, (
        f"setup mismatch: chunks={pre_chunks}, vec={pre_vec}"
    )

    # First run — should copy all N rows.
    asyncio.run(proxy_module._backfill_vec_index())
    mid_vec = _vec_row_count()
    assert mid_vec == N, f"first backfill should copy all {N} rows, got {mid_vec}"

    # Second run — must no-op cleanly. No exception, no duplicate inserts.
    asyncio.run(proxy_module._backfill_vec_index())
    final_vec = _vec_row_count()
    assert final_vec == N, (
        f"second backfill should be no-op; got {final_vec} rows instead of {N}"
    )
|