@pentatonic-ai/ai-agent-sdk 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -906,7 +906,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
906
906
  }
907
907
 
908
908
  // src/telemetry.js
909
- var VERSION = "0.9.4";
909
+ var VERSION = "0.9.5";
910
910
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
911
911
  function machineId() {
912
912
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js CHANGED
@@ -875,7 +875,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
875
875
  }
876
876
 
877
877
  // src/telemetry.js
878
- var VERSION = "0.9.4";
878
+ var VERSION = "0.9.5";
879
879
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
880
880
  function machineId() {
881
881
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.9.4",
3
+ "version": "0.9.5",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -220,7 +220,14 @@ services:
220
220
  interval: 10s
221
221
  timeout: 5s
222
222
  retries: 30
223
- start_period: 60s
223
+ # 180s gives L2 enough time to finish Neo4j schema + index creation
224
+ # on a cold start before compat's healthcheck starts counting failures.
225
+ # Observed concretely on the v0.9.4 deploy (2026-05-14): L2 took
226
+ # ~90s to warm up; with start_period: 60s, compat went unhealthy
227
+ # mid-startup, cloudflared's `depends_on: condition: service_healthy`
228
+ # failed, and `docker compose up` errored out before wait_for_health
229
+ # could observe the eventual recovery.
230
+ start_period: 180s
224
231
 
225
232
  networks:
226
233
  engine-net:
@@ -9,9 +9,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
9
9
 
10
10
  # Reranker = sentence-transformers MiniLM cross-encoder.
11
11
  # Torch CPU wheels are fine — reranker is small enough to be CPU-bound.
12
+ #
13
+ # sqlite-vec 0.1.9: native KNN over packed-f32 vectors stored in a vec0
14
+ # virtual table. Replaces the legacy hand-rolled Python cosine loop over
15
+ # JSON-serialised embeddings in search_qmd_informed (~15s timeout at 450k
16
+ # rows → ~50ms native MATCH). Pin to 0.1.9 — that's the version probed
17
+ # against L4 QMD's wire format (struct.pack f32 + cosine distance_metric).
12
18
  RUN pip install --no-cache-dir \
13
19
  fastapi "uvicorn[standard]" httpx requests pydantic \
14
20
  neo4j \
21
+ sqlite-vec==0.1.9 \
15
22
  "sentence-transformers" \
16
23
  "torch" --extra-index-url https://download.pytorch.org/whl/cpu
17
24
 
@@ -18,6 +18,7 @@ import json
18
18
  import logging
19
19
  import os
20
20
  import sqlite3
21
+ import struct
21
22
  import sys
22
23
  import time
23
24
  from contextlib import asynccontextmanager
@@ -34,6 +35,11 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
34
35
  from pydantic import BaseModel
35
36
  import uvicorn
36
37
 
38
+ try:
39
+ import sqlite_vec # 0.1.9 — native KNN MATCH over packed-f32 vec0 tables
40
+ except ImportError:
41
+ sqlite_vec = None # Caller logs loudly if helpers can't load the extension
42
+
37
43
  # Shared embed client lives at engine/services/_shared/.
38
44
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
39
45
  from _shared.embed_provider import EmbedClient # noqa: E402
@@ -101,6 +107,59 @@ QMD_DB_PATH = _resolve_qmd_db()
101
107
  OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
102
108
  EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
103
109
 
110
+ # Embedding dimension for the vec0 virtual table. Production gateway
111
+ # (lambda-gateway.pentatonic.com/v1/embed via pentatonic-gateway provider)
112
+ # returns NV-Embed-v2 4096-dim vectors. The vec0 schema requires the dim
113
+ # at DDL time and writers must match — keep this in lockstep with the
114
+ # gateway / EmbedClient config.
115
+ EMBED_DIM = int(os.environ.get("PME_EMBED_DIM", "4096"))
116
+
117
+
118
+ def _open_qmd_conn() -> sqlite3.Connection:
119
+ """Open qmd.sqlite with sqlite-vec loaded.
120
+
121
+ Falls back to a plain sqlite3 connection if the extension can't load —
122
+ MATCH-form queries will then fail loudly at execute time, which is the
123
+ right signal (loud error > silent degradation back to Python cosine).
124
+ Callers that only need scalar columns (chunks.path, chunks.text) work
125
+ fine without the extension.
126
+
127
+ ``check_same_thread=False`` is intentional: the async backfill yields
128
+ via ``asyncio.to_thread`` to keep /search responsive, which means the
129
+ connection is handed off between event-loop / thread-pool workers.
130
+ sqlite's default thread-safety check would otherwise reject the
131
+ cross-thread reuse even though only one worker touches it at a time.
132
+ """
133
+ conn = sqlite3.connect(QMD_DB_PATH, timeout=10, check_same_thread=False)
134
+ if sqlite_vec is None:
135
+ log.error("sqlite_vec module not importable — qmd vec_index unavailable")
136
+ return conn
137
+ try:
138
+ conn.enable_load_extension(True)
139
+ sqlite_vec.load(conn)
140
+ conn.enable_load_extension(False)
141
+ except Exception as e:
142
+ log.error(f"sqlite-vec load failed: {e} — qmd search will be degraded")
143
+ return conn
144
+
145
+
146
+ def _ensure_vec_index(conn: sqlite3.Connection) -> None:
147
+ """Create the vec0 KNN index if not already present. Idempotent.
148
+
149
+ `distance_metric=cosine` is non-default — sqlite-vec defaults to L2
150
+ (Euclidean). Probe confirmed cosine returns `1 - cos_sim` as the
151
+ distance. The id column is a regular INTEGER PRIMARY KEY so we can
152
+ JOIN back to `chunks` on the row's autoinc id.
153
+ """
154
+ conn.execute(
155
+ f"""
156
+ CREATE VIRTUAL TABLE IF NOT EXISTS vec_index USING vec0(
157
+ id INTEGER PRIMARY KEY,
158
+ embedding float[{EMBED_DIM}] distance_metric=cosine
159
+ )
160
+ """
161
+ )
162
+
104
163
  # NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
105
164
  # managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
106
165
  # selects auth scheme (Bearer vs X-API-Key) and request shape.
@@ -177,13 +236,25 @@ def get_http_client() -> httpx.AsyncClient:
177
236
  async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
178
237
  """Open the neo4j driver + HTTP client at process startup, close on
179
238
  shutdown. Without this, the first request pays driver-open latency
180
- and the driver is never properly closed on SIGTERM (leaking conns)."""
239
+ and the driver is never properly closed on SIGTERM (leaking conns).
240
+
241
+ Also schedules the vec_index backfill as a background task so the
242
+ proxy can start serving immediately while older chunks copy across
243
+ into the KNN index — first-time migration of ~450k rows takes
244
+ minutes and would otherwise block /health.
245
+ """
181
246
  global _neo4j_driver, _http_client
182
247
  _neo4j_driver = AsyncGraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
183
248
  _http_client = httpx.AsyncClient(timeout=30.0)
249
+ backfill_task = asyncio.create_task(_backfill_vec_index())
184
250
  try:
185
251
  yield
186
252
  finally:
253
+ backfill_task.cancel()
254
+ try:
255
+ await backfill_task
256
+ except (asyncio.CancelledError, Exception):
257
+ pass
187
258
  if _neo4j_driver is not None:
188
259
  await _neo4j_driver.close()
189
260
  _neo4j_driver = None
@@ -192,6 +263,82 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
192
263
  _http_client = None
193
264
 
194
265
 
266
+ async def _backfill_vec_index() -> None:
267
+ """One-time migration: copy existing chunks.embedding (JSON) into
268
+ vec_index (f32 bytes).
269
+
270
+ Idempotent. Runs at proxy startup if vec_index has fewer rows than
271
+ chunks. Async so it doesn't block /health — the proxy serves
272
+ requests in parallel and search degrades to partial-corpus results
273
+ until the backfill finishes (any chunk already mirrored into
274
+ vec_index is findable; the rest are invisible to search but still
275
+ in L0/L3/L5/L6).
276
+
277
+ At 450k rows + ~460 rows/s insert rate this takes ~16 min on a
278
+ cold prod instance. Subsequent restarts no-op cleanly.
279
+ """
280
+ if sqlite_vec is None:
281
+ log.error("sqlite_vec module missing — backfill skipped, search will be degraded")
282
+ return
283
+ if not os.path.exists(QMD_DB_PATH):
284
+ log.info("vec_index backfill skipped — qmd.sqlite does not exist yet")
285
+ return
286
+ try:
287
+ conn = await asyncio.to_thread(_open_qmd_conn)
288
+ await asyncio.to_thread(_ensure_vec_index, conn)
289
+ chunks_n = conn.execute(
290
+ "SELECT count(*) FROM chunks WHERE embedding IS NOT NULL"
291
+ ).fetchone()[0]
292
+ vec_n = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
293
+ if vec_n >= chunks_n:
294
+ log.info(f"vec_index backfill skipped — already in sync ({vec_n}/{chunks_n})")
295
+ conn.close()
296
+ return
297
+ missing = chunks_n - vec_n
298
+ log.info(f"vec_index backfill starting — {missing} rows to copy")
299
+ cursor = conn.execute(
300
+ """
301
+ SELECT c.id, c.embedding
302
+ FROM chunks c
303
+ LEFT JOIN vec_index v ON v.id = c.id
304
+ WHERE v.id IS NULL AND c.embedding IS NOT NULL
305
+ """
306
+ )
307
+ BATCH = 500
308
+ copied = 0
309
+ while True:
310
+ batch = await asyncio.to_thread(cursor.fetchmany, BATCH)
311
+ if not batch:
312
+ break
313
+ def _insert_batch() -> int:
314
+ inserted = 0
315
+ with conn:
316
+ for cid, emb_json in batch:
317
+ try:
318
+ vec = json.loads(emb_json)
319
+ except Exception:
320
+ continue
321
+ if len(vec) != EMBED_DIM:
322
+ continue
323
+ conn.execute(
324
+ "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
325
+ (cid, struct.pack(f"{len(vec)}f", *vec)),
326
+ )
327
+ inserted += 1
328
+ return inserted
329
+ copied += await asyncio.to_thread(_insert_batch)
330
+ log.info(f"vec_index backfill progress: {copied}/{missing}")
331
+ # Yield generously so /search + writers aren't starved.
332
+ await asyncio.sleep(0)
333
+ log.info(f"vec_index backfill done — {copied} rows copied")
334
+ conn.close()
335
+ except asyncio.CancelledError:
336
+ log.info("vec_index backfill cancelled during shutdown")
337
+ raise
338
+ except Exception as e:
339
+ log.error(f"vec_index backfill failed: {e}")
340
+
341
+
195
342
  app = FastAPI(title="Sequential HybridRAG Proxy", version="1.0.0", lifespan=lifespan)
196
343
 
197
344
  # ---------------------------------------------------------------------------
@@ -613,7 +760,15 @@ def cross_encoder_rerank(query: str, results: List[Dict], top_k: int = 16) -> Li
613
760
  return scored[:top_k] + remaining
614
761
 
615
762
  def search_qmd_informed(query: str, graph_context: Dict, limit: int = 12) -> List[Dict]:
616
- """Phase 2: QMD vector search informed by graph results."""
763
+ """Phase 2: QMD vector search via sqlite-vec MATCH.
764
+
765
+ Replaces the legacy Python cosine loop over JSON-serialised embeddings
766
+ (which also had an `ORDER BY id LIMIT 2000` bug — only the OLDEST
767
+ 2000 rows were ever considered, so 99%+ of the corpus was invisible to
768
+ search at production scale). Now: native KNN over the vec0 index,
769
+ full-corpus top-k. Wall time at 450k rows: ~50ms native MATCH vs
770
+ ~15s timeout previously.
771
+ """
617
772
  if not os.path.exists(QMD_DB_PATH):
618
773
  return []
619
774
 
@@ -621,69 +776,64 @@ def search_qmd_informed(query: str, graph_context: Dict, limit: int = 12) -> Lis
621
776
  if not query_embedding:
622
777
  return []
623
778
 
624
- # Enhance query with graph entities for better vector search
625
779
  enhanced_query = query
626
780
  if graph_context["graph_entities"]:
627
781
  enhanced_query += " " + " ".join(graph_context["graph_entities"][:3])
628
-
629
- enhanced_embedding = get_embedding(enhanced_query)
630
- if not enhanced_embedding:
631
- enhanced_embedding = query_embedding
782
+ enhanced_embedding = get_embedding(enhanced_query) or query_embedding
783
+
784
+ if len(enhanced_embedding) != EMBED_DIM:
785
+ # Dim mismatch vs vec0 DDL — the MATCH would error inside sqlite-vec.
786
+ # Bail with a loud log; an embedding-model mismatch in prod is the
787
+ # likely root cause and silent degradation would hide it.
788
+ log.error(
789
+ f"QMD search: query dim {len(enhanced_embedding)} != vec_index dim "
790
+ f"{EMBED_DIM} — embedding model mismatch?"
791
+ )
792
+ return []
793
+ qbytes = struct.pack(f"{len(enhanced_embedding)}f", *enhanced_embedding)
632
794
 
633
795
  try:
634
- conn = sqlite3.connect(QMD_DB_PATH, timeout=5)
635
- conn.row_factory = sqlite3.Row
636
-
637
- # Get vectors and compute similarity
638
- rows = conn.execute("""
639
- SELECT id, path, text, embedding
640
- FROM chunks
641
- WHERE embedding IS NOT NULL
642
- ORDER BY id
643
- LIMIT 2000
644
- """).fetchall()
796
+ conn = _open_qmd_conn()
797
+ # Pull a candidate pool larger than `limit` so entity-boost
798
+ # re-ranking has material to work with — 4× limit, floor 50.
799
+ k_pool = max(limit * 4, 50)
800
+ rows = conn.execute(
801
+ """
802
+ SELECT c.id, c.path, c.text, v.distance
803
+ FROM vec_index v
804
+ JOIN chunks c ON c.id = v.id
805
+ WHERE v.embedding MATCH ? AND k = ?
806
+ ORDER BY v.distance
807
+ """,
808
+ (qbytes, k_pool),
809
+ ).fetchall()
810
+ conn.close()
645
811
 
646
812
  results = []
647
- for row in rows:
648
- try:
649
- # Deserialize embedding
650
- embedding_data = row["embedding"]
651
- if isinstance(embedding_data, str):
652
- embedding = json.loads(embedding_data)
653
- else:
654
- embedding = list(embedding_data)
655
-
656
- # Cosine similarity with enhanced query
657
- dot = sum(a * b for a, b in zip(enhanced_embedding, embedding))
658
- norm_q = sum(x * x for x in enhanced_embedding) ** 0.5
659
- norm_e = sum(x * x for x in embedding) ** 0.5
660
-
661
- if norm_q > 0 and norm_e > 0:
662
- similarity = dot / (norm_q * norm_e)
663
-
664
- # Boost score if path contains graph entities
665
- entity_boost = 0
666
- path_lower = row["path"].lower()
667
- for entity in graph_context["graph_entities"]:
668
- if entity.lower() in path_lower or entity.lower() in row["text"].lower():
669
- entity_boost = GRAPH_PRIORITY_BOOST
670
- break
671
-
672
- final_score = (similarity * VECTOR_BASE_WEIGHT) + entity_boost
673
-
674
- if similarity > 0.2: # Threshold for inclusion
675
- results.append({
676
- "path": row["path"],
677
- "text": row["text"][:600],
678
- "score": final_score,
679
- "source": "vector",
680
- "base_similarity": similarity,
681
- "entity_boost": entity_boost
682
- })
683
- except Exception as e:
684
- logging.debug(f"Suppressed: {e}")
685
-
686
- conn.close()
813
+ for row_id, path, text, distance in rows:
814
+ # vec0 distance_metric=cosine returns `1 - cos_sim` —
815
+ # invert to align with the rest of the codebase's `similarity`
816
+ # convention (1.0 = identical, 0.0 = orthogonal).
817
+ similarity = 1.0 - distance
818
+ if similarity <= 0.2:
819
+ continue
820
+ entity_boost = 0
821
+ path_lower = (path or "").lower()
822
+ text_lower = (text or "").lower()
823
+ for entity in graph_context["graph_entities"]:
824
+ el = entity.lower()
825
+ if el in path_lower or el in text_lower:
826
+ entity_boost = GRAPH_PRIORITY_BOOST
827
+ break
828
+ final_score = (similarity * VECTOR_BASE_WEIGHT) + entity_boost
829
+ results.append({
830
+ "path": path,
831
+ "text": (text or "")[:600],
832
+ "score": final_score,
833
+ "source": "vector",
834
+ "base_similarity": similarity,
835
+ "entity_boost": entity_boost,
836
+ })
687
837
  results.sort(key=lambda x: x["score"], reverse=True)
688
838
  return results[:limit]
689
839
 
@@ -1598,7 +1748,11 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
1598
1748
  log.warning(f"L4 embed count mismatch: {len(embeddings)} != {len(norm)}")
1599
1749
  qmd_db = Path(QMD_DB_PATH)
1600
1750
  qmd_db.parent.mkdir(parents=True, exist_ok=True)
1601
- conn = sqlite3.connect(str(qmd_db), timeout=10)
1751
+ # Open with sqlite-vec loaded so we can dual-write to vec_index
1752
+ # below. If extension load fails, vec_index inserts silently no-op
1753
+ # via the try/except — chunks (JSON) still gets the write so the
1754
+ # corpus stays whole; search just degrades to the old path.
1755
+ conn = _open_qmd_conn()
1602
1756
  conn.execute("PRAGMA journal_mode=WAL")
1603
1757
  conn.execute("""
1604
1758
  CREATE TABLE IF NOT EXISTS chunks (
@@ -1612,14 +1766,33 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
1612
1766
  created_at TEXT
1613
1767
  )
1614
1768
  """)
1769
+ try:
1770
+ _ensure_vec_index(conn)
1771
+ except Exception as e:
1772
+ log.error(f"vec_index DDL failed: {e} — falling back to chunks-only write")
1615
1773
  for n, vec in zip(norm, embeddings):
1616
1774
  if not vec:
1617
1775
  continue
1618
- conn.execute(
1776
+ cur = conn.execute(
1619
1777
  "INSERT INTO chunks (path, text, embedding, embedding_model, embedding_dim, chunk_index, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
1620
1778
  (f"bench/{arena}/{n['path']}.md", n["content"],
1621
1779
  json.dumps(vec), "nv-embed-v2", len(vec), 0, now_iso),
1622
1780
  )
1781
+ # Mirror into the vec0 KNN index so search_qmd_informed can
1782
+ # MATCH on the f32-packed vector. Dim must match the vec0 DDL
1783
+ # (EMBED_DIM); skip rows where the embedding shape disagrees
1784
+ # so a single bad row doesn't poison the batch insert.
1785
+ if cur.lastrowid is not None and len(vec) == EMBED_DIM:
1786
+ try:
1787
+ conn.execute(
1788
+ "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
1789
+ (cur.lastrowid, struct.pack(f"{len(vec)}f", *vec)),
1790
+ )
1791
+ except Exception as e:
1792
+ # vec_index dual-write is defensive — the JSON column
1793
+ # in chunks is still the source of truth until the
1794
+ # backfill task confirms vec_index is in sync.
1795
+ log.debug(f"vec_index insert skipped for row {cur.lastrowid}: {e}")
1623
1796
  l4_inserted += 1
1624
1797
  conn.commit()
1625
1798
  conn.close()
@@ -0,0 +1,280 @@
1
+ """Tests for the sqlite-vec-backed QMD search path in l2-hybridrag-proxy.
2
+
3
+ Validates the migration from the legacy Python-cosine-over-JSON path
4
+ (which had a silent `ORDER BY id LIMIT 2000` correctness bug — only
5
+ the OLDEST 2000 chunks were ever considered) to native sqlite-vec
6
+ KNN MATCH over a vec0 virtual table.
7
+
8
+ Pure-Python tests — no Neo4j, no Milvus. The proxy module is loaded
9
+ via importlib so we can call helpers and handlers directly, and
10
+ QMD_DB_PATH is overridden to a tmp_path file per test.
11
+
12
+ Run:
13
+
14
+ cd packages/memory-engine
15
+ .venv/bin/python -m pytest tests/test_l2_qmd_vec_search.py -v
16
+
17
+ The tests skip cleanly when ``sqlite_vec`` is not importable — useful
18
+ for unit-only runs on machines that don't have the wheel installed.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import importlib.util
23
+ import json
24
+ import struct
25
+ import sys
26
+ from pathlib import Path
27
+
28
+ import pytest
29
+
30
+ try:
31
+ import sqlite_vec # noqa: F401
32
+ _SQLITE_VEC_OK = True
33
+ except ImportError:
34
+ _SQLITE_VEC_OK = False
35
+
36
+ _skip_no_sqlite_vec = pytest.mark.skipif(
37
+ not _SQLITE_VEC_OK,
38
+ reason="sqlite_vec wheel not installed in this venv",
39
+ )
40
+
41
+
42
+ ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
43
+ sys.path.insert(0, str(ENGINE_ROOT))
44
+
45
+
46
+ @pytest.fixture(scope="module")
47
+ def proxy_module():
48
+ """Load l2-hybridrag-proxy as a module. Same pattern as
49
+ test_channel_stat_reader / test_people_list_reader so the
50
+ module-load failure mode (missing deps) skips cleanly rather than
51
+ erroring."""
52
+ spec = importlib.util.spec_from_file_location(
53
+ "l2_proxy_module_qmd_vec",
54
+ ENGINE_ROOT / "l2-hybridrag-proxy.py",
55
+ )
56
+ assert spec and spec.loader
57
+ try:
58
+ mod = importlib.util.module_from_spec(spec)
59
+ spec.loader.exec_module(mod)
60
+ except ImportError:
61
+ pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")
62
+ return mod
63
+
64
+
65
+ @pytest.fixture
66
+ def qmd_db(tmp_path, proxy_module, monkeypatch):
67
+ """Per-test qmd.sqlite at a tmp path, with the proxy module pointed
68
+ at it. Yields the path so tests can run their own asserting queries
69
+ against it."""
70
+ db_path = tmp_path / "qmd.sqlite"
71
+ monkeypatch.setattr(proxy_module, "QMD_DB_PATH", str(db_path))
72
+ return db_path
73
+
74
+
75
+ def _make_vec(seed: int, dim: int) -> list[float]:
76
+ """Deterministic synthetic embedding — small enough to test fast,
77
+ structured enough that nearest-neighbour relationships are stable
78
+ across runs. The first slot dominates the cosine direction so we
79
+ can build orthogonal-ish clusters by varying its sign + magnitude."""
80
+ import random as _r
81
+ rng = _r.Random(seed)
82
+ return [rng.gauss(0.0, 1.0) for _ in range(dim)]
83
+
84
+
85
+ # ---------------------------------------------------------------------------
86
+ # 1. vec_index MATCH semantics — sanity check the SDK glue against sqlite-vec.
87
+ # ---------------------------------------------------------------------------
88
+
89
+
90
+ @_skip_no_sqlite_vec
91
+ def test_vec_index_match_returns_top_k(qmd_db, proxy_module) -> None:
92
+ """Insert N known vectors with a planted ringer, query with the
93
+ ringer's vector, assert the ringer is the top hit. This is the
94
+ minimum signal that ``_ensure_vec_index`` + native MATCH actually
95
+ work end-to-end against the dim our proxy is configured for."""
96
+ conn = proxy_module._open_qmd_conn()
97
+ proxy_module._ensure_vec_index(conn)
98
+ dim = proxy_module.EMBED_DIM
99
+ # 20 rows of noise + 1 planted ringer at id=999. Planted vector is
100
+ # near-orthogonal to the noise (which uses positive-slot dominance)
101
+ # by flipping the first slot's sign — confirms the cosine MATCH
102
+ # actually orders by similarity, not by row id.
103
+ for i in range(20):
104
+ v = _make_vec(seed=i + 1, dim=dim)
105
+ v[0] = abs(v[0]) + 10.0 # bias positive
106
+ conn.execute(
107
+ "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
108
+ (i + 1, struct.pack(f"{dim}f", *v)),
109
+ )
110
+ ringer = _make_vec(seed=999, dim=dim)
111
+ ringer[0] = -abs(ringer[0]) - 10.0 # bias negative — opposite cluster
112
+ conn.execute(
113
+ "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
114
+ (999, struct.pack(f"{dim}f", *ringer)),
115
+ )
116
+ conn.commit()
117
+ qbytes = struct.pack(f"{dim}f", *ringer)
118
+ rows = conn.execute(
119
+ """
120
+ SELECT id, distance
121
+ FROM vec_index
122
+ WHERE embedding MATCH ? AND k = ?
123
+ ORDER BY distance
124
+ """,
125
+ (qbytes, 5),
126
+ ).fetchall()
127
+ conn.close()
128
+ assert len(rows) == 5
129
+ top_id, top_dist = rows[0]
130
+ assert top_id == 999, f"expected ringer id=999, got {top_id} ({rows!r})"
131
+ # Cosine distance = 1 - cos_sim, so identity vector → ~0 distance.
132
+ # Ringer-vs-itself is exact, so we expect ~0 here; allow float32
133
+ # round-trip slop.
134
+ assert top_dist < 1e-3, f"ringer-vs-itself should be ~0, got {top_dist}"
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # 2. search_qmd_informed uses vec_index, not the legacy JSON-cosine path.
139
+ # ---------------------------------------------------------------------------
140
+
141
+
142
+ @_skip_no_sqlite_vec
143
+ def test_search_qmd_informed_uses_vec_index(qmd_db, proxy_module, monkeypatch) -> None:
144
+ """Full search path test: seed chunks + vec_index, mock
145
+ ``get_embedding`` to return a vector that matches the ringer,
146
+ assert the returned results are sourced from the vec_index JOIN
147
+ (which preserves path/text from chunks) and ranked by similarity.
148
+
149
+ This is the test that would fail if someone reverted the search
150
+ body to the legacy ``ORDER BY id LIMIT 2000`` path — because the
151
+ ringer's id is 999 (well outside the 2000-row prefix), the legacy
152
+ path would never see it."""
153
+ import sqlite3
154
+ conn = proxy_module._open_qmd_conn()
155
+ proxy_module._ensure_vec_index(conn)
156
+ conn.execute(
157
+ """
158
+ CREATE TABLE IF NOT EXISTS chunks (
159
+ id INTEGER PRIMARY KEY,
160
+ path TEXT,
161
+ text TEXT,
162
+ embedding TEXT,
163
+ embedding_model TEXT,
164
+ embedding_dim INTEGER,
165
+ chunk_index INTEGER,
166
+ created_at TEXT
167
+ )
168
+ """
169
+ )
170
+ dim = proxy_module.EMBED_DIM
171
+ # Noise rows 1..20 + planted ringer id=999. Same orthogonal-cluster
172
+ # setup as test 1 — guarantees the ringer wins on cosine.
173
+ for i in range(20):
174
+ v = _make_vec(seed=i + 1, dim=dim)
175
+ v[0] = abs(v[0]) + 10.0
176
+ conn.execute(
177
+ "INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
178
+ (i + 1, f"noise/{i}.md", f"noise text {i}", json.dumps(v)),
179
+ )
180
+ conn.execute(
181
+ "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
182
+ (i + 1, struct.pack(f"{dim}f", *v)),
183
+ )
184
+ ringer = _make_vec(seed=999, dim=dim)
185
+ ringer[0] = -abs(ringer[0]) - 10.0
186
+ conn.execute(
187
+ "INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
188
+ (999, "ringer/needle.md", "needle in the haystack", json.dumps(ringer)),
189
+ )
190
+ conn.execute(
191
+ "INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
192
+ (999, struct.pack(f"{dim}f", *ringer)),
193
+ )
194
+ conn.commit()
195
+ conn.close()
196
+
197
+ # Mock get_embedding to return the ringer's vector for any query.
198
+ monkeypatch.setattr(proxy_module, "get_embedding", lambda *_a, **_kw: ringer)
199
+
200
+ out = proxy_module.search_qmd_informed(
201
+ "any query — get_embedding is mocked",
202
+ {"graph_entities": []},
203
+ limit=3,
204
+ )
205
+ assert out, "search returned empty; vec_index path must surface ringer"
206
+ top = out[0]
207
+ assert top["path"] == "ringer/needle.md", (
208
+ f"top hit should be the ringer at row 999; got {top['path']}. "
209
+ f"If this fails, the search may have reverted to the LIMIT 2000 "
210
+ f"legacy path which never sees row 999."
211
+ )
212
+ assert top["text"] == "needle in the haystack"
213
+ assert top["base_similarity"] > 0.9
214
+ assert top["source"] == "vector"
215
+
216
+
217
+ # ---------------------------------------------------------------------------
218
+ # 3. Backfill is idempotent — second run on a populated vec_index is no-op.
219
+ # ---------------------------------------------------------------------------
220
+
221
+
222
+ @_skip_no_sqlite_vec
223
+ def test_backfill_idempotent(qmd_db, proxy_module) -> None:
224
+ """Seed chunks with JSON embeddings only (no vec_index rows),
225
+ call ``_backfill_vec_index`` twice, assert:
226
+ 1. First call copies all rows into vec_index.
227
+ 2. Second call observes vec_n >= chunks_n and is a no-op (no
228
+ duplicate inserts, no errors).
229
+
230
+ Catches the failure mode where a missing idempotency check would
231
+ INSERT duplicate ids on the second invocation, blow up the UNIQUE
232
+ constraint, and corrupt the index."""
233
+ import asyncio
234
+ conn = proxy_module._open_qmd_conn()
235
+ proxy_module._ensure_vec_index(conn)
236
+ conn.execute(
237
+ """
238
+ CREATE TABLE IF NOT EXISTS chunks (
239
+ id INTEGER PRIMARY KEY,
240
+ path TEXT,
241
+ text TEXT,
242
+ embedding TEXT,
243
+ embedding_model TEXT,
244
+ embedding_dim INTEGER,
245
+ chunk_index INTEGER,
246
+ created_at TEXT
247
+ )
248
+ """
249
+ )
250
+ dim = proxy_module.EMBED_DIM
251
+ N = 7
252
+ for i in range(N):
253
+ v = _make_vec(seed=i + 100, dim=dim)
254
+ conn.execute(
255
+ "INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
256
+ (i + 1, f"p/{i}.md", f"t{i}", json.dumps(v)),
257
+ )
258
+ conn.commit()
259
+ pre_chunks = conn.execute("SELECT count(*) FROM chunks").fetchone()[0]
260
+ pre_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
261
+ conn.close()
262
+ assert pre_chunks == N and pre_vec == 0, (
263
+ f"setup mismatch: chunks={pre_chunks}, vec={pre_vec}"
264
+ )
265
+
266
+ # First run — should copy all N rows.
267
+ asyncio.run(proxy_module._backfill_vec_index())
268
+ conn = proxy_module._open_qmd_conn()
269
+ mid_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
270
+ conn.close()
271
+ assert mid_vec == N, f"first backfill should copy all {N} rows, got {mid_vec}"
272
+
273
+ # Second run — must no-op cleanly. No exception, no duplicate inserts.
274
+ asyncio.run(proxy_module._backfill_vec_index())
275
+ conn = proxy_module._open_qmd_conn()
276
+ final_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
277
+ conn.close()
278
+ assert final_vec == N, (
279
+ f"second backfill should be no-op; got {final_vec} rows instead of {N}"
280
+ )