@pentatonic-ai/ai-agent-sdk 0.7.3 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory-engine/compat/server.py +21 -7
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +67 -21
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -5
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +4 -26
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.4",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -204,8 +204,12 @@ async def _index_l4(records: list[dict[str, Any]]) -> int:
|
|
|
204
204
|
return 0
|
|
205
205
|
|
|
206
206
|
|
|
207
|
-
async def _index_l5(records: list[dict[str, Any]]) -> int:
|
|
208
|
-
"""Index records into the L5 Milvus comms layer (chats collection).
|
|
207
|
+
async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
|
|
208
|
+
"""Index records into the L5 Milvus comms layer (chats collection).
|
|
209
|
+
|
|
210
|
+
arena is forwarded as a Milvus dynamic field so /search can filter
|
|
211
|
+
by arena natively (vs the shim's defence-in-depth post-filter).
|
|
212
|
+
"""
|
|
209
213
|
payload = {
|
|
210
214
|
"collection": "chats",
|
|
211
215
|
"records": [
|
|
@@ -215,6 +219,7 @@ async def _index_l5(records: list[dict[str, Any]]) -> int:
|
|
|
215
219
|
"source": (r.get("metadata") or {}).get("source", "shim"),
|
|
216
220
|
"channel": "pentatonic-memory",
|
|
217
221
|
"contact": (r.get("metadata") or {}).get("user", ""),
|
|
222
|
+
"arena": (r.get("metadata") or {}).get("arena") or arena,
|
|
218
223
|
}
|
|
219
224
|
for r in records
|
|
220
225
|
],
|
|
@@ -369,7 +374,7 @@ async def store(req: StoreRequest):
|
|
|
369
374
|
import asyncio
|
|
370
375
|
l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
|
|
371
376
|
_index_l4([record]),
|
|
372
|
-
_index_l5([record]),
|
|
377
|
+
_index_l5([record], arena=arena),
|
|
373
378
|
_index_l6([record], arena=arena),
|
|
374
379
|
_index_l2_internal([record], arena=arena),
|
|
375
380
|
)
|
|
@@ -414,7 +419,7 @@ async def store_batch(req: StoreBatchRequest):
|
|
|
414
419
|
import asyncio
|
|
415
420
|
l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
|
|
416
421
|
_index_l4(normalised),
|
|
417
|
-
_index_l5(normalised),
|
|
422
|
+
_index_l5(normalised, arena=req.arena or "general"),
|
|
418
423
|
_index_l6(normalised, arena=req.arena or "general"),
|
|
419
424
|
_index_l2_internal(normalised, arena=req.arena or "general"),
|
|
420
425
|
)
|
|
@@ -633,9 +638,12 @@ async def search(req: SearchRequest):
|
|
|
633
638
|
out_results = _apply_metadata_filters(out_results, req)
|
|
634
639
|
return {"results": out_results[: req.limit or 10]}
|
|
635
640
|
try:
|
|
641
|
+
get_params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
|
|
642
|
+
if req.arena:
|
|
643
|
+
get_params["arena"] = req.arena
|
|
636
644
|
r = await _client().get(
|
|
637
645
|
f"{L2_PROXY_URL}/search",
|
|
638
|
-
params={"q": req.query, "limit": _search_overfetch(req)},
|
|
646
|
+
params=get_params,
|
|
639
647
|
timeout=30.0,
|
|
640
648
|
)
|
|
641
649
|
r.raise_for_status()
|
|
@@ -643,10 +651,16 @@ async def search(req: SearchRequest):
|
|
|
643
651
|
except Exception as exc:
|
|
644
652
|
last_err = exc
|
|
645
653
|
try:
|
|
654
|
+
post_body: dict[str, Any] = {
|
|
655
|
+
"query": req.query,
|
|
656
|
+
"limit": _search_overfetch(req),
|
|
657
|
+
"min_score": req.min_score or 0.001,
|
|
658
|
+
}
|
|
659
|
+
if req.arena:
|
|
660
|
+
post_body["arena"] = req.arena
|
|
646
661
|
r = await _client().post(
|
|
647
662
|
f"{L2_PROXY_URL}/v1/search",
|
|
648
|
-
json={"query": req.query, "limit": _search_overfetch(req),
|
|
649
|
-
"min_score": req.min_score or 0.001},
|
|
663
|
+
json=post_body,
|
|
650
664
|
timeout=30.0,
|
|
651
665
|
)
|
|
652
666
|
r.raise_for_status()
|
|
@@ -719,12 +719,17 @@ L0_MEMORY_DB = Path(os.environ.get(
|
|
|
719
719
|
str(Path.home() / ".pentatonic" / "memory" / "main.sqlite"),
|
|
720
720
|
))
|
|
721
721
|
|
|
722
|
-
def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
722
|
+
def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
723
723
|
"""Search native BM25 index over workspace memory files.
|
|
724
|
-
|
|
724
|
+
|
|
725
725
|
Covers chunks from daily notes, memory files, people profiles,
|
|
726
726
|
infrastructure docs, project files — corpus that L3-L6 don't index.
|
|
727
727
|
Sub-millisecond local SQLite reads, zero network overhead.
|
|
728
|
+
|
|
729
|
+
arena (optional): when set, filter to paths under bench/<arena>/.
|
|
730
|
+
Records stored via the compat shim land under that prefix per
|
|
731
|
+
_stash_all_keys; this is the L0 path-based equivalent of the
|
|
732
|
+
arena dynamic-field filter on L5/L6.
|
|
728
733
|
"""
|
|
729
734
|
if not L0_MEMORY_DB.exists():
|
|
730
735
|
return []
|
|
@@ -741,16 +746,21 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
|
741
746
|
|
|
742
747
|
conn = sqlite3.connect(str(L0_MEMORY_DB), timeout=2)
|
|
743
748
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
744
|
-
|
|
749
|
+
sql = """
|
|
745
750
|
SELECT path, text, bm25(chunks_fts) as rank
|
|
746
751
|
FROM chunks_fts
|
|
747
752
|
WHERE chunks_fts MATCH ?
|
|
748
753
|
AND path NOT LIKE '%/snapshots/%'
|
|
749
754
|
AND path NOT LIKE '%/archive/%'
|
|
750
755
|
AND path NOT LIKE '%-backup-%'
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
756
|
+
"""
|
|
757
|
+
params: list = [fts_query]
|
|
758
|
+
if arena:
|
|
759
|
+
sql += " AND path LIKE ?"
|
|
760
|
+
params.append(f"bench/{arena}/%")
|
|
761
|
+
sql += " ORDER BY rank ASC LIMIT ?"
|
|
762
|
+
params.append(limit * 2)
|
|
763
|
+
rows = conn.execute(sql, params).fetchall()
|
|
754
764
|
conn.close()
|
|
755
765
|
|
|
756
766
|
results = []
|
|
@@ -761,12 +771,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
|
761
771
|
seen_paths.add(path)
|
|
762
772
|
relevance = -rank if rank < 0 else 0.001
|
|
763
773
|
score = min(relevance / (1 + relevance) * 0.85, 0.75)
|
|
774
|
+
# Parse arena from path (bench/<arena>/...) so downstream
|
|
775
|
+
# consumers can read it directly without parsing again.
|
|
776
|
+
row_arena = ""
|
|
777
|
+
if path.startswith("bench/"):
|
|
778
|
+
parts = path.split("/", 2)
|
|
779
|
+
if len(parts) >= 3:
|
|
780
|
+
row_arena = parts[1]
|
|
764
781
|
results.append({
|
|
765
782
|
"path": f"L0/{path}",
|
|
766
783
|
"snippet": text[:500],
|
|
767
784
|
"score": round(score, 4),
|
|
768
785
|
"layer": "L0_workspace_bm25",
|
|
769
786
|
"source": path,
|
|
787
|
+
"arena": row_arena,
|
|
770
788
|
})
|
|
771
789
|
if len(results) >= limit:
|
|
772
790
|
break
|
|
@@ -782,12 +800,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
|
|
|
782
800
|
|
|
783
801
|
L5_API_URL = os.environ.get("PME_L5_URL", "http://127.0.0.1:8034")
|
|
784
802
|
|
|
785
|
-
def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
786
|
-
"""Search L5 Communications Context via L5 API (emails, chats, calendar).
|
|
803
|
+
def search_l5_communications(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
804
|
+
"""Search L5 Communications Context via L5 API (emails, chats, calendar).
|
|
805
|
+
|
|
806
|
+
arena (optional): forwarded to L5; filters Milvus by the arena
|
|
807
|
+
dynamic field. Records id is included in the result so callers
|
|
808
|
+
can attach metadata via the shim's _META_CACHE.
|
|
809
|
+
"""
|
|
787
810
|
try:
|
|
811
|
+
params: dict = {"q": query, "limit": limit}
|
|
812
|
+
if arena:
|
|
813
|
+
params["arena"] = arena
|
|
788
814
|
resp = requests.get(
|
|
789
815
|
f"{L5_API_URL}/search",
|
|
790
|
-
params={"q": query, "limit": limit},
|
|
816
|
+
params=params,
|
|
791
817
|
timeout=10,
|
|
792
818
|
)
|
|
793
819
|
if resp.status_code != 200:
|
|
@@ -804,10 +830,15 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
|
804
830
|
continue # skip low relevance
|
|
805
831
|
contact = hit.get("contact", "")
|
|
806
832
|
channel = hit.get("channel", "")
|
|
807
|
-
|
|
808
|
-
|
|
833
|
+
hit_id = hit.get("id", "")
|
|
834
|
+
# Use record id as path label so the shim can attach
|
|
835
|
+
# metadata via _META_CACHE; falls back to source label
|
|
836
|
+
# for legacy records that have no id.
|
|
837
|
+
path_label = hit_id or f"L5/{source}"
|
|
838
|
+
if not hit_id and contact:
|
|
809
839
|
path_label = f"L5/{channel}/{contact}"
|
|
810
840
|
results.append({
|
|
841
|
+
"id": hit_id,
|
|
811
842
|
"path": path_label,
|
|
812
843
|
"snippet": hit.get("text", "")[:500],
|
|
813
844
|
"score": scaled_score,
|
|
@@ -815,6 +846,7 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
|
815
846
|
"source": source,
|
|
816
847
|
"collection": hit.get("collection", ""),
|
|
817
848
|
"timestamp": hit.get("timestamp", ""),
|
|
849
|
+
"arena": hit.get("arena", ""),
|
|
818
850
|
})
|
|
819
851
|
return results
|
|
820
852
|
except Exception as e:
|
|
@@ -825,12 +857,19 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
|
|
|
825
857
|
# L6: Document Store Search
|
|
826
858
|
L6_URL = os.environ.get("PME_L6_URL", "http://localhost:8037")
|
|
827
859
|
|
|
828
|
-
def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
|
|
829
|
-
"""Search L6 Document Store (research, legal, financial, project docs).
|
|
860
|
+
def search_l6_documents(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
|
|
861
|
+
"""Search L6 Document Store (research, legal, financial, project docs).
|
|
862
|
+
|
|
863
|
+
arena (optional): forwarded to L6 — L6 already supports arena
|
|
864
|
+
natively (see l6-document-store.py search_vector / search_fts).
|
|
865
|
+
"""
|
|
830
866
|
try:
|
|
867
|
+
params: dict = {"q": query, "method": "hybrid", "limit": limit, "rerank": "true"}
|
|
868
|
+
if arena:
|
|
869
|
+
params["arena"] = arena
|
|
831
870
|
resp = requests.get(
|
|
832
871
|
f"{L6_URL}/search",
|
|
833
|
-
params={"q": query, "method": "hybrid", "limit": limit, "rerank": "true"},
|
|
872
|
+
params=params,
|
|
834
873
|
timeout=10,
|
|
835
874
|
)
|
|
836
875
|
if resp.status_code != 200:
|
|
@@ -875,13 +914,19 @@ def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
|
|
|
875
914
|
return []
|
|
876
915
|
|
|
877
916
|
|
|
878
|
-
def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
|
|
879
|
-
"""Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
|
|
917
|
+
def sequential_hybridrag_search(query: str, limit: int = 16, arena: str = None) -> List[Dict]:
|
|
918
|
+
"""Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
|
|
919
|
+
|
|
920
|
+
arena (optional): tenant scope. Forwarded to L0 (path-prefix
|
|
921
|
+
filter), L5 (Milvus dynamic-field filter), L6 (native arena).
|
|
922
|
+
L4 vector and L3 graph don't yet support native arena filtering;
|
|
923
|
+
the compat shim post-filter catches those before they leak out.
|
|
924
|
+
"""
|
|
880
925
|
start_time = time.time()
|
|
881
|
-
log.info(f"Starting sequential HybridRAG search for: '{query}'")
|
|
926
|
+
log.info(f"Starting sequential HybridRAG search for: '{query}' arena={arena!r}")
|
|
882
927
|
|
|
883
928
|
# L0: BM25 workspace memory (keyword search — complements semantic layers)
|
|
884
|
-
l0_results = search_l0_bm25(query, limit=6)
|
|
929
|
+
l0_results = search_l0_bm25(query, limit=6, arena=arena)
|
|
885
930
|
log.info(f"L0 BM25 workspace: {len(l0_results)} results")
|
|
886
931
|
|
|
887
932
|
# L1: System Files (HIGHEST PRIORITY)
|
|
@@ -902,11 +947,11 @@ def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
|
|
|
902
947
|
log.info(f"L4 Vector search: {len(vector_results)} results (HyDE={'on' if hyde_query != query else 'off'})")
|
|
903
948
|
|
|
904
949
|
# L5: Communications Context (emails, chats, calendar) — also use HyDE
|
|
905
|
-
l5_results = search_l5_communications(hyde_query, limit=6)
|
|
950
|
+
l5_results = search_l5_communications(hyde_query, limit=6, arena=arena)
|
|
906
951
|
log.info(f"L5 Communications: {len(l5_results)} results")
|
|
907
952
|
|
|
908
953
|
# L6: Document Store (research, legal, financial, project docs)
|
|
909
|
-
l6_results = search_l6_documents(hyde_query, limit=6)
|
|
954
|
+
l6_results = search_l6_documents(hyde_query, limit=6, arena=arena)
|
|
910
955
|
log.info(f"L6 Documents: {len(l6_results)} results")
|
|
911
956
|
|
|
912
957
|
# L2: HybridRAG fusion (combines all layers with L1 priority)
|
|
@@ -966,10 +1011,11 @@ async def search_endpoint(request: Request) -> dict:
|
|
|
966
1011
|
body = await request.json()
|
|
967
1012
|
query = body.get("query", "")
|
|
968
1013
|
limit = body.get("limit", 16)
|
|
1014
|
+
arena = body.get("arena") or None
|
|
969
1015
|
if not query:
|
|
970
1016
|
raise HTTPException(status_code=400, detail="query is required")
|
|
971
1017
|
|
|
972
|
-
results = sequential_hybridrag_search(query, limit=limit)
|
|
1018
|
+
results = sequential_hybridrag_search(query, limit=limit, arena=arena)
|
|
973
1019
|
|
|
974
1020
|
# Also return raw graph entities for context enrichment
|
|
975
1021
|
entities = extract_query_entities(query)
|
|
@@ -449,8 +449,13 @@ def index_memory(client):
|
|
|
449
449
|
|
|
450
450
|
# --- Search ---
|
|
451
451
|
|
|
452
|
-
def search(query: str, collection: str = None, limit: int = 10):
|
|
453
|
-
"""Search across collections.
|
|
452
|
+
def search(query: str, collection: str = None, limit: int = 10, arena: str = None):
|
|
453
|
+
"""Search across collections.
|
|
454
|
+
|
|
455
|
+
arena (optional): when set, filter to records whose arena dynamic
|
|
456
|
+
field matches. Records indexed before arena was added carry no
|
|
457
|
+
arena field — those are dropped under multi-tenant safety.
|
|
458
|
+
"""
|
|
454
459
|
client = get_client()
|
|
455
460
|
vectors = embed_texts([query])
|
|
456
461
|
if not vectors or all(v == 0.0 for v in vectors[0]):
|
|
@@ -460,6 +465,12 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
460
465
|
collections = [collection] if collection else ["chats", "emails", "contacts", "memory"]
|
|
461
466
|
all_results = []
|
|
462
467
|
|
|
468
|
+
filter_expr = ""
|
|
469
|
+
if arena:
|
|
470
|
+
# Escape double quotes; Milvus filter syntax for dynamic fields.
|
|
471
|
+
safe = str(arena).replace('"', '\\"')
|
|
472
|
+
filter_expr = f'arena == "{safe}"'
|
|
473
|
+
|
|
463
474
|
for coll in collections:
|
|
464
475
|
if not client.has_collection(coll):
|
|
465
476
|
continue
|
|
@@ -468,12 +479,14 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
468
479
|
collection_name=coll,
|
|
469
480
|
data=[vectors[0]],
|
|
470
481
|
limit=limit,
|
|
471
|
-
|
|
482
|
+
filter=filter_expr,
|
|
483
|
+
output_fields=["text", "source", "channel", "contact", "timestamp", "arena"],
|
|
472
484
|
)
|
|
473
485
|
for hits in results:
|
|
474
486
|
for hit in hits:
|
|
475
487
|
entity = hit.get("entity", {})
|
|
476
488
|
all_results.append({
|
|
489
|
+
"id": hit.get("id", ""),
|
|
477
490
|
"collection": coll,
|
|
478
491
|
"score": round(hit.get("distance", 0), 4),
|
|
479
492
|
"text": entity.get("text", ""),
|
|
@@ -481,6 +494,7 @@ def search(query: str, collection: str = None, limit: int = 10):
|
|
|
481
494
|
"channel": entity.get("channel", ""),
|
|
482
495
|
"contact": entity.get("contact", ""),
|
|
483
496
|
"timestamp": entity.get("timestamp", ""),
|
|
497
|
+
"arena": entity.get("arena", ""),
|
|
484
498
|
})
|
|
485
499
|
except Exception as e:
|
|
486
500
|
print(f" Search error in {coll}: {e}")
|
|
@@ -547,8 +561,9 @@ def serve(port=8034):
|
|
|
547
561
|
return health()
|
|
548
562
|
|
|
549
563
|
@api.get("/search")
|
|
550
|
-
def api_search(q: str = Query(...), collection: str = None, limit: int = 10):
|
|
551
|
-
|
|
564
|
+
def api_search(q: str = Query(...), collection: str = None, limit: int = 10,
|
|
565
|
+
arena: str = None):
|
|
566
|
+
results = search(q, collection=collection, limit=limit, arena=arena)
|
|
552
567
|
return {"query": q, "results": results, "count": len(results)}
|
|
553
568
|
|
|
554
569
|
@api.get("/stats")
|
|
@@ -618,6 +633,10 @@ def serve(port=8034):
|
|
|
618
633
|
"channel": (r.get("channel") or "")[:64],
|
|
619
634
|
"contact": (r.get("contact") or "")[:256],
|
|
620
635
|
"timestamp": (r.get("timestamp") or _now)[:32],
|
|
636
|
+
# arena lands in the dynamic-field section of the
|
|
637
|
+
# collection (enable_dynamic_field=True). Filterable
|
|
638
|
+
# via `arena == "..."` in /search.
|
|
639
|
+
"arena": (r.get("arena") or "general")[:64],
|
|
621
640
|
})
|
|
622
641
|
t1 = _time.time()
|
|
623
642
|
if rows:
|
|
@@ -94,35 +94,13 @@ log = logging.getLogger("l6-document-store")
|
|
|
94
94
|
_embed_client = httpx.Client(timeout=60)
|
|
95
95
|
|
|
96
96
|
def embed_text(text: str) -> List[float]:
|
|
97
|
-
"""
|
|
98
|
-
|
|
99
|
-
try:
|
|
100
|
-
resp = _embed_client.post(NV_EMBED_URL, json={"input": text[:4000]})
|
|
101
|
-
resp.raise_for_status()
|
|
102
|
-
return resp.json()["data"][0]["embedding"]
|
|
103
|
-
except Exception as e:
|
|
104
|
-
log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
|
|
105
|
-
|
|
106
|
-
# Ollama fallback
|
|
107
|
-
resp = _embed_client.post(
|
|
108
|
-
f"{OLLAMA_URL}/api/embeddings",
|
|
109
|
-
json={"model": EMBED_MODEL, "prompt": text[:8000]},
|
|
110
|
-
)
|
|
111
|
-
resp.raise_for_status()
|
|
112
|
-
return resp.json()["embedding"]
|
|
97
|
+
"""Single-text embed via _embed_post (OpenAI-compat first, lambda-gateway fallback)."""
|
|
98
|
+
return _embed_post([text[:8000]])[0]
|
|
113
99
|
|
|
114
100
|
|
|
115
101
|
def embed_batch(texts: List[str]) -> List[List[float]]:
|
|
116
|
-
"""
|
|
117
|
-
|
|
118
|
-
try:
|
|
119
|
-
resp = _embed_client.post(NV_EMBED_URL, json={"input": [t[:4000] for t in texts]})
|
|
120
|
-
resp.raise_for_status()
|
|
121
|
-
return [d["embedding"] for d in resp.json()["data"]]
|
|
122
|
-
except Exception as e:
|
|
123
|
-
log.warning(f"NV-Embed-v2 batch failed, falling back to sequential: {e}")
|
|
124
|
-
|
|
125
|
-
return [embed_text(t) for t in texts]
|
|
102
|
+
"""Batched embed via _embed_post."""
|
|
103
|
+
return _embed_post([t[:8000] for t in texts])
|
|
126
104
|
|
|
127
105
|
# ---------------------------------------------------------------------------
|
|
128
106
|
# Cross-Encoder Reranker
|