@pentatonic-ai/ai-agent-sdk 0.7.3 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.7.3",
3
+ "version": "0.7.4",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -204,8 +204,12 @@ async def _index_l4(records: list[dict[str, Any]]) -> int:
204
204
  return 0
205
205
 
206
206
 
207
- async def _index_l5(records: list[dict[str, Any]]) -> int:
208
- """Index records into the L5 Milvus comms layer (chats collection)."""
207
+ async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
208
+ """Index records into the L5 Milvus comms layer (chats collection).
209
+
210
+ arena is forwarded as a Milvus dynamic field so /search can filter
211
+ by arena natively (vs the shim's defence-in-depth post-filter).
212
+ """
209
213
  payload = {
210
214
  "collection": "chats",
211
215
  "records": [
@@ -215,6 +219,7 @@ async def _index_l5(records: list[dict[str, Any]]) -> int:
215
219
  "source": (r.get("metadata") or {}).get("source", "shim"),
216
220
  "channel": "pentatonic-memory",
217
221
  "contact": (r.get("metadata") or {}).get("user", ""),
222
+ "arena": (r.get("metadata") or {}).get("arena") or arena,
218
223
  }
219
224
  for r in records
220
225
  ],
@@ -369,7 +374,7 @@ async def store(req: StoreRequest):
369
374
  import asyncio
370
375
  l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
371
376
  _index_l4([record]),
372
- _index_l5([record]),
377
+ _index_l5([record], arena=arena),
373
378
  _index_l6([record], arena=arena),
374
379
  _index_l2_internal([record], arena=arena),
375
380
  )
@@ -414,7 +419,7 @@ async def store_batch(req: StoreBatchRequest):
414
419
  import asyncio
415
420
  l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
416
421
  _index_l4(normalised),
417
- _index_l5(normalised),
422
+ _index_l5(normalised, arena=req.arena or "general"),
418
423
  _index_l6(normalised, arena=req.arena or "general"),
419
424
  _index_l2_internal(normalised, arena=req.arena or "general"),
420
425
  )
@@ -633,9 +638,12 @@ async def search(req: SearchRequest):
633
638
  out_results = _apply_metadata_filters(out_results, req)
634
639
  return {"results": out_results[: req.limit or 10]}
635
640
  try:
641
+ get_params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
642
+ if req.arena:
643
+ get_params["arena"] = req.arena
636
644
  r = await _client().get(
637
645
  f"{L2_PROXY_URL}/search",
638
- params={"q": req.query, "limit": _search_overfetch(req)},
646
+ params=get_params,
639
647
  timeout=30.0,
640
648
  )
641
649
  r.raise_for_status()
@@ -643,10 +651,16 @@ async def search(req: SearchRequest):
643
651
  except Exception as exc:
644
652
  last_err = exc
645
653
  try:
654
+ post_body: dict[str, Any] = {
655
+ "query": req.query,
656
+ "limit": _search_overfetch(req),
657
+ "min_score": req.min_score or 0.001,
658
+ }
659
+ if req.arena:
660
+ post_body["arena"] = req.arena
646
661
  r = await _client().post(
647
662
  f"{L2_PROXY_URL}/v1/search",
648
- json={"query": req.query, "limit": _search_overfetch(req),
649
- "min_score": req.min_score or 0.001},
663
+ json=post_body,
650
664
  timeout=30.0,
651
665
  )
652
666
  r.raise_for_status()
@@ -719,12 +719,17 @@ L0_MEMORY_DB = Path(os.environ.get(
719
719
  str(Path.home() / ".pentatonic" / "memory" / "main.sqlite"),
720
720
  ))
721
721
 
722
- def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
722
+ def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
723
723
  """Search native BM25 index over workspace memory files.
724
-
724
+
725
725
  Covers chunks from daily notes, memory files, people profiles,
726
726
  infrastructure docs, project files — corpus that L3-L6 don't index.
727
727
  Sub-millisecond local SQLite reads, zero network overhead.
728
+
729
+ arena (optional): when set, filter to paths under bench/<arena>/.
730
+ Records stored via the compat shim land under that prefix per
731
+ _stash_all_keys; this is the L0 path-based equivalent of the
732
+ arena dynamic-field filter on L5/L6.
728
733
  """
729
734
  if not L0_MEMORY_DB.exists():
730
735
  return []
@@ -741,16 +746,21 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
741
746
 
742
747
  conn = sqlite3.connect(str(L0_MEMORY_DB), timeout=2)
743
748
  conn.execute("PRAGMA journal_mode=WAL")
744
- rows = conn.execute("""
749
+ sql = """
745
750
  SELECT path, text, bm25(chunks_fts) as rank
746
751
  FROM chunks_fts
747
752
  WHERE chunks_fts MATCH ?
748
753
  AND path NOT LIKE '%/snapshots/%'
749
754
  AND path NOT LIKE '%/archive/%'
750
755
  AND path NOT LIKE '%-backup-%'
751
- ORDER BY rank ASC
752
- LIMIT ?
753
- """, (fts_query, limit * 2)).fetchall()
756
+ """
757
+ params: list = [fts_query]
758
+ if arena:
759
+ sql += " AND path LIKE ?"
760
+ params.append(f"bench/{arena}/%")
761
+ sql += " ORDER BY rank ASC LIMIT ?"
762
+ params.append(limit * 2)
763
+ rows = conn.execute(sql, params).fetchall()
754
764
  conn.close()
755
765
 
756
766
  results = []
@@ -761,12 +771,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
761
771
  seen_paths.add(path)
762
772
  relevance = -rank if rank < 0 else 0.001
763
773
  score = min(relevance / (1 + relevance) * 0.85, 0.75)
774
+ # Parse arena from path (bench/<arena>/...) so downstream
775
+ # consumers can read it directly without parsing again.
776
+ row_arena = ""
777
+ if path.startswith("bench/"):
778
+ parts = path.split("/", 2)
779
+ if len(parts) >= 3:
780
+ row_arena = parts[1]
764
781
  results.append({
765
782
  "path": f"L0/{path}",
766
783
  "snippet": text[:500],
767
784
  "score": round(score, 4),
768
785
  "layer": "L0_workspace_bm25",
769
786
  "source": path,
787
+ "arena": row_arena,
770
788
  })
771
789
  if len(results) >= limit:
772
790
  break
@@ -782,12 +800,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
782
800
 
783
801
  L5_API_URL = os.environ.get("PME_L5_URL", "http://127.0.0.1:8034")
784
802
 
785
- def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
786
- """Search L5 Communications Context via L5 API (emails, chats, calendar)."""
803
+ def search_l5_communications(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
804
+ """Search L5 Communications Context via L5 API (emails, chats, calendar).
805
+
806
+ arena (optional): forwarded to L5; filters Milvus by the arena
807
+ dynamic field. The record id is included in the result so callers
808
+ can attach metadata via the shim's _META_CACHE.
809
+ """
787
810
  try:
811
+ params: dict = {"q": query, "limit": limit}
812
+ if arena:
813
+ params["arena"] = arena
788
814
  resp = requests.get(
789
815
  f"{L5_API_URL}/search",
790
- params={"q": query, "limit": limit},
816
+ params=params,
791
817
  timeout=10,
792
818
  )
793
819
  if resp.status_code != 200:
@@ -804,10 +830,15 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
804
830
  continue # skip low relevance
805
831
  contact = hit.get("contact", "")
806
832
  channel = hit.get("channel", "")
807
- path_label = f"L5/{source}"
808
- if contact:
833
+ hit_id = hit.get("id", "")
834
+ # Use record id as path label so the shim can attach
835
+ # metadata via _META_CACHE; falls back to source label
836
+ # for legacy records that have no id.
837
+ path_label = hit_id or f"L5/{source}"
838
+ if not hit_id and contact:
809
839
  path_label = f"L5/{channel}/{contact}"
810
840
  results.append({
841
+ "id": hit_id,
811
842
  "path": path_label,
812
843
  "snippet": hit.get("text", "")[:500],
813
844
  "score": scaled_score,
@@ -815,6 +846,7 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
815
846
  "source": source,
816
847
  "collection": hit.get("collection", ""),
817
848
  "timestamp": hit.get("timestamp", ""),
849
+ "arena": hit.get("arena", ""),
818
850
  })
819
851
  return results
820
852
  except Exception as e:
@@ -825,12 +857,19 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
825
857
  # L6: Document Store Search
826
858
  L6_URL = os.environ.get("PME_L6_URL", "http://localhost:8037")
827
859
 
828
- def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
829
- """Search L6 Document Store (research, legal, financial, project docs)."""
860
+ def search_l6_documents(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
861
+ """Search L6 Document Store (research, legal, financial, project docs).
862
+
863
+ arena (optional): forwarded to L6 — L6 already supports arena
864
+ natively (see l6-document-store.py search_vector / search_fts).
865
+ """
830
866
  try:
867
+ params: dict = {"q": query, "method": "hybrid", "limit": limit, "rerank": "true"}
868
+ if arena:
869
+ params["arena"] = arena
831
870
  resp = requests.get(
832
871
  f"{L6_URL}/search",
833
- params={"q": query, "method": "hybrid", "limit": limit, "rerank": "true"},
872
+ params=params,
834
873
  timeout=10,
835
874
  )
836
875
  if resp.status_code != 200:
@@ -875,13 +914,19 @@ def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
875
914
  return []
876
915
 
877
916
 
878
- def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
879
- """Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs)."""
917
+ def sequential_hybridrag_search(query: str, limit: int = 16, arena: str = None) -> List[Dict]:
918
+ """Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
919
+
920
+ arena (optional): tenant scope. Forwarded to L0 (path-prefix
921
+ filter), L5 (Milvus dynamic-field filter), L6 (native arena).
922
+ L4 vector and L3 graph don't yet support native arena filtering;
923
+ the compat shim post-filter catches those before they leak out.
924
+ """
880
925
  start_time = time.time()
881
- log.info(f"Starting sequential HybridRAG search for: '{query}'")
926
+ log.info(f"Starting sequential HybridRAG search for: '{query}' arena={arena!r}")
882
927
 
883
928
  # L0: BM25 workspace memory (keyword search — complements semantic layers)
884
- l0_results = search_l0_bm25(query, limit=6)
929
+ l0_results = search_l0_bm25(query, limit=6, arena=arena)
885
930
  log.info(f"L0 BM25 workspace: {len(l0_results)} results")
886
931
 
887
932
  # L1: System Files (HIGHEST PRIORITY)
@@ -902,11 +947,11 @@ def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
902
947
  log.info(f"L4 Vector search: {len(vector_results)} results (HyDE={'on' if hyde_query != query else 'off'})")
903
948
 
904
949
  # L5: Communications Context (emails, chats, calendar) — also use HyDE
905
- l5_results = search_l5_communications(hyde_query, limit=6)
950
+ l5_results = search_l5_communications(hyde_query, limit=6, arena=arena)
906
951
  log.info(f"L5 Communications: {len(l5_results)} results")
907
952
 
908
953
  # L6: Document Store (research, legal, financial, project docs)
909
- l6_results = search_l6_documents(hyde_query, limit=6)
954
+ l6_results = search_l6_documents(hyde_query, limit=6, arena=arena)
910
955
  log.info(f"L6 Documents: {len(l6_results)} results")
911
956
 
912
957
  # L2: HybridRAG fusion (combines all layers with L1 priority)
@@ -966,10 +1011,11 @@ async def search_endpoint(request: Request) -> dict:
966
1011
  body = await request.json()
967
1012
  query = body.get("query", "")
968
1013
  limit = body.get("limit", 16)
1014
+ arena = body.get("arena") or None
969
1015
  if not query:
970
1016
  raise HTTPException(status_code=400, detail="query is required")
971
1017
 
972
- results = sequential_hybridrag_search(query, limit=limit)
1018
+ results = sequential_hybridrag_search(query, limit=limit, arena=arena)
973
1019
 
974
1020
  # Also return raw graph entities for context enrichment
975
1021
  entities = extract_query_entities(query)
@@ -449,8 +449,13 @@ def index_memory(client):
449
449
 
450
450
  # --- Search ---
451
451
 
452
- def search(query: str, collection: str = None, limit: int = 10):
453
- """Search across collections."""
452
+ def search(query: str, collection: str = None, limit: int = 10, arena: str = None):
453
+ """Search across collections.
454
+
455
+ arena (optional): when set, filter to records whose arena dynamic
456
+ field matches. Records indexed before arena was added carry no
457
+ arena field — those are dropped under multi-tenant safety.
458
+ """
454
459
  client = get_client()
455
460
  vectors = embed_texts([query])
456
461
  if not vectors or all(v == 0.0 for v in vectors[0]):
@@ -460,6 +465,12 @@ def search(query: str, collection: str = None, limit: int = 10):
460
465
  collections = [collection] if collection else ["chats", "emails", "contacts", "memory"]
461
466
  all_results = []
462
467
 
468
+ filter_expr = ""
469
+ if arena:
470
+ # Escape double quotes; Milvus filter syntax for dynamic fields.
471
+ safe = str(arena).replace('"', '\\"')
472
+ filter_expr = f'arena == "{safe}"'
473
+
463
474
  for coll in collections:
464
475
  if not client.has_collection(coll):
465
476
  continue
@@ -468,12 +479,14 @@ def search(query: str, collection: str = None, limit: int = 10):
468
479
  collection_name=coll,
469
480
  data=[vectors[0]],
470
481
  limit=limit,
471
- output_fields=["text", "source", "channel", "contact", "timestamp"],
482
+ filter=filter_expr,
483
+ output_fields=["text", "source", "channel", "contact", "timestamp", "arena"],
472
484
  )
473
485
  for hits in results:
474
486
  for hit in hits:
475
487
  entity = hit.get("entity", {})
476
488
  all_results.append({
489
+ "id": hit.get("id", ""),
477
490
  "collection": coll,
478
491
  "score": round(hit.get("distance", 0), 4),
479
492
  "text": entity.get("text", ""),
@@ -481,6 +494,7 @@ def search(query: str, collection: str = None, limit: int = 10):
481
494
  "channel": entity.get("channel", ""),
482
495
  "contact": entity.get("contact", ""),
483
496
  "timestamp": entity.get("timestamp", ""),
497
+ "arena": entity.get("arena", ""),
484
498
  })
485
499
  except Exception as e:
486
500
  print(f" Search error in {coll}: {e}")
@@ -547,8 +561,9 @@ def serve(port=8034):
547
561
  return health()
548
562
 
549
563
  @api.get("/search")
550
- def api_search(q: str = Query(...), collection: str = None, limit: int = 10):
551
- results = search(q, collection=collection, limit=limit)
564
+ def api_search(q: str = Query(...), collection: str = None, limit: int = 10,
565
+ arena: str = None):
566
+ results = search(q, collection=collection, limit=limit, arena=arena)
552
567
  return {"query": q, "results": results, "count": len(results)}
553
568
 
554
569
  @api.get("/stats")
@@ -618,6 +633,10 @@ def serve(port=8034):
618
633
  "channel": (r.get("channel") or "")[:64],
619
634
  "contact": (r.get("contact") or "")[:256],
620
635
  "timestamp": (r.get("timestamp") or _now)[:32],
636
+ # arena lands in the dynamic-field section of the
637
+ # collection (enable_dynamic_field=True). Filterable
638
+ # via `arena == "..."` in /search.
639
+ "arena": (r.get("arena") or "general")[:64],
621
640
  })
622
641
  t1 = _time.time()
623
642
  if rows:
@@ -94,35 +94,13 @@ log = logging.getLogger("l6-document-store")
94
94
  _embed_client = httpx.Client(timeout=60)
95
95
 
96
96
  def embed_text(text: str) -> List[float]:
97
- """Get embedding NV-Embed-v2 primary, Ollama fallback."""
98
- if NV_EMBED_ENABLED:
99
- try:
100
- resp = _embed_client.post(NV_EMBED_URL, json={"input": text[:4000]})
101
- resp.raise_for_status()
102
- return resp.json()["data"][0]["embedding"]
103
- except Exception as e:
104
- log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
105
-
106
- # Ollama fallback
107
- resp = _embed_client.post(
108
- f"{OLLAMA_URL}/api/embeddings",
109
- json={"model": EMBED_MODEL, "prompt": text[:8000]},
110
- )
111
- resp.raise_for_status()
112
- return resp.json()["embedding"]
97
+ """Single-text embed via _embed_post (OpenAI-compat first, lambda-gateway fallback)."""
98
+ return _embed_post([text[:8000]])[0]
113
99
 
114
100
 
115
101
  def embed_batch(texts: List[str]) -> List[List[float]]:
116
- """Embed a batch of texts — NV-Embed-v2 supports native batching."""
117
- if NV_EMBED_ENABLED:
118
- try:
119
- resp = _embed_client.post(NV_EMBED_URL, json={"input": [t[:4000] for t in texts]})
120
- resp.raise_for_status()
121
- return [d["embedding"] for d in resp.json()["data"]]
122
- except Exception as e:
123
- log.warning(f"NV-Embed-v2 batch failed, falling back to sequential: {e}")
124
-
125
- return [embed_text(t) for t in texts]
102
+ """Batched embed via _embed_post."""
103
+ return _embed_post([t[:8000] for t in texts])
126
104
 
127
105
  # ---------------------------------------------------------------------------
128
106
  # Cross-Encoder Reranker