@pentatonic-ai/ai-agent-sdk 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.7.1",
3
+ "version": "0.7.3",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -85,6 +85,19 @@ class SearchRequest(BaseModel):
85
85
  query: str
86
86
  limit: Optional[int] = 10
87
87
  min_score: Optional[float] = 0.001
88
+ # Tenant scope. Required for multi-tenant deployments. Forwarded to
89
+ # layers that support arena filtering natively (L6); applied as a
90
+ # post-filter on the shim for layers that don't yet (L2, L4, L5).
91
+ # When unset, search is global — same behaviour as v0.7.x; safe for
92
+ # single-tenant deployments. Multi-tenant callers MUST set this.
93
+ arena: Optional[str] = None
94
+ # Arbitrary metadata equality filters, applied as a post-filter on
95
+ # the shim. Useful for `kind`, `layer_type`, `source_repo`, etc.
96
+ # Keys not present on a result's metadata are treated as no-match.
97
+ # Each pair is exact string equality. Engine doesn't currently
98
+ # forward these to underlying stores, so over-fetch happens; the
99
+ # shim trims to the requested limit after filtering.
100
+ metadata_filter: Optional[dict[str, Any]] = None
88
101
 
89
102
 
90
103
  class ForgetRequest(BaseModel):
@@ -424,6 +437,51 @@ async def store_batch(req: StoreBatchRequest):
424
437
  }
425
438
 
426
439
 
440
def _apply_metadata_filters(results: list[dict[str, Any]], req: SearchRequest) -> list[dict[str, Any]]:
    """Post-filter results by arena + arbitrary metadata equality.

    Many layer searches don't yet honour arena/metadata at the storage
    level, so the shim enforces tenant isolation here as defence in
    depth. Even if the underlying layer leaks across arenas, the shim
    drops cross-tenant rows before returning.

    Args:
        results: Formatted result dicts. Each may carry a ``metadata``
            dict and/or a top-level ``arena`` field.
        req: The search request; only ``arena`` and ``metadata_filter``
            are read.

    Returns:
        ``results`` itself (the same list object) when neither filter is
        set. Otherwise a new list, in the original order, holding only
        the rows that pass the arena check and every metadata pair.
    """
    arena = req.arena
    extra = req.metadata_filter or {}
    if not arena and not extra:
        return results

    def _keep(item: dict[str, Any]) -> bool:
        meta = item.get("metadata") or {}
        if arena:
            row_arena = meta.get("arena") or item.get("arena")
            # One comparison covers both rejection cases: a row tagged
            # for another tenant, and a row with no arena tag at all. An
            # untagged row predates the multi-tenant plumbing and could
            # belong to anyone, so it is dropped on multi-tenant safety.
            if row_arena != arena:
                return False
        # A key that is absent from the row's metadata never matches,
        # even when the requested value is the empty string. Present
        # keys are compared by exact string equality.
        return all(k in meta and str(meta[k]) == str(v) for k, v in extra.items())

    return [item for item in results if _keep(item)]
472
+
473
+
474
def _search_overfetch(req: SearchRequest) -> int:
    """Return how many rows to request from each layer for this search.

    Rows can be discarded after the layers respond, so each layer is
    asked for a multiple of the caller's limit to leave headroom. The
    multiple is 5x when an arena or metadata filter is set and 3x
    otherwise, trading accuracy against latency. A missing or zero limit
    is treated as 10.
    """
    requested = req.limit or 10
    if req.arena or req.metadata_filter:
        multiplier = 5
    else:
        multiplier = 3
    return requested * multiplier
483
+
484
+
427
485
  @app.post("/search")
428
486
  async def search(req: SearchRequest):
429
487
  """
@@ -431,6 +489,12 @@ async def search(req: SearchRequest):
431
489
  queries L0 BM25, L4 vec, L5 Milvus, L6 doc-store in parallel and fuses
432
490
  the results with Reciprocal Rank Fusion. L3 KG adds entity-aware
433
491
  boosting for graph queries.
492
+
493
+ Multi-tenancy: pass `arena` to scope results to a single tenant.
494
+ Underlying layers may or may not honour arena natively (L6 does;
495
+ L2/L4/L5 don't yet — engine TODO); the shim applies arena as a
496
+ post-filter regardless, so cross-tenant leakage is prevented even
497
+ when a layer is non-compliant.
434
498
  """
435
499
  if not req.query:
436
500
  return {"results": []}
@@ -452,10 +516,19 @@ async def search(req: SearchRequest):
452
516
  import asyncio
453
517
  async def _q_l6(query: str):
454
518
  try:
519
+ params: dict[str, Any] = {
520
+ "q": query,
521
+ "limit": _search_overfetch(req),
522
+ "method": "hybrid",
523
+ }
524
+ if req.arena:
525
+ # L6 supports arena natively (l6-document-store.py:837).
526
+ # Forward it so the underlying Milvus query and FTS
527
+ # query both filter to this tenant before returning.
528
+ params["arena"] = req.arena
455
529
  r = await _client().get(
456
530
  f"{L6_DOC_URL}/search",
457
- params={"q": query, "limit": (req.limit or 10) * 3,
458
- "method": "hybrid"},
531
+ params=params,
459
532
  timeout=30.0,
460
533
  )
461
534
  r.raise_for_status()
@@ -534,21 +607,35 @@ async def search(req: SearchRequest):
534
607
  if m:
535
608
  attached_meta = m
536
609
  break
610
+ # Some layers surface arena/kind/layer_type as top-level
611
+ # fields on the raw item rather than nested under metadata.
612
+ # Pull them through into the formatted metadata so the
613
+ # post-filter can see them even on cache miss (e.g., after
614
+ # a compat container restart wipes _META_CACHE).
615
+ raw_top_level = {
616
+ k: item[k]
617
+ for k in ("arena", "kind", "layer_type", "doc_type", "source_file")
618
+ if item.get(k)
619
+ }
620
+ merged_meta = {**raw_top_level, **(attached_meta or item.get("metadata") or {})}
537
621
  out_results.append({
538
622
  "id": key,
539
623
  "content": item.get("text") or item.get("content") or item.get("snippet") or "",
540
- "metadata": attached_meta or item.get("metadata") or {},
624
+ "metadata": merged_meta,
541
625
  "similarity": float(rrf_scores[key]),
542
626
  "layer_id": f"ml_{CLIENT_ID}_episodic",
543
627
  "client_id": CLIENT_ID,
544
628
  "source": item.get("source_file") or item.get("path") or "",
545
629
  "engine_layer": "+".join(sorted(set(layer_provenance.get(key, [])))),
546
630
  })
547
- return {"results": out_results}
631
+ # Defense-in-depth post-filter (arena + arbitrary metadata),
632
+ # then trim to the requested limit.
633
+ out_results = _apply_metadata_filters(out_results, req)
634
+ return {"results": out_results[: req.limit or 10]}
548
635
  try:
549
636
  r = await _client().get(
550
637
  f"{L2_PROXY_URL}/search",
551
- params={"q": req.query, "limit": req.limit or 10},
638
+ params={"q": req.query, "limit": _search_overfetch(req)},
552
639
  timeout=30.0,
553
640
  )
554
641
  r.raise_for_status()
@@ -558,7 +645,7 @@ async def search(req: SearchRequest):
558
645
  try:
559
646
  r = await _client().post(
560
647
  f"{L2_PROXY_URL}/v1/search",
561
- json={"query": req.query, "limit": req.limit or 10,
648
+ json={"query": req.query, "limit": _search_overfetch(req),
562
649
  "min_score": req.min_score or 0.001},
563
650
  timeout=30.0,
564
651
  )
@@ -567,9 +654,14 @@ async def search(req: SearchRequest):
567
654
  except Exception as exc2:
568
655
  last_err = exc2
569
656
  try:
657
+ params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
658
+ # L6 supports arena natively; forward it on the
659
+ # last-resort fallback path too.
660
+ if req.arena:
661
+ params["arena"] = req.arena
570
662
  r = await _client().get(
571
663
  f"{L6_DOC_URL}/search",
572
- params={"q": req.query, "limit": req.limit or 10},
664
+ params=params,
573
665
  timeout=10.0,
574
666
  )
575
667
  r.raise_for_status()
@@ -611,17 +703,28 @@ async def search(req: SearchRequest):
611
703
  if not chosen_id:
612
704
  chosen_id = (item.get("id") or item.get("doc_id")
613
705
  or item.get("path") or item.get("source_file") or "")
706
+ # Pull arena/kind/layer_type from raw item top-level if present
707
+ # (L6 returns these as top-level columns; cache may be empty).
708
+ raw_top_level = {
709
+ k: item[k]
710
+ for k in ("arena", "kind", "layer_type", "doc_type", "source_file")
711
+ if item.get(k)
712
+ }
713
+ merged_meta = {**raw_top_level, **(attached_meta or item.get("metadata") or {})}
614
714
  out_results.append({
615
715
  "id": chosen_id,
616
716
  "content": item.get("text") or item.get("content") or item.get("snippet") or "",
617
- "metadata": attached_meta or item.get("metadata") or {},
717
+ "metadata": merged_meta,
618
718
  "similarity": float(item.get("score") or item.get("similarity") or 0.0),
619
719
  "layer_id": f"ml_{CLIENT_ID}_episodic",
620
720
  "client_id": CLIENT_ID,
621
721
  "source": item.get("source", item.get("source_file", "")),
622
722
  "engine_layer": item.get("layer", item.get("source_layer", "")),
623
723
  })
624
- return {"results": out_results}
724
+ # Defense-in-depth post-filter (arena + arbitrary metadata) on L2/L6
725
+ # fallback paths. Same logic as the BYPASS branch above.
726
+ out_results = _apply_metadata_filters(out_results, req)
727
+ return {"results": out_results[: req.limit or 10]}
625
728
 
626
729
 
627
730
  @app.post("/forget")