@pentatonic-ai/ai-agent-sdk 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.7.3",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -85,6 +85,19 @@ class SearchRequest(BaseModel):
|
|
|
85
85
|
query: str
|
|
86
86
|
limit: Optional[int] = 10
|
|
87
87
|
min_score: Optional[float] = 0.001
|
|
88
|
+
# Tenant scope. Required for multi-tenant deployments. Forwarded to
|
|
89
|
+
# layers that support arena filtering natively (L6); applied as a
|
|
90
|
+
# post-filter on the shim for layers that don't yet (L2, L4, L5).
|
|
91
|
+
# When unset, search is global — same behaviour as v0.7.x; safe for
|
|
92
|
+
# single-tenant deployments. Multi-tenant callers MUST set this.
|
|
93
|
+
arena: Optional[str] = None
|
|
94
|
+
# Arbitrary metadata equality filters, applied as a post-filter on
|
|
95
|
+
# the shim. Useful for `kind`, `layer_type`, `source_repo`, etc.
|
|
96
|
+
# Keys not present on a result's metadata are treated as no-match.
|
|
97
|
+
# Each pair is exact string equality. Engine doesn't currently
|
|
98
|
+
# forward these to underlying stores, so over-fetch happens; the
|
|
99
|
+
# shim trims to the requested limit after filtering.
|
|
100
|
+
metadata_filter: Optional[dict[str, Any]] = None
|
|
88
101
|
|
|
89
102
|
|
|
90
103
|
class ForgetRequest(BaseModel):
|
|
@@ -424,6 +437,51 @@ async def store_batch(req: StoreBatchRequest):
|
|
|
424
437
|
}
|
|
425
438
|
|
|
426
439
|
|
|
440
|
+
def _apply_metadata_filters(results: list[dict[str, Any]], req: SearchRequest) -> list[dict[str, Any]]:
    """Post-filter results by arena + arbitrary metadata equality.

    Many layer searches don't yet honour arena/metadata at the storage
    level, so the shim enforces tenant isolation here as defence in
    depth. Even if the underlying layer leaks across arenas, the shim
    drops cross-tenant rows before returning.

    Args:
        results: Formatted result rows. Each row may carry a ``metadata``
            dict and/or a top-level ``arena`` field.
        req: The search request; only ``req.arena`` and
            ``req.metadata_filter`` are read.

    Returns:
        ``results`` itself (the same list object) when neither an arena
        nor a metadata filter is set; otherwise a new list containing the
        rows that pass every check, in their original order.
    """
    arena = req.arena
    extra = req.metadata_filter or {}
    if not arena and not extra:
        return results

    out: list[dict[str, Any]] = []
    for item in results:
        meta = item.get("metadata") or {}
        if arena:
            row_arena = meta.get("arena") or item.get("arena")
            # One comparison covers both rejection cases: a row tagged for
            # another tenant, and a row with no arena tag at all. An
            # untagged row predates the multi-tenant plumbing and could
            # belong to anyone, so it is dropped for safety.
            if row_arena != arena:
                continue
        # Exact string equality per pair. A key that is absent from the
        # row's metadata is always a no-match -- including when the filter
        # value is "" (a `.get(k, "")` default would wrongly match there).
        if all(k in meta and str(meta[k]) == str(v) for k, v in extra.items()):
            out.append(item)
    return out
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _search_overfetch(req: SearchRequest) -> int:
    """Decide how many results to over-fetch from layers.

    Post-filtering can drop many rows; we ask layers for more than the
    user's limit so we have headroom after filtering. 5x is a balance
    between accuracy and latency.

    Args:
        req: The search request; ``req.limit``, ``req.arena`` and
            ``req.metadata_filter`` are read.

    Returns:
        A positive row count to request from each layer: the effective
        limit times 5 when an arena or metadata filter is set, times 3
        otherwise.
    """
    limit = req.limit
    # An unset, zero or negative limit falls back to the default of 10.
    # Without the `> 0` guard a negative limit would produce a negative
    # over-fetch count that gets forwarded to the layers.
    base = limit if limit and limit > 0 else 10
    factor = 5 if (req.arena or req.metadata_filter) else 3
    return base * factor
|
|
483
|
+
|
|
484
|
+
|
|
427
485
|
@app.post("/search")
|
|
428
486
|
async def search(req: SearchRequest):
|
|
429
487
|
"""
|
|
@@ -431,6 +489,12 @@ async def search(req: SearchRequest):
|
|
|
431
489
|
queries L0 BM25, L4 vec, L5 Milvus, L6 doc-store in parallel and fuses
|
|
432
490
|
the results with Reciprocal Rank Fusion. L3 KG adds entity-aware
|
|
433
491
|
boosting for graph queries.
|
|
492
|
+
|
|
493
|
+
Multi-tenancy: pass `arena` to scope results to a single tenant.
|
|
494
|
+
Underlying layers may or may not honour arena natively (L6 does;
|
|
495
|
+
L2/L4/L5 don't yet — engine TODO); the shim applies arena as a
|
|
496
|
+
post-filter regardless, so cross-tenant leakage is prevented even
|
|
497
|
+
when a layer is non-compliant.
|
|
434
498
|
"""
|
|
435
499
|
if not req.query:
|
|
436
500
|
return {"results": []}
|
|
@@ -452,10 +516,19 @@ async def search(req: SearchRequest):
|
|
|
452
516
|
import asyncio
|
|
453
517
|
async def _q_l6(query: str):
|
|
454
518
|
try:
|
|
519
|
+
params: dict[str, Any] = {
|
|
520
|
+
"q": query,
|
|
521
|
+
"limit": _search_overfetch(req),
|
|
522
|
+
"method": "hybrid",
|
|
523
|
+
}
|
|
524
|
+
if req.arena:
|
|
525
|
+
# L6 supports arena natively (l6-document-store.py:837).
|
|
526
|
+
# Forward it so the underlying Milvus query and FTS
|
|
527
|
+
# query both filter to this tenant before returning.
|
|
528
|
+
params["arena"] = req.arena
|
|
455
529
|
r = await _client().get(
|
|
456
530
|
f"{L6_DOC_URL}/search",
|
|
457
|
-
params=
|
|
458
|
-
"method": "hybrid"},
|
|
531
|
+
params=params,
|
|
459
532
|
timeout=30.0,
|
|
460
533
|
)
|
|
461
534
|
r.raise_for_status()
|
|
@@ -534,21 +607,35 @@ async def search(req: SearchRequest):
|
|
|
534
607
|
if m:
|
|
535
608
|
attached_meta = m
|
|
536
609
|
break
|
|
610
|
+
# Some layers surface arena/kind/layer_type as top-level
|
|
611
|
+
# fields on the raw item rather than nested under metadata.
|
|
612
|
+
# Pull them through into the formatted metadata so the
|
|
613
|
+
# post-filter can see them even on cache miss (e.g., after
|
|
614
|
+
# a compat container restart wipes _META_CACHE).
|
|
615
|
+
raw_top_level = {
|
|
616
|
+
k: item[k]
|
|
617
|
+
for k in ("arena", "kind", "layer_type", "doc_type", "source_file")
|
|
618
|
+
if item.get(k)
|
|
619
|
+
}
|
|
620
|
+
merged_meta = {**raw_top_level, **(attached_meta or item.get("metadata") or {})}
|
|
537
621
|
out_results.append({
|
|
538
622
|
"id": key,
|
|
539
623
|
"content": item.get("text") or item.get("content") or item.get("snippet") or "",
|
|
540
|
-
"metadata":
|
|
624
|
+
"metadata": merged_meta,
|
|
541
625
|
"similarity": float(rrf_scores[key]),
|
|
542
626
|
"layer_id": f"ml_{CLIENT_ID}_episodic",
|
|
543
627
|
"client_id": CLIENT_ID,
|
|
544
628
|
"source": item.get("source_file") or item.get("path") or "",
|
|
545
629
|
"engine_layer": "+".join(sorted(set(layer_provenance.get(key, [])))),
|
|
546
630
|
})
|
|
547
|
-
|
|
631
|
+
# Defense-in-depth post-filter (arena + arbitrary metadata),
|
|
632
|
+
# then trim to the requested limit.
|
|
633
|
+
out_results = _apply_metadata_filters(out_results, req)
|
|
634
|
+
return {"results": out_results[: req.limit or 10]}
|
|
548
635
|
try:
|
|
549
636
|
r = await _client().get(
|
|
550
637
|
f"{L2_PROXY_URL}/search",
|
|
551
|
-
params={"q": req.query, "limit": req
|
|
638
|
+
params={"q": req.query, "limit": _search_overfetch(req)},
|
|
552
639
|
timeout=30.0,
|
|
553
640
|
)
|
|
554
641
|
r.raise_for_status()
|
|
@@ -558,7 +645,7 @@ async def search(req: SearchRequest):
|
|
|
558
645
|
try:
|
|
559
646
|
r = await _client().post(
|
|
560
647
|
f"{L2_PROXY_URL}/v1/search",
|
|
561
|
-
json={"query": req.query, "limit": req
|
|
648
|
+
json={"query": req.query, "limit": _search_overfetch(req),
|
|
562
649
|
"min_score": req.min_score or 0.001},
|
|
563
650
|
timeout=30.0,
|
|
564
651
|
)
|
|
@@ -567,9 +654,14 @@ async def search(req: SearchRequest):
|
|
|
567
654
|
except Exception as exc2:
|
|
568
655
|
last_err = exc2
|
|
569
656
|
try:
|
|
657
|
+
params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
|
|
658
|
+
# L6 supports arena natively; forward it on the
|
|
659
|
+
# last-resort fallback path too.
|
|
660
|
+
if req.arena:
|
|
661
|
+
params["arena"] = req.arena
|
|
570
662
|
r = await _client().get(
|
|
571
663
|
f"{L6_DOC_URL}/search",
|
|
572
|
-
params=
|
|
664
|
+
params=params,
|
|
573
665
|
timeout=10.0,
|
|
574
666
|
)
|
|
575
667
|
r.raise_for_status()
|
|
@@ -611,17 +703,28 @@ async def search(req: SearchRequest):
|
|
|
611
703
|
if not chosen_id:
|
|
612
704
|
chosen_id = (item.get("id") or item.get("doc_id")
|
|
613
705
|
or item.get("path") or item.get("source_file") or "")
|
|
706
|
+
# Pull arena/kind/layer_type from raw item top-level if present
|
|
707
|
+
# (L6 returns these as top-level columns; cache may be empty).
|
|
708
|
+
raw_top_level = {
|
|
709
|
+
k: item[k]
|
|
710
|
+
for k in ("arena", "kind", "layer_type", "doc_type", "source_file")
|
|
711
|
+
if item.get(k)
|
|
712
|
+
}
|
|
713
|
+
merged_meta = {**raw_top_level, **(attached_meta or item.get("metadata") or {})}
|
|
614
714
|
out_results.append({
|
|
615
715
|
"id": chosen_id,
|
|
616
716
|
"content": item.get("text") or item.get("content") or item.get("snippet") or "",
|
|
617
|
-
"metadata":
|
|
717
|
+
"metadata": merged_meta,
|
|
618
718
|
"similarity": float(item.get("score") or item.get("similarity") or 0.0),
|
|
619
719
|
"layer_id": f"ml_{CLIENT_ID}_episodic",
|
|
620
720
|
"client_id": CLIENT_ID,
|
|
621
721
|
"source": item.get("source", item.get("source_file", "")),
|
|
622
722
|
"engine_layer": item.get("layer", item.get("source_layer", "")),
|
|
623
723
|
})
|
|
624
|
-
|
|
724
|
+
# Defense-in-depth post-filter (arena + arbitrary metadata) on L2/L6
|
|
725
|
+
# fallback paths. Same logic as the BYPASS branch above.
|
|
726
|
+
out_results = _apply_metadata_filters(out_results, req)
|
|
727
|
+
return {"results": out_results[: req.limit or 10]}
|
|
625
728
|
|
|
626
729
|
|
|
627
730
|
@app.post("/forget")
|