npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.7.1 → 0.7.2 - Mend

@pentatonic-ai/ai-agent-sdk 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +1 -1
package/packages/memory-engine/compat/server.py +91 -7

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.7.1",
+  "version": "0.7.2",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine/compat/server.py CHANGED Viewed

@@ -85,6 +85,19 @@ class SearchRequest(BaseModel):
     query: str
     limit: Optional[int] = 10
     min_score: Optional[float] = 0.001
+    # Tenant scope. Required for multi-tenant deployments. Forwarded to
+    # layers that support arena filtering natively (L6); applied as a
+    # post-filter on the shim for layers that don't yet (L2, L4, L5).
+    # When unset, search is global — same behaviour as v0.7.x; safe for
+    # single-tenant deployments. Multi-tenant callers MUST set this.
+    arena: Optional[str] = None
+    # Arbitrary metadata equality filters, applied as a post-filter on
+    # the shim. Useful for `kind`, `layer_type`, `source_repo`, etc.
+    # Keys not present on a result's metadata are treated as no-match.
+    # Each pair is exact string equality. Engine doesn't currently
+    # forward these to underlying stores, so over-fetch happens; the
+    # shim trims to the requested limit after filtering.
+    metadata_filter: Optional[dict[str, Any]] = None
 class ForgetRequest(BaseModel):
@@ -424,6 +437,51 @@ async def store_batch(req: StoreBatchRequest):
     }
+def _apply_metadata_filters(results: list[dict[str, Any]], req: SearchRequest) -> list[dict[str, Any]]:
+    """Post-filter results by arena + arbitrary metadata equality.
+    Many layer searches don't yet honour arena/metadata at the storage
+    level, so the shim enforces tenant isolation here as defence in
+    depth. Even if the underlying layer leaks across arenas, the shim
+    drops cross-tenant rows before returning.
+    """
+    arena = req.arena
+    extra = req.metadata_filter or {}
+    if not arena and not extra:
+        return results
+    out: list[dict[str, Any]] = []
+    for item in results:
+        meta = item.get("metadata") or {}
+        if arena:
+            row_arena = meta.get("arena") or item.get("arena")
+            if row_arena and row_arena != arena:
+                continue
+            # If row has no arena tag at all, drop on multi-tenant
+            # safety: a row without arena predates the multi-tenant
+            # plumbing and could belong to anyone.
+            if arena and not row_arena:
+                continue
+        ok = True
+        for k, v in extra.items():
+            if str(meta.get(k, "")) != str(v):
+                ok = False
+                break
+        if ok:
+            out.append(item)
+    return out
+def _search_overfetch(req: SearchRequest) -> int:
+    """Decide how many results to over-fetch from layers.
+    Post-filtering can drop many rows; we ask layers for more than the
+    user's limit so we have headroom after filtering. 5x is a balance
+    between accuracy and latency.
+    """
+    base = req.limit or 10
+    return base * 5 if (req.arena or req.metadata_filter) else base * 3
 @app.post("/search")
 async def search(req: SearchRequest):
     """
@@ -431,6 +489,12 @@ async def search(req: SearchRequest):
     queries L0 BM25, L4 vec, L5 Milvus, L6 doc-store in parallel and fuses
     the results with Reciprocal Rank Fusion. L3 KG adds entity-aware
     boosting for graph queries.
+    Multi-tenancy: pass `arena` to scope results to a single tenant.
+    Underlying layers may or may not honour arena natively (L6 does;
+    L2/L4/L5 don't yet — engine TODO); the shim applies arena as a
+    post-filter regardless, so cross-tenant leakage is prevented even
+    when a layer is non-compliant.
     """
     if not req.query:
         return {"results": []}
@@ -452,10 +516,19 @@ async def search(req: SearchRequest):
         import asyncio
         async def _q_l6(query: str):
             try:
+                params: dict[str, Any] = {
+                    "q": query,
+                    "limit": _search_overfetch(req),
+                    "method": "hybrid",
+                }
+                if req.arena:
+                    # L6 supports arena natively (l6-document-store.py:837).
+                    # Forward it so the underlying Milvus query and FTS
+                    # query both filter to this tenant before returning.
+                    params["arena"] = req.arena
                 r = await _client().get(
                     f"{L6_DOC_URL}/search",
-                    params={"q": query, "limit": (req.limit or 10) * 3,
-                            "method": "hybrid"},
+                    params=params,
                     timeout=30.0,
                 )
                 r.raise_for_status()
@@ -544,11 +617,14 @@ async def search(req: SearchRequest):
                 "source": item.get("source_file") or item.get("path") or "",
                 "engine_layer": "+".join(sorted(set(layer_provenance.get(key, [])))),
             })
-        return {"results": out_results}
+        # Defense-in-depth post-filter (arena + arbitrary metadata),
+        # then trim to the requested limit.
+        out_results = _apply_metadata_filters(out_results, req)
+        return {"results": out_results[: req.limit or 10]}
     try:
         r = await _client().get(
             f"{L2_PROXY_URL}/search",
-            params={"q": req.query, "limit": req.limit or 10},
+            params={"q": req.query, "limit": _search_overfetch(req)},
             timeout=30.0,
         )
         r.raise_for_status()
@@ -558,7 +634,7 @@ async def search(req: SearchRequest):
         try:
             r = await _client().post(
                 f"{L2_PROXY_URL}/v1/search",
-                json={"query": req.query, "limit": req.limit or 10,
+                json={"query": req.query, "limit": _search_overfetch(req),
                       "min_score": req.min_score or 0.001},
                 timeout=30.0,
             )
@@ -567,9 +643,14 @@ async def search(req: SearchRequest):
         except Exception as exc2:
             last_err = exc2
             try:
+                params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
+                # L6 supports arena natively; forward it on the
+                # last-resort fallback path too.
+                if req.arena:
+                    params["arena"] = req.arena
                 r = await _client().get(
                     f"{L6_DOC_URL}/search",
-                    params={"q": req.query, "limit": req.limit or 10},
+                    params=params,
                     timeout=10.0,
                 )
                 r.raise_for_status()
@@ -621,7 +702,10 @@ async def search(req: SearchRequest):
             "source": item.get("source", item.get("source_file", "")),
             "engine_layer": item.get("layer", item.get("source_layer", "")),
         })
-    return {"results": out_results}
+    # Defense-in-depth post-filter (arena + arbitrary metadata) on L2/L6
+    # fallback paths. Same logic as the BYPASS branch above.
+    out_results = _apply_metadata_filters(out_results, req)
+    return {"results": out_results[: req.limit or 10]}
 @app.post("/forget")