@pentatonic-ai/ai-agent-sdk 0.7.3 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.7.3",
3
+ "version": "0.7.4",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -204,8 +204,12 @@ async def _index_l4(records: list[dict[str, Any]]) -> int:
204
204
  return 0
205
205
 
206
206
 
207
- async def _index_l5(records: list[dict[str, Any]]) -> int:
208
- """Index records into the L5 Milvus comms layer (chats collection)."""
207
+ async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
208
+ """Index records into the L5 Milvus comms layer (chats collection).
209
+
210
+ arena is forwarded as a Milvus dynamic field so /search can filter
211
+ by arena natively (vs the shim's defence-in-depth post-filter).
212
+ """
209
213
  payload = {
210
214
  "collection": "chats",
211
215
  "records": [
@@ -215,6 +219,7 @@ async def _index_l5(records: list[dict[str, Any]]) -> int:
215
219
  "source": (r.get("metadata") or {}).get("source", "shim"),
216
220
  "channel": "pentatonic-memory",
217
221
  "contact": (r.get("metadata") or {}).get("user", ""),
222
+ "arena": (r.get("metadata") or {}).get("arena") or arena,
218
223
  }
219
224
  for r in records
220
225
  ],
@@ -369,7 +374,7 @@ async def store(req: StoreRequest):
369
374
  import asyncio
370
375
  l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
371
376
  _index_l4([record]),
372
- _index_l5([record]),
377
+ _index_l5([record], arena=arena),
373
378
  _index_l6([record], arena=arena),
374
379
  _index_l2_internal([record], arena=arena),
375
380
  )
@@ -414,7 +419,7 @@ async def store_batch(req: StoreBatchRequest):
414
419
  import asyncio
415
420
  l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
416
421
  _index_l4(normalised),
417
- _index_l5(normalised),
422
+ _index_l5(normalised, arena=req.arena or "general"),
418
423
  _index_l6(normalised, arena=req.arena or "general"),
419
424
  _index_l2_internal(normalised, arena=req.arena or "general"),
420
425
  )
@@ -633,9 +638,12 @@ async def search(req: SearchRequest):
633
638
  out_results = _apply_metadata_filters(out_results, req)
634
639
  return {"results": out_results[: req.limit or 10]}
635
640
  try:
641
+ get_params: dict[str, Any] = {"q": req.query, "limit": _search_overfetch(req)}
642
+ if req.arena:
643
+ get_params["arena"] = req.arena
636
644
  r = await _client().get(
637
645
  f"{L2_PROXY_URL}/search",
638
- params={"q": req.query, "limit": _search_overfetch(req)},
646
+ params=get_params,
639
647
  timeout=30.0,
640
648
  )
641
649
  r.raise_for_status()
@@ -643,10 +651,16 @@ async def search(req: SearchRequest):
643
651
  except Exception as exc:
644
652
  last_err = exc
645
653
  try:
654
+ post_body: dict[str, Any] = {
655
+ "query": req.query,
656
+ "limit": _search_overfetch(req),
657
+ "min_score": req.min_score or 0.001,
658
+ }
659
+ if req.arena:
660
+ post_body["arena"] = req.arena
646
661
  r = await _client().post(
647
662
  f"{L2_PROXY_URL}/v1/search",
648
- json={"query": req.query, "limit": _search_overfetch(req),
649
- "min_score": req.min_score or 0.001},
663
+ json=post_body,
650
664
  timeout=30.0,
651
665
  )
652
666
  r.raise_for_status()
@@ -719,12 +719,17 @@ L0_MEMORY_DB = Path(os.environ.get(
719
719
  str(Path.home() / ".pentatonic" / "memory" / "main.sqlite"),
720
720
  ))
721
721
 
722
- def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
722
+ def search_l0_bm25(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
723
723
  """Search native BM25 index over workspace memory files.
724
-
724
+
725
725
  Covers chunks from daily notes, memory files, people profiles,
726
726
  infrastructure docs, project files — corpus that L3-L6 don't index.
727
727
  Sub-millisecond local SQLite reads, zero network overhead.
728
+
729
+ arena (optional): when set, filter to paths under bench/<arena>/.
730
+ Records stored via the compat shim land under that prefix per
731
+ _stash_all_keys; this is the L0 path-based equivalent of the
732
+ arena dynamic-field filter on L5/L6.
728
733
  """
729
734
  if not L0_MEMORY_DB.exists():
730
735
  return []
@@ -741,16 +746,21 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
741
746
 
742
747
  conn = sqlite3.connect(str(L0_MEMORY_DB), timeout=2)
743
748
  conn.execute("PRAGMA journal_mode=WAL")
744
- rows = conn.execute("""
749
+ sql = """
745
750
  SELECT path, text, bm25(chunks_fts) as rank
746
751
  FROM chunks_fts
747
752
  WHERE chunks_fts MATCH ?
748
753
  AND path NOT LIKE '%/snapshots/%'
749
754
  AND path NOT LIKE '%/archive/%'
750
755
  AND path NOT LIKE '%-backup-%'
751
- ORDER BY rank ASC
752
- LIMIT ?
753
- """, (fts_query, limit * 2)).fetchall()
756
+ """
757
+ params: list = [fts_query]
758
+ if arena:
759
+ sql += " AND path LIKE ?"
760
+ params.append(f"bench/{arena}/%")
761
+ sql += " ORDER BY rank ASC LIMIT ?"
762
+ params.append(limit * 2)
763
+ rows = conn.execute(sql, params).fetchall()
754
764
  conn.close()
755
765
 
756
766
  results = []
@@ -761,12 +771,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
761
771
  seen_paths.add(path)
762
772
  relevance = -rank if rank < 0 else 0.001
763
773
  score = min(relevance / (1 + relevance) * 0.85, 0.75)
774
+ # Parse arena from path (bench/<arena>/...) so downstream
775
+ # consumers can read it directly without parsing again.
776
+ row_arena = ""
777
+ if path.startswith("bench/"):
778
+ parts = path.split("/", 2)
779
+ if len(parts) >= 3:
780
+ row_arena = parts[1]
764
781
  results.append({
765
782
  "path": f"L0/{path}",
766
783
  "snippet": text[:500],
767
784
  "score": round(score, 4),
768
785
  "layer": "L0_workspace_bm25",
769
786
  "source": path,
787
+ "arena": row_arena,
770
788
  })
771
789
  if len(results) >= limit:
772
790
  break
@@ -782,12 +800,20 @@ def search_l0_bm25(query: str, limit: int = 6) -> List[Dict]:
782
800
 
783
801
  L5_API_URL = os.environ.get("PME_L5_URL", "http://127.0.0.1:8034")
784
802
 
785
- def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
786
- """Search L5 Communications Context via L5 API (emails, chats, calendar)."""
803
+ def search_l5_communications(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
804
+ """Search L5 Communications Context via L5 API (emails, chats, calendar).
805
+
806
+ arena (optional): forwarded to L5; filters Milvus by the arena
807
+ dynamic field. The record id is included in the result so callers
808
+ can attach metadata via the shim's _META_CACHE.
809
+ """
787
810
  try:
811
+ params: dict = {"q": query, "limit": limit}
812
+ if arena:
813
+ params["arena"] = arena
788
814
  resp = requests.get(
789
815
  f"{L5_API_URL}/search",
790
- params={"q": query, "limit": limit},
816
+ params=params,
791
817
  timeout=10,
792
818
  )
793
819
  if resp.status_code != 200:
@@ -804,10 +830,15 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
804
830
  continue # skip low relevance
805
831
  contact = hit.get("contact", "")
806
832
  channel = hit.get("channel", "")
807
- path_label = f"L5/{source}"
808
- if contact:
833
+ hit_id = hit.get("id", "")
834
+ # Use record id as path label so the shim can attach
835
+ # metadata via _META_CACHE; falls back to source label
836
+ # for legacy records that have no id.
837
+ path_label = hit_id or f"L5/{source}"
838
+ if not hit_id and contact:
809
839
  path_label = f"L5/{channel}/{contact}"
810
840
  results.append({
841
+ "id": hit_id,
811
842
  "path": path_label,
812
843
  "snippet": hit.get("text", "")[:500],
813
844
  "score": scaled_score,
@@ -815,6 +846,7 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
815
846
  "source": source,
816
847
  "collection": hit.get("collection", ""),
817
848
  "timestamp": hit.get("timestamp", ""),
849
+ "arena": hit.get("arena", ""),
818
850
  })
819
851
  return results
820
852
  except Exception as e:
@@ -825,12 +857,19 @@ def search_l5_communications(query: str, limit: int = 6) -> List[Dict]:
825
857
  # L6: Document Store Search
826
858
  L6_URL = os.environ.get("PME_L6_URL", "http://localhost:8037")
827
859
 
828
- def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
829
- """Search L6 Document Store (research, legal, financial, project docs)."""
860
+ def search_l6_documents(query: str, limit: int = 6, arena: str = None) -> List[Dict]:
861
+ """Search L6 Document Store (research, legal, financial, project docs).
862
+
863
+ arena (optional): forwarded to L6 — L6 already supports arena
864
+ natively (see l6-document-store.py search_vector / search_fts).
865
+ """
830
866
  try:
867
+ params: dict = {"q": query, "method": "hybrid", "limit": limit, "rerank": "true"}
868
+ if arena:
869
+ params["arena"] = arena
831
870
  resp = requests.get(
832
871
  f"{L6_URL}/search",
833
- params={"q": query, "method": "hybrid", "limit": limit, "rerank": "true"},
872
+ params=params,
834
873
  timeout=10,
835
874
  )
836
875
  if resp.status_code != 200:
@@ -875,13 +914,19 @@ def search_l6_documents(query: str, limit: int = 6) -> List[Dict]:
875
914
  return []
876
915
 
877
916
 
878
- def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
879
- """Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs)."""
917
+ def sequential_hybridrag_search(query: str, limit: int = 16, arena: str = None) -> List[Dict]:
918
+ """Main HybridRAG processing: L0 BM25 → L1 System Files → L2 HybridRAG (L3 Graph + L4 Vector + L5 Comms + L6 Docs).
919
+
920
+ arena (optional): tenant scope. Forwarded to L0 (path-prefix
921
+ filter), L5 (Milvus dynamic-field filter), L6 (native arena).
922
+ L4 vector and L3 graph don't yet support native arena filtering;
923
+ the compat shim post-filter catches those before they leak out.
924
+ """
880
925
  start_time = time.time()
881
- log.info(f"Starting sequential HybridRAG search for: '{query}'")
926
+ log.info(f"Starting sequential HybridRAG search for: '{query}' arena={arena!r}")
882
927
 
883
928
  # L0: BM25 workspace memory (keyword search — complements semantic layers)
884
- l0_results = search_l0_bm25(query, limit=6)
929
+ l0_results = search_l0_bm25(query, limit=6, arena=arena)
885
930
  log.info(f"L0 BM25 workspace: {len(l0_results)} results")
886
931
 
887
932
  # L1: System Files (HIGHEST PRIORITY)
@@ -902,11 +947,11 @@ def sequential_hybridrag_search(query: str, limit: int = 16) -> List[Dict]:
902
947
  log.info(f"L4 Vector search: {len(vector_results)} results (HyDE={'on' if hyde_query != query else 'off'})")
903
948
 
904
949
  # L5: Communications Context (emails, chats, calendar) — also use HyDE
905
- l5_results = search_l5_communications(hyde_query, limit=6)
950
+ l5_results = search_l5_communications(hyde_query, limit=6, arena=arena)
906
951
  log.info(f"L5 Communications: {len(l5_results)} results")
907
952
 
908
953
  # L6: Document Store (research, legal, financial, project docs)
909
- l6_results = search_l6_documents(hyde_query, limit=6)
954
+ l6_results = search_l6_documents(hyde_query, limit=6, arena=arena)
910
955
  log.info(f"L6 Documents: {len(l6_results)} results")
911
956
 
912
957
  # L2: HybridRAG fusion (combines all layers with L1 priority)
@@ -966,10 +1011,11 @@ async def search_endpoint(request: Request) -> dict:
966
1011
  body = await request.json()
967
1012
  query = body.get("query", "")
968
1013
  limit = body.get("limit", 16)
1014
+ arena = body.get("arena") or None
969
1015
  if not query:
970
1016
  raise HTTPException(status_code=400, detail="query is required")
971
1017
 
972
- results = sequential_hybridrag_search(query, limit=limit)
1018
+ results = sequential_hybridrag_search(query, limit=limit, arena=arena)
973
1019
 
974
1020
  # Also return raw graph entities for context enrichment
975
1021
  entities = extract_query_entities(query)
@@ -449,8 +449,13 @@ def index_memory(client):
449
449
 
450
450
  # --- Search ---
451
451
 
452
- def search(query: str, collection: str = None, limit: int = 10):
453
- """Search across collections."""
452
+ def search(query: str, collection: str = None, limit: int = 10, arena: str = None):
453
+ """Search across collections.
454
+
455
+ arena (optional): when set, filter to records whose arena dynamic
456
+ field matches. Records indexed before arena was added carry no
457
+ arena field — those are dropped under multi-tenant safety.
458
+ """
454
459
  client = get_client()
455
460
  vectors = embed_texts([query])
456
461
  if not vectors or all(v == 0.0 for v in vectors[0]):
@@ -460,6 +465,12 @@ def search(query: str, collection: str = None, limit: int = 10):
460
465
  collections = [collection] if collection else ["chats", "emails", "contacts", "memory"]
461
466
  all_results = []
462
467
 
468
+ filter_expr = ""
469
+ if arena:
470
+ # Escape double quotes; Milvus filter syntax for dynamic fields.
471
+ safe = str(arena).replace('"', '\\"')
472
+ filter_expr = f'arena == "{safe}"'
473
+
463
474
  for coll in collections:
464
475
  if not client.has_collection(coll):
465
476
  continue
@@ -468,12 +479,14 @@ def search(query: str, collection: str = None, limit: int = 10):
468
479
  collection_name=coll,
469
480
  data=[vectors[0]],
470
481
  limit=limit,
471
- output_fields=["text", "source", "channel", "contact", "timestamp"],
482
+ filter=filter_expr,
483
+ output_fields=["text", "source", "channel", "contact", "timestamp", "arena"],
472
484
  )
473
485
  for hits in results:
474
486
  for hit in hits:
475
487
  entity = hit.get("entity", {})
476
488
  all_results.append({
489
+ "id": hit.get("id", ""),
477
490
  "collection": coll,
478
491
  "score": round(hit.get("distance", 0), 4),
479
492
  "text": entity.get("text", ""),
@@ -481,6 +494,7 @@ def search(query: str, collection: str = None, limit: int = 10):
481
494
  "channel": entity.get("channel", ""),
482
495
  "contact": entity.get("contact", ""),
483
496
  "timestamp": entity.get("timestamp", ""),
497
+ "arena": entity.get("arena", ""),
484
498
  })
485
499
  except Exception as e:
486
500
  print(f" Search error in {coll}: {e}")
@@ -547,8 +561,9 @@ def serve(port=8034):
547
561
  return health()
548
562
 
549
563
  @api.get("/search")
550
- def api_search(q: str = Query(...), collection: str = None, limit: int = 10):
551
- results = search(q, collection=collection, limit=limit)
564
+ def api_search(q: str = Query(...), collection: str = None, limit: int = 10,
565
+ arena: str = None):
566
+ results = search(q, collection=collection, limit=limit, arena=arena)
552
567
  return {"query": q, "results": results, "count": len(results)}
553
568
 
554
569
  @api.get("/stats")
@@ -618,6 +633,10 @@ def serve(port=8034):
618
633
  "channel": (r.get("channel") or "")[:64],
619
634
  "contact": (r.get("contact") or "")[:256],
620
635
  "timestamp": (r.get("timestamp") or _now)[:32],
636
+ # arena lands in the dynamic-field section of the
637
+ # collection (enable_dynamic_field=True). Filterable
638
+ # via `arena == "..."` in /search.
639
+ "arena": (r.get("arena") or "general")[:64],
621
640
  })
622
641
  t1 = _time.time()
623
642
  if rows:
@@ -94,35 +94,13 @@ log = logging.getLogger("l6-document-store")
94
94
  _embed_client = httpx.Client(timeout=60)
95
95
 
96
96
  def embed_text(text: str) -> List[float]:
97
- """Get embedding NV-Embed-v2 primary, Ollama fallback."""
98
- if NV_EMBED_ENABLED:
99
- try:
100
- resp = _embed_client.post(NV_EMBED_URL, json={"input": text[:4000]})
101
- resp.raise_for_status()
102
- return resp.json()["data"][0]["embedding"]
103
- except Exception as e:
104
- log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
105
-
106
- # Ollama fallback
107
- resp = _embed_client.post(
108
- f"{OLLAMA_URL}/api/embeddings",
109
- json={"model": EMBED_MODEL, "prompt": text[:8000]},
110
- )
111
- resp.raise_for_status()
112
- return resp.json()["embedding"]
97
+ """Single-text embed via _embed_post (OpenAI-compat first, lambda-gateway fallback)."""
98
+ return _embed_post([text[:8000]])[0]
113
99
 
114
100
 
115
101
  def embed_batch(texts: List[str]) -> List[List[float]]:
116
- """Embed a batch of texts — NV-Embed-v2 supports native batching."""
117
- if NV_EMBED_ENABLED:
118
- try:
119
- resp = _embed_client.post(NV_EMBED_URL, json={"input": [t[:4000] for t in texts]})
120
- resp.raise_for_status()
121
- return [d["embedding"] for d in resp.json()["data"]]
122
- except Exception as e:
123
- log.warning(f"NV-Embed-v2 batch failed, falling back to sequential: {e}")
124
-
125
- return [embed_text(t) for t in texts]
102
+ """Batched embed via _embed_post."""
103
+ return _embed_post([t[:8000] for t in texts])
126
104
 
127
105
  # ---------------------------------------------------------------------------
128
106
  # Cross-Encoder Reranker