npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.7.13 → 0.8.1 - Mend

@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/packages/memory-engine/engine/services/l4/Dockerfile CHANGED Viewed

@@ -4,7 +4,11 @@ WORKDIR /app
 RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic
-COPY server.py /app/server.py
+# Build context is engine/services so the shared embed_provider module is
+# COPYable. server.py puts its grandparent dir on sys.path (engine/services in
+# the source tree, but / here), so in the image `_shared` must resolve via /app.
+COPY _shared /app/_shared
+COPY l4/server.py /app/server.py
 RUN mkdir -p /data
 ENV L4_DB_PATH=/data/vec.db

package/packages/memory-engine/engine/services/l4/server.py CHANGED Viewed

@@ -23,27 +23,25 @@ import hashlib
 import os
 import sqlite3
 import struct
+import sys
 import time
 from pathlib import Path
 from typing import Any
-import httpx
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+# Shared embedding client lives at engine/services/_shared/. Add the parent of
+# the service dir to sys.path so `from _shared.embed_provider import ...` works
+# regardless of how the service is launched (uvicorn, python server.py, etc.).
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 # ----------------------------------------------------------------------
 # Config
 # ----------------------------------------------------------------------
 DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
-NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
-# Embedding model name sent in /v1/embeddings request body. Defaults to
-# the production NV-Embed-v2 name; override via env when pointing at a
-# different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
-EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
-# Optional Authorization: Bearer <key> for the embedding endpoint.
-# Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
-EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
 EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
@@ -96,59 +94,23 @@ def _get_db() -> sqlite3.Connection:
 # Embedding client
 # ----------------------------------------------------------------------
-_http: httpx.AsyncClient | None = None
+_embed: EmbedClient | None = None
-def _client() -> httpx.AsyncClient:
-    global _http
-    if _http is None:
-        _http = httpx.AsyncClient(timeout=120.0)
-    return _http
+def _embed_client() -> EmbedClient:
+    """Lazily build the embed client so env vars are read at first use."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="L4_",
+            default_url="http://nv-embed:8041/v1/embeddings",
+        )
+    return _embed
 async def _embed_batch(texts: list[str]) -> list[list[float]]:
-    """Embed a batch of texts.
-    Tries OpenAI-compatible shape first (POST <url>, Bearer auth,
-    response data[i].embedding). On failure, falls back to the
-    Pentatonic-AI gateway's native shape (POST .../v1/embed, X-API-Key
-    auth, response embeddings[i]). When the gateway eventually adds an
-    OpenAI-compat /v1/embeddings alias, the primary path will succeed
-    and the fallback will never fire — no code change needed.
-    """
-    if not texts:
-        return []
-    payload = {"input": texts, "model": EMBED_MODEL_NAME}
-    # Primary: OpenAI-compat
-    try:
-        resp = await _client().post(
-            NV_EMBED_URL,
-            headers=_openai_headers(),
-            json=payload,
-            timeout=120.0,
-        )
-        resp.raise_for_status()
-        return [d["embedding"] for d in resp.json()["data"]]
-    except Exception:
-        pass
-    # Fallback: lambda-gateway native shape
-    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
-    resp = await _client().post(
-        fallback_url,
-        headers=_lambda_headers(),
-        json=payload,
-        timeout=120.0,
-    )
-    resp.raise_for_status()
-    return resp.json()["embeddings"]
-def _openai_headers() -> dict:
-    return {"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {}
-def _lambda_headers() -> dict:
-    return {"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {}
+    """Embed a batch of texts via the shared EmbedClient."""
+    return await _embed_client().embed_batch_async(texts)
 # ----------------------------------------------------------------------

package/packages/memory-engine/engine/services/l5/Dockerfile CHANGED Viewed

@@ -1,7 +1,9 @@
 FROM python:3.12-slim
 WORKDIR /app
 RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
-COPY l5-comms-layer.py /app/server.py
+# Shared embed_provider module (build context is engine/services).
+COPY _shared /app/_shared
+COPY l5/l5-comms-layer.py /app/server.py
 RUN mkdir -p /data
 ENV L5_DB_PATH=/data/comms.db
 EXPOSE 8034

package/packages/memory-engine/engine/services/l5/l5-comms-layer.py CHANGED Viewed

@@ -23,6 +23,7 @@ import os
 import glob
 import hashlib
 import json
+import sys
 import time
 from datetime import datetime
 from pathlib import Path
@@ -30,6 +31,10 @@ from pathlib import Path
 import httpx
 from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
+# Shared embed client lives at engine/services/_shared/.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 # --- Config ---
 DB_PATH = os.environ.get(
     "L5_DB_PATH",
@@ -43,43 +48,30 @@ PEOPLE_DIR = WORKSPACE / "memory" / "people"
 CONTACTS_DIR = WORKSPACE / "memory" / "contacts"
 MEMORY_DIR = WORKSPACE / "memory"
-NV_EMBED_URL = os.environ.get("L5_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
-# Embedding model name sent in /v1/embeddings request body. Defaults to
-# the production NV-Embed-v2 name; override when pointing at a different
-# OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
-EMBED_MODEL_NAME = os.environ.get("L5_EMBED_MODEL", "nv-embed-v2")
-# Optional Authorization: Bearer <key> for the primary embedding endpoint.
-EMBED_API_KEY = os.environ.get("L5_EMBED_API_KEY", "")
+_embed: EmbedClient | None = None
+def _embed_client() -> EmbedClient:
+    """Lazily build the shared EmbedClient for L5."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="L5_",
+            default_url="http://localhost:8041/v1/embeddings",
+        )
+    return _embed
 def _embed_post(texts):
-    """POST to the configured embedding endpoint. Tries OpenAI-compat
-    shape first; falls back to Pentatonic-AI lambda-gateway native shape
-    on any failure. When the gateway adds an /v1/embeddings alias the
-    primary path will succeed and the fallback never fires.
+    """Embed a batch of texts via the shared EmbedClient.
+    Provider profile (auth scheme + URL path + body/response shape) is
+    chosen by L5_EMBED_PROVIDER env var (openai | pentatonic-gateway |
+    cohere | custom). Auto-detects on 401 unless L5_EMBED_AUTODETECT=false.
     Returns: list[list[float]] (one embedding per input text).
     """
-    payload = {"input": texts, "model": EMBED_MODEL_NAME}
-    try:
-        r = httpx.post(
-            NV_EMBED_URL,
-            headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
-            json=payload,
-            timeout=120,
-        )
-        r.raise_for_status()
-        return [d["embedding"] for d in r.json()["data"]]
-    except Exception:
-        pass
-    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
-    r = httpx.post(
-        fallback_url,
-        headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
-        json=payload,
-        timeout=120,
-    )
-    r.raise_for_status()
-    return r.json()["embeddings"]
+    return _embed_client().embed_batch(texts)
 # Ollama fallback path. URL/model can be overridden so the L5 container can
 # reach an Ollama instance running on the docker host (host.docker.internal)

package/packages/memory-engine/engine/services/l6/Dockerfile CHANGED Viewed

@@ -3,7 +3,9 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
 RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy
 RUN python -m spacy download en_core_web_sm
-COPY l6-document-store.py /app/server.py
+# Shared embed_provider module (build context is engine/services).
+COPY _shared /app/_shared
+COPY l6/l6-document-store.py /app/server.py
 RUN mkdir -p /data
 ENV L6_DATA_DIR=/data
 EXPOSE 8037

package/packages/memory-engine/engine/services/l6/l6-document-store.py CHANGED Viewed

@@ -20,6 +20,7 @@ import logging
 import os
 import re
 import sqlite3
+import sys
 import time
 from datetime import datetime, timezone
 from pathlib import Path
@@ -29,6 +30,10 @@ import httpx
 from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
 from pymilvus.milvus_client.index import IndexParams
+# Shared embed client lives at engine/services/_shared/.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 # ---------------------------------------------------------------------------
 # Config
 # ---------------------------------------------------------------------------
@@ -37,39 +42,29 @@ DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-sto
 MILVUS_DB = str(DATA_DIR / "documents.db")
 FTS_DB = str(DATA_DIR / "documents_fts.db")
 OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
-EMBED_MODEL = os.environ.get("L6_EMBED_MODEL", "nomic-embed-text")
-NV_EMBED_URL = os.environ.get("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
 NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
 EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))
-# Optional Authorization: Bearer <key> for the embedding endpoint.
-EMBED_API_KEY = os.environ.get("L6_EMBED_API_KEY", "")
-def _embed_post(texts):
-    """POST to embedding endpoint. Tries OpenAI-compat shape first;
-    falls back to Pentatonic-AI lambda-gateway native shape on failure.
-    See L4 / L5 for the same pattern."""
-    import httpx as _httpx
-    payload = {"input": texts, "model": EMBED_MODEL}
-    try:
-        r = _httpx.post(
-            NV_EMBED_URL,
-            headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
-            json=payload,
-            timeout=120,
+_embed: EmbedClient | None = None
+def _embed_client() -> EmbedClient:
+    """Lazily build the shared EmbedClient for L6."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="L6_",
+            default_url="http://localhost:8041/v1/embeddings",
+            default_model="nomic-embed-text",
         )
-        r.raise_for_status()
-        return [d["embedding"] for d in r.json()["data"]]
-    except Exception:
-        pass
-    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
-    r = _httpx.post(
-        fallback_url,
-        headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
-        json=payload,
-        timeout=120,
-    )
-    r.raise_for_status()
-    return r.json()["embeddings"]
+    return _embed
+def _embed_post(texts):
+    """Embed a batch of texts via the shared EmbedClient. Provider profile
+    chosen by L6_EMBED_PROVIDER env (openai | pentatonic-gateway | cohere
+    | custom). See engine/services/_shared/embed_provider.py for details."""
+    return _embed_client().embed_batch(texts)
 COLLECTION_NAME = "documents"
 RRF_K = 60

package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py ADDED Viewed

@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""Wipe pre-arena-scoping :Entity nodes from the L3 Neo4j graph.
+Run this AFTER the engine has been deployed with the arena-scoped
+writer paths, not before. Sequence:
+    1. Deploy l2-hybridrag-proxy with arena-scoped MERGE patterns.
+    2. Verify new ingest is creating arena-tagged entities (run this
+       script without --confirm first; that dry run reports legacy vs new).
+    3. Run this script with --confirm to wipe legacy entities.
+    4. Future ingest re-extracts entities from existing :Chunk nodes
+       on-demand (search-side touches them; new stores recreate them
+       from scratch under the right arena).
+Why wipe vs. backfill: pre-fix entities collapsed cross-tenant by name,
+so their MENTIONS edges connect to chunks across multiple arenas.
+Splitting them by mentions is doable but error-prone (edge cases for
+many-arena entities, orphans, no-mention entities). The Hebbian weights
+on those edges were also corrupted by cross-tenant traffic, so they
+weren't worth saving. Chunks are preserved either way — re-extraction
+is cheap.
+Usage:
+    # report counts only (dry run; needs NEO4J_PASSWORD or --neo4j-password)
+    python wipe-legacy-l3-entities.py --neo4j-uri bolt://localhost:7687
+    # actually wipe
+    python wipe-legacy-l3-entities.py --neo4j-uri bolt://localhost:7687 --confirm
+The script is idempotent — once no legacy entities remain, re-running it
+deletes nothing and exits before the index-drop step.
+"""
+from __future__ import annotations
+import argparse
+import os
+import sys
+try:
+    from neo4j import GraphDatabase
+except ImportError:
+    print("ERROR: neo4j driver not installed. `pip install neo4j` first.", file=sys.stderr)
+    sys.exit(1)
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0])
+    parser.add_argument(
+        "--neo4j-uri",
+        default=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
+        help="Neo4j bolt URI (default: NEO4J_URI env or bolt://localhost:7687)",
+    )
+    parser.add_argument(
+        "--neo4j-user",
+        default=os.environ.get("NEO4J_USER", "neo4j"),
+    )
+    parser.add_argument(
+        "--neo4j-password",
+        default=os.environ.get("NEO4J_PASSWORD"),
+    )
+    parser.add_argument(
+        "--confirm",
+        action="store_true",
+        help="Actually delete. Without this flag, runs in dry-run mode "
+        "(reports counts only).",
+    )
+    args = parser.parse_args()
+    if not args.neo4j_password:
+        print("ERROR: --neo4j-password or NEO4J_PASSWORD env required", file=sys.stderr)
+        return 2
+    driver = GraphDatabase.driver(
+        args.neo4j_uri, auth=(args.neo4j_user, args.neo4j_password),
+    )
+    try:
+        with driver.session() as session:
+            # Count legacy vs arena-scoped entities so the operator can
+            # eyeball whether the new writer path has actually started
+            # producing arena-scoped rows before deleting anything.
+            legacy = session.run(
+                "MATCH (e:Entity) WHERE e.arena IS NULL RETURN count(e) AS n"
+            ).single()["n"]
+            scoped = session.run(
+                "MATCH (e:Entity) WHERE e.arena IS NOT NULL RETURN count(e) AS n"
+            ).single()["n"]
+            print(f"L3 Entity nodes: {legacy} legacy (no arena) / {scoped} arena-scoped")
+            # Same breakdown for chunks — they should already all be
+            # arena-tagged but worth verifying before/after.
+            chunk_legacy = session.run(
+                "MATCH (c:Chunk) WHERE c.arena IS NULL RETURN count(c) AS n"
+            ).single()["n"]
+            chunk_scoped = session.run(
+                "MATCH (c:Chunk) WHERE c.arena IS NOT NULL RETURN count(c) AS n"
+            ).single()["n"]
+            print(f"L3 Chunk nodes:  {chunk_legacy} legacy (no arena) / {chunk_scoped} arena-scoped")
+            if not args.confirm:
+                print("\nDry run — pass --confirm to actually delete legacy entities.")
+                return 0
+            if legacy == 0:
+                print("\nNothing to do — all entities already arena-scoped.")
+                return 0
+            print(f"\nWiping {legacy} legacy entities…")
+            result = session.run(
+                "MATCH (e:Entity) WHERE e.arena IS NULL DETACH DELETE e RETURN count(e) AS n"
+            ).single()
+            deleted = result["n"]
+            print(f"Deleted {deleted} legacy entities.")
+            # Drop the old single-key entity index now that the data
+            # is gone — safe to do because the new writer path uses
+            # the compound (arena, name) index.
+            try:
+                session.run("DROP INDEX entity_name IF EXISTS")
+                print("Dropped legacy `entity_name` index.")
+            except Exception as e:  # noqa: BLE001
+                print(f"Note: could not drop entity_name index ({e}); ok if absent.")
+        return 0
+    finally:
+        driver.close()
+if __name__ == "__main__":
+    sys.exit(main())

package/packages/memory-engine/tests/e2e_arena.sh CHANGED Viewed

@@ -217,16 +217,40 @@ print("yes" if any("Eclipse" in r.get("content","") for r in data) else "no")')
   || fail "tenant-y lost the shared phrase (id collision?)"
 # ---------------------------------------------------------------------------
-# /forget — by metadata_contains. Cleans up so reruns are idempotent.
+# /forget — tenant-scoped delete by arena.
+#
+# Pre-arena, /forget would unconditionally trigger a global wipe of
+# L0+L4+L3 inside the shim, regardless of what filters were passed. The
+# smoke test happily asserted "deleted >= 1" because the shim returned
+# the count of every row globally, even rows the caller didn't ask to
+# remove. That was the right thing for a bench reset, the wrong thing
+# for any real tenant operation.
+#
+# Now /forget honours the arena in the metadata filter:
+#   - {metadata_contains: {arena: <tenant>}}     → tenant-scoped wipe
+#   - {metadata_contains: {<other>: <val>}}      → L6 targeted only
+#   - {} (empty body)                            → bench reset (gated
+#                                                  by GLOBAL_WIPE in the
+#                                                  internal endpoint)
+#
+# We assert here that wiping arena=e2e-tenant-a actually removes ≥1 row,
+# AND that arena=e2e-tenant-b's rows survived — proves real tenant
+# isolation rather than just "shim returned non-zero".
 # ---------------------------------------------------------------------------
 echo ""
-echo "=== /forget probe=e2e-arena ==="
+echo "=== /forget arena=e2e-tenant-a ==="
 F=$(curl -sf -X POST "$BASE/forget" -H "Content-Type: application/json" \
-  -d '{"metadata_contains":{"probe":"e2e-arena"}}')
+  -d '{"metadata_contains":{"arena":"e2e-tenant-a"}}')
 deleted=$(echo "$F" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("deleted",0))')
 echo "  deleted: $deleted"
-[ "$deleted" -ge "1" ] && ok "/forget removed at least 1 row" || fail "/forget"
+[ "$deleted" -ge "1" ] && ok "/forget removed tenant-a rows" || fail "/forget tenant-a wipe"
+# tenant-b should still have its rows.
+B=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
+  -d '{"arena":"e2e-tenant-b","query":"Borealis","limit":4}')
+b_hits=$(echo "$B" | python3 -c 'import json,sys; print(len(json.load(sys.stdin).get("results",[])))')
+[ "$b_hits" -ge "1" ] && ok "/forget left tenant-b intact" || fail "/forget leaked into tenant-b"
 echo ""
 echo "=== Result ==="