npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.7.12 → 0.8.0 - Mend

@pentatonic-ai/ai-agent-sdk 0.7.12 → 0.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py CHANGED Viewed

@@ -17,6 +17,7 @@ import json
 import logging
 import os
 import sqlite3
+import sys
 import time
 from datetime import datetime
 from pathlib import Path
@@ -30,6 +31,10 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
 from pydantic import BaseModel
 import uvicorn
+# Shared embed client lives at engine/services/_shared/.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 def _serialize_neo4j_value(v: Any) -> Any:
     """Convert neo4j-specific types to JSON-serialisable equivalents.
@@ -93,10 +98,27 @@ QMD_DB_PATH = _resolve_qmd_db()
 OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
 EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
-# NV-Embed-v2 service (primary, 4096-dim)
-NV_EMBED_URL = os.environ.get("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
+# NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
+# managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
+# selects auth scheme (Bearer vs X-API-Key) and request shape.
 NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
+_embed: EmbedClient | None = None
+def _embed_client() -> EmbedClient:
+    """Lazily build the shared EmbedClient for L2."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="PME_",
+            url_var="PME_NV_EMBED_URL",
+            key_var="PME_EMBED_API_KEY",
+            model_var="PME_NV_EMBED_MODEL",
+            default_url="http://localhost:8041/v1/embeddings",
+        )
+    return _embed
 # Sequential processing weights - OPTIMIZED FOR QUALITY
 GRAPH_PRIORITY_BOOST = 0.5  # Extra score for graph-derived results (↑ for better entity/relationship context)
 VECTOR_BASE_WEIGHT = 0.5     # Base weight for vector results (↓ balanced for accuracy over speed)
@@ -389,12 +411,11 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
 def get_embedding(text: str) -> List[float]:
     """Get embedding — tries NV-Embed-v2 (4096-dim) first, falls back to Ollama."""
-    # Try NV-Embed-v2 service first
+    # Try NV-Embed-v2 service first via the shared EmbedClient (handles
+    # provider selection, auth scheme, path, and 401 auto-detect).
     if NV_EMBED_ENABLED:
         try:
-            r = requests.post(NV_EMBED_URL, json={"input": text}, timeout=30)
-            r.raise_for_status()
-            return r.json()["data"][0]["embedding"]
+            return _embed_client().embed_one(text)
         except Exception as e:
             log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
@@ -1073,17 +1094,23 @@ async def list_models() -> dict:
 @app.post("/v1/embeddings")
 async def create_embeddings(request: EmbeddingRequest) -> dict:
     """Pass-through to NV-Embed-v2 (4096-dim). Batch-native — forwards the full
-    input list in a single HTTP call instead of looping one-at-a-time."""
+    input list in a single HTTP call instead of looping one-at-a-time.
+    Returns OpenAI-shaped response regardless of upstream provider, so
+    callers (including L4 search and external clients) get a consistent
+    contract from this proxy."""
     try:
-        import httpx
         inputs = [request.input] if isinstance(request.input, str) else request.input
-        async with httpx.AsyncClient(timeout=60) as client:
-            resp = await client.post(
-                NV_EMBED_URL,
-                json={"input": inputs, "model": request.model or "nv-embed-v2"}
-            )
-            resp.raise_for_status()
-            return resp.json()
+        embeddings = await _embed_client().embed_batch_async(inputs)
+        return {
+            "object": "list",
+            "model": request.model or "nv-embed-v2",
+            "data": [
+                {"object": "embedding", "embedding": e, "index": i}
+                for i, e in enumerate(embeddings)
+            ],
+            "usage": {"prompt_tokens": 0, "total_tokens": 0},
+        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -1319,17 +1346,11 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
 def _embed_batch_local(texts: List[str]) -> List[List[float]]:
-    """Batch embed via NV-Embed. Returns vectors in input order."""
+    """Batch embed via the shared EmbedClient. Returns vectors in input order."""
     if not texts:
         return []
     try:
-        r = requests.post(NV_EMBED_URL,
-                          json={"input": texts, "model": "nv-embed-v2"},
-                          timeout=120)
-        r.raise_for_status()
-        data = r.json().get("data", [])
-        # NV-Embed returns [{embedding: [...]}, ...]
-        return [d["embedding"] for d in data]
+        return _embed_client().embed_batch(texts)
     except Exception as e:
         log.warning(f"NV-Embed batch failed: {e}; trying singletons")
         return [get_embedding(t) for t in texts]

package/packages/memory-engine/engine/services/l4/Dockerfile CHANGED Viewed

@@ -4,7 +4,11 @@ WORKDIR /app
 RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic
-COPY server.py /app/server.py
+# Build context is engine/services so the shared embed_provider module is
+# COPYable. server.py adds engine/services to sys.path at startup, then
+# imports from `_shared.embed_provider`.
+COPY _shared /app/_shared
+COPY l4/server.py /app/server.py
 RUN mkdir -p /data
 ENV L4_DB_PATH=/data/vec.db

package/packages/memory-engine/engine/services/l4/server.py CHANGED Viewed

@@ -23,27 +23,25 @@ import hashlib
 import os
 import sqlite3
 import struct
+import sys
 import time
 from pathlib import Path
 from typing import Any
-import httpx
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+# Shared embedding client lives at engine/services/_shared/. Add the parent of
+# the service dir to sys.path so `from _shared.embed_provider import ...` works
+# regardless of how the service is launched (uvicorn, python server.py, etc.).
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 # ----------------------------------------------------------------------
 # Config
 # ----------------------------------------------------------------------
 DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
-NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
-# Embedding model name sent in /v1/embeddings request body. Defaults to
-# the production NV-Embed-v2 name; override via env when pointing at a
-# different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
-EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
-# Optional Authorization: Bearer <key> for the embedding endpoint.
-# Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
-EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
 EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
@@ -96,59 +94,23 @@ def _get_db() -> sqlite3.Connection:
 # Embedding client
 # ----------------------------------------------------------------------
-_http: httpx.AsyncClient | None = None
+_embed: EmbedClient | None = None
-def _client() -> httpx.AsyncClient:
-    global _http
-    if _http is None:
-        _http = httpx.AsyncClient(timeout=120.0)
-    return _http
+def _embed_client() -> EmbedClient:
+    """Lazily build the embed client so env vars are read at first use."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="L4_",
+            default_url="http://nv-embed:8041/v1/embeddings",
+        )
+    return _embed
 async def _embed_batch(texts: list[str]) -> list[list[float]]:
-    """Embed a batch of texts.
-    Tries OpenAI-compatible shape first (POST <url>, Bearer auth,
-    response data[i].embedding). On failure, falls back to the
-    Pentatonic-AI gateway's native shape (POST .../v1/embed, X-API-Key
-    auth, response embeddings[i]). When the gateway eventually adds an
-    OpenAI-compat /v1/embeddings alias, the primary path will succeed
-    and the fallback will never fire — no code change needed.
-    """
-    if not texts:
-        return []
-    payload = {"input": texts, "model": EMBED_MODEL_NAME}
-    # Primary: OpenAI-compat
-    try:
-        resp = await _client().post(
-            NV_EMBED_URL,
-            headers=_openai_headers(),
-            json=payload,
-            timeout=120.0,
-        )
-        resp.raise_for_status()
-        return [d["embedding"] for d in resp.json()["data"]]
-    except Exception:
-        pass
-    # Fallback: lambda-gateway native shape
-    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
-    resp = await _client().post(
-        fallback_url,
-        headers=_lambda_headers(),
-        json=payload,
-        timeout=120.0,
-    )
-    resp.raise_for_status()
-    return resp.json()["embeddings"]
-def _openai_headers() -> dict:
-    return {"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {}
-def _lambda_headers() -> dict:
-    return {"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {}
+    """Embed a batch of texts via the shared EmbedClient."""
+    return await _embed_client().embed_batch_async(texts)
 # ----------------------------------------------------------------------

package/packages/memory-engine/engine/services/l5/Dockerfile CHANGED Viewed

@@ -1,7 +1,9 @@
 FROM python:3.12-slim
 WORKDIR /app
 RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
-COPY l5-comms-layer.py /app/server.py
+# Shared embed_provider module (build context is engine/services).
+COPY _shared /app/_shared
+COPY l5/l5-comms-layer.py /app/server.py
 RUN mkdir -p /data
 ENV L5_DB_PATH=/data/comms.db
 EXPOSE 8034

package/packages/memory-engine/engine/services/l5/l5-comms-layer.py CHANGED Viewed

@@ -23,6 +23,7 @@ import os
 import glob
 import hashlib
 import json
+import sys
 import time
 from datetime import datetime
 from pathlib import Path
@@ -30,6 +31,10 @@ from pathlib import Path
 import httpx
 from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
+# Shared embed client lives at engine/services/_shared/.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 # --- Config ---
 DB_PATH = os.environ.get(
     "L5_DB_PATH",
@@ -43,43 +48,30 @@ PEOPLE_DIR = WORKSPACE / "memory" / "people"
 CONTACTS_DIR = WORKSPACE / "memory" / "contacts"
 MEMORY_DIR = WORKSPACE / "memory"
-NV_EMBED_URL = os.environ.get("L5_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
-# Embedding model name sent in /v1/embeddings request body. Defaults to
-# the production NV-Embed-v2 name; override when pointing at a different
-# OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
-EMBED_MODEL_NAME = os.environ.get("L5_EMBED_MODEL", "nv-embed-v2")
-# Optional Authorization: Bearer <key> for the primary embedding endpoint.
-EMBED_API_KEY = os.environ.get("L5_EMBED_API_KEY", "")
+_embed: EmbedClient | None = None
+def _embed_client() -> EmbedClient:
+    """Lazily build the shared EmbedClient for L5."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="L5_",
+            default_url="http://localhost:8041/v1/embeddings",
+        )
+    return _embed
 def _embed_post(texts):
-    """POST to the configured embedding endpoint. Tries OpenAI-compat
-    shape first; falls back to Pentatonic-AI lambda-gateway native shape
-    on any failure. When the gateway adds an /v1/embeddings alias the
-    primary path will succeed and the fallback never fires.
+    """Embed a batch of texts via the shared EmbedClient.
+    Provider profile (auth scheme + URL path + body/response shape) is
+    chosen by L5_EMBED_PROVIDER env var (openai | pentatonic-gateway |
+    cohere | custom). Auto-detects on 401 unless L5_EMBED_AUTODETECT=false.
     Returns: list[list[float]] (one embedding per input text).
     """
-    payload = {"input": texts, "model": EMBED_MODEL_NAME}
-    try:
-        r = httpx.post(
-            NV_EMBED_URL,
-            headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
-            json=payload,
-            timeout=120,
-        )
-        r.raise_for_status()
-        return [d["embedding"] for d in r.json()["data"]]
-    except Exception:
-        pass
-    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
-    r = httpx.post(
-        fallback_url,
-        headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
-        json=payload,
-        timeout=120,
-    )
-    r.raise_for_status()
-    return r.json()["embeddings"]
+    return _embed_client().embed_batch(texts)
 # Ollama fallback path. URL/model can be overridden so the L5 container can
 # reach an Ollama instance running on the docker host (host.docker.internal)

package/packages/memory-engine/engine/services/l6/Dockerfile CHANGED Viewed

@@ -3,7 +3,9 @@ WORKDIR /app
 RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
 RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy
 RUN python -m spacy download en_core_web_sm
-COPY l6-document-store.py /app/server.py
+# Shared embed_provider module (build context is engine/services).
+COPY _shared /app/_shared
+COPY l6/l6-document-store.py /app/server.py
 RUN mkdir -p /data
 ENV L6_DATA_DIR=/data
 EXPOSE 8037

package/packages/memory-engine/engine/services/l6/l6-document-store.py CHANGED Viewed

@@ -20,6 +20,7 @@ import logging
 import os
 import re
 import sqlite3
+import sys
 import time
 from datetime import datetime, timezone
 from pathlib import Path
@@ -29,6 +30,10 @@ import httpx
 from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
 from pymilvus.milvus_client.index import IndexParams
+# Shared embed client lives at engine/services/_shared/.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+from _shared.embed_provider import EmbedClient  # noqa: E402
 # ---------------------------------------------------------------------------
 # Config
 # ---------------------------------------------------------------------------
@@ -37,39 +42,29 @@ DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-sto
 MILVUS_DB = str(DATA_DIR / "documents.db")
 FTS_DB = str(DATA_DIR / "documents_fts.db")
 OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
-EMBED_MODEL = os.environ.get("L6_EMBED_MODEL", "nomic-embed-text")
-NV_EMBED_URL = os.environ.get("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
 NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
 EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))
-# Optional Authorization: Bearer <key> for the embedding endpoint.
-EMBED_API_KEY = os.environ.get("L6_EMBED_API_KEY", "")
-def _embed_post(texts):
-    """POST to embedding endpoint. Tries OpenAI-compat shape first;
-    falls back to Pentatonic-AI lambda-gateway native shape on failure.
-    See L4 / L5 for the same pattern."""
-    import httpx as _httpx
-    payload = {"input": texts, "model": EMBED_MODEL}
-    try:
-        r = _httpx.post(
-            NV_EMBED_URL,
-            headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
-            json=payload,
-            timeout=120,
+_embed: EmbedClient | None = None
+def _embed_client() -> EmbedClient:
+    """Lazily build the shared EmbedClient for L6."""
+    global _embed
+    if _embed is None:
+        _embed = EmbedClient.from_env(
+            prefix="L6_",
+            default_url="http://localhost:8041/v1/embeddings",
+            default_model="nomic-embed-text",
         )
-        r.raise_for_status()
-        return [d["embedding"] for d in r.json()["data"]]
-    except Exception:
-        pass
-    fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
-    r = _httpx.post(
-        fallback_url,
-        headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
-        json=payload,
-        timeout=120,
-    )
-    r.raise_for_status()
-    return r.json()["embeddings"]
+    return _embed
+def _embed_post(texts):
+    """Embed a batch of texts via the shared EmbedClient. Provider profile
+    chosen by L6_EMBED_PROVIDER env (openai | pentatonic-gateway | cohere
+    | custom). See engine/services/_shared/embed_provider.py for details."""
+    return _embed_client().embed_batch(texts)
 COLLECTION_NAME = "documents"
 RRF_K = 60