@pentatonic-ai/ai-agent-sdk 0.7.12 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@ import json
17
17
  import logging
18
18
  import os
19
19
  import sqlite3
20
+ import sys
20
21
  import time
21
22
  from datetime import datetime
22
23
  from pathlib import Path
@@ -30,6 +31,10 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
30
31
  from pydantic import BaseModel
31
32
  import uvicorn
32
33
 
34
+ # Shared embed client lives at engine/services/_shared/.
35
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
36
+ from _shared.embed_provider import EmbedClient # noqa: E402
37
+
33
38
 
34
39
  def _serialize_neo4j_value(v: Any) -> Any:
35
40
  """Convert neo4j-specific types to JSON-serialisable equivalents.
@@ -93,10 +98,27 @@ QMD_DB_PATH = _resolve_qmd_db()
93
98
  OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
94
99
  EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
95
100
 
96
- # NV-Embed-v2 service (primary, 4096-dim)
97
- NV_EMBED_URL = os.environ.get("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
101
+ # NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
102
+ # managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
103
+ # selects auth scheme (Bearer vs X-API-Key) and request shape.
98
104
  NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
99
105
 
106
+ _embed: EmbedClient | None = None
107
+
108
+
109
+ def _embed_client() -> EmbedClient:
110
+ """Lazily build the shared EmbedClient for L2."""
111
+ global _embed
112
+ if _embed is None:
113
+ _embed = EmbedClient.from_env(
114
+ prefix="PME_",
115
+ url_var="PME_NV_EMBED_URL",
116
+ key_var="PME_EMBED_API_KEY",
117
+ model_var="PME_NV_EMBED_MODEL",
118
+ default_url="http://localhost:8041/v1/embeddings",
119
+ )
120
+ return _embed
121
+
100
122
  # Sequential processing weights - OPTIMIZED FOR QUALITY
101
123
  GRAPH_PRIORITY_BOOST = 0.5 # Extra score for graph-derived results (↑ for better entity/relationship context)
102
124
  VECTOR_BASE_WEIGHT = 0.5 # Base weight for vector results (↓ balanced for accuracy over speed)
@@ -389,12 +411,11 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
389
411
 
390
412
  def get_embedding(text: str) -> List[float]:
391
413
  """Get embedding — tries NV-Embed-v2 (4096-dim) first, falls back to Ollama."""
392
- # Try NV-Embed-v2 service first
414
+ # Try NV-Embed-v2 service first via the shared EmbedClient (handles
415
+ # provider selection, auth scheme, path, and 401 auto-detect).
393
416
  if NV_EMBED_ENABLED:
394
417
  try:
395
- r = requests.post(NV_EMBED_URL, json={"input": text}, timeout=30)
396
- r.raise_for_status()
397
- return r.json()["data"][0]["embedding"]
418
+ return _embed_client().embed_one(text)
398
419
  except Exception as e:
399
420
  log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
400
421
 
@@ -1073,17 +1094,23 @@ async def list_models() -> dict:
1073
1094
  @app.post("/v1/embeddings")
1074
1095
  async def create_embeddings(request: EmbeddingRequest) -> dict:
1075
1096
  """Pass-through to NV-Embed-v2 (4096-dim). Batch-native — forwards the full
1076
- input list in a single HTTP call instead of looping one-at-a-time."""
1097
+ input list in a single HTTP call instead of looping one-at-a-time.
1098
+
1099
+ Returns an OpenAI-shaped response regardless of upstream provider, so
1100
+ callers (including L4 search and external clients) get a consistent
1101
+ contract from this proxy. Token counts in `usage` are always reported as 0."""
1077
1102
  try:
1078
- import httpx
1079
1103
  inputs = [request.input] if isinstance(request.input, str) else request.input
1080
- async with httpx.AsyncClient(timeout=60) as client:
1081
- resp = await client.post(
1082
- NV_EMBED_URL,
1083
- json={"input": inputs, "model": request.model or "nv-embed-v2"}
1084
- )
1085
- resp.raise_for_status()
1086
- return resp.json()
1104
+ embeddings = await _embed_client().embed_batch_async(inputs)
1105
+ return {
1106
+ "object": "list",
1107
+ "model": request.model or "nv-embed-v2",
1108
+ "data": [
1109
+ {"object": "embedding", "embedding": e, "index": i}
1110
+ for i, e in enumerate(embeddings)
1111
+ ],
1112
+ "usage": {"prompt_tokens": 0, "total_tokens": 0},
1113
+ }
1087
1114
  except Exception as e:
1088
1115
  raise HTTPException(status_code=500, detail=str(e))
1089
1116
 
@@ -1319,17 +1346,11 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
1319
1346
 
1320
1347
 
1321
1348
  def _embed_batch_local(texts: List[str]) -> List[List[float]]:
1322
- """Batch embed via NV-Embed. Returns vectors in input order."""
1349
+ """Batch embed via the shared EmbedClient. Returns vectors in input order."""
1323
1350
  if not texts:
1324
1351
  return []
1325
1352
  try:
1326
- r = requests.post(NV_EMBED_URL,
1327
- json={"input": texts, "model": "nv-embed-v2"},
1328
- timeout=120)
1329
- r.raise_for_status()
1330
- data = r.json().get("data", [])
1331
- # NV-Embed returns [{embedding: [...]}, ...]
1332
- return [d["embedding"] for d in data]
1353
+ return _embed_client().embed_batch(texts)
1333
1354
  except Exception as e:
1334
1355
  log.warning(f"NV-Embed batch failed: {e}; trying singletons")
1335
1356
  return [get_embedding(t) for t in texts]
@@ -4,7 +4,11 @@ WORKDIR /app
4
4
 
5
5
  RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic
6
6
 
7
- COPY server.py /app/server.py
7
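+ # Build context is engine/services so the shared embed_provider module is
8
+ # COPYable. In the repo layout server.py adds engine/services to sys.path at
9
+ # startup; in this image that entry resolves to `/`, so importing `_shared.embed_provider` relies on /app (the WORKDIR) being on sys.path.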
10
+ COPY _shared /app/_shared
11
+ COPY l4/server.py /app/server.py
8
12
 
9
13
  RUN mkdir -p /data
10
14
  ENV L4_DB_PATH=/data/vec.db
@@ -23,27 +23,25 @@ import hashlib
23
23
  import os
24
24
  import sqlite3
25
25
  import struct
26
+ import sys
26
27
  import time
27
28
  from pathlib import Path
28
29
  from typing import Any
29
30
 
30
- import httpx
31
31
  from fastapi import FastAPI, HTTPException
32
32
  from pydantic import BaseModel
33
33
 
34
+ # Shared embedding client lives at engine/services/_shared/. Add the parent of
35
+ # the service dir to sys.path so `from _shared.embed_provider import ...` works
36
+ # regardless of how the service is launched (uvicorn, python server.py, etc.).
37
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
38
+ from _shared.embed_provider import EmbedClient # noqa: E402
39
+
34
40
  # ----------------------------------------------------------------------
35
41
  # Config
36
42
  # ----------------------------------------------------------------------
37
43
 
38
44
  DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
39
- NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
40
- # Embedding model name sent in /v1/embeddings request body. Defaults to
41
- # the production NV-Embed-v2 name; override via env when pointing at a
42
- # different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
43
- EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
44
- # Optional Authorization: Bearer <key> for the embedding endpoint.
45
- # Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
46
- EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
47
45
  EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
48
46
 
49
47
 
@@ -96,59 +94,23 @@ def _get_db() -> sqlite3.Connection:
96
94
  # Embedding client
97
95
  # ----------------------------------------------------------------------
98
96
 
99
- _http: httpx.AsyncClient | None = None
97
+ _embed: EmbedClient | None = None
100
98
 
101
99
 
102
- def _client() -> httpx.AsyncClient:
103
- global _http
104
- if _http is None:
105
- _http = httpx.AsyncClient(timeout=120.0)
106
- return _http
100
+ def _embed_client() -> EmbedClient:
101
+ """Lazily build the embed client so env vars are read at first use."""
102
+ global _embed
103
+ if _embed is None:
104
+ _embed = EmbedClient.from_env(
105
+ prefix="L4_",
106
+ default_url="http://nv-embed:8041/v1/embeddings",
107
+ )
108
+ return _embed
107
109
 
108
110
 
109
111
  async def _embed_batch(texts: list[str]) -> list[list[float]]:
110
- """Embed a batch of texts.
111
-
112
- Tries OpenAI-compatible shape first (POST <url>, Bearer auth,
113
- response data[i].embedding). On failure, falls back to the
114
- Pentatonic-AI gateway's native shape (POST .../v1/embed, X-API-Key
115
- auth, response embeddings[i]). When the gateway eventually adds an
116
- OpenAI-compat /v1/embeddings alias, the primary path will succeed
117
- and the fallback will never fire — no code change needed.
118
- """
119
- if not texts:
120
- return []
121
- payload = {"input": texts, "model": EMBED_MODEL_NAME}
122
- # Primary: OpenAI-compat
123
- try:
124
- resp = await _client().post(
125
- NV_EMBED_URL,
126
- headers=_openai_headers(),
127
- json=payload,
128
- timeout=120.0,
129
- )
130
- resp.raise_for_status()
131
- return [d["embedding"] for d in resp.json()["data"]]
132
- except Exception:
133
- pass
134
- # Fallback: lambda-gateway native shape
135
- fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
136
- resp = await _client().post(
137
- fallback_url,
138
- headers=_lambda_headers(),
139
- json=payload,
140
- timeout=120.0,
141
- )
142
- resp.raise_for_status()
143
- return resp.json()["embeddings"]
144
-
145
-
146
- def _openai_headers() -> dict:
147
- return {"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {}
148
-
149
-
150
- def _lambda_headers() -> dict:
151
- return {"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {}
112
+ """Embed a batch of texts via the shared EmbedClient."""
113
+ return await _embed_client().embed_batch_async(texts)
152
114
 
153
115
 
154
116
  # ----------------------------------------------------------------------
@@ -1,7 +1,9 @@
1
1
  FROM python:3.12-slim
2
2
  WORKDIR /app
3
3
  RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
4
- COPY l5-comms-layer.py /app/server.py
4
+ # Shared embed_provider module (build context is engine/services).
5
+ COPY _shared /app/_shared
6
+ COPY l5/l5-comms-layer.py /app/server.py
5
7
  RUN mkdir -p /data
6
8
  ENV L5_DB_PATH=/data/comms.db
7
9
  EXPOSE 8034
@@ -23,6 +23,7 @@ import os
23
23
  import glob
24
24
  import hashlib
25
25
  import json
26
+ import sys
26
27
  import time
27
28
  from datetime import datetime
28
29
  from pathlib import Path
@@ -30,6 +31,10 @@ from pathlib import Path
30
31
  import httpx
31
32
  from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
32
33
 
34
+ # Shared embed client lives at engine/services/_shared/.
35
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
36
+ from _shared.embed_provider import EmbedClient # noqa: E402
37
+
33
38
  # --- Config ---
34
39
  DB_PATH = os.environ.get(
35
40
  "L5_DB_PATH",
@@ -43,43 +48,30 @@ PEOPLE_DIR = WORKSPACE / "memory" / "people"
43
48
  CONTACTS_DIR = WORKSPACE / "memory" / "contacts"
44
49
  MEMORY_DIR = WORKSPACE / "memory"
45
50
 
46
- NV_EMBED_URL = os.environ.get("L5_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
47
- # Embedding model name sent in /v1/embeddings request body. Defaults to
48
- # the production NV-Embed-v2 name; override when pointing at a different
49
- # OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
50
- EMBED_MODEL_NAME = os.environ.get("L5_EMBED_MODEL", "nv-embed-v2")
51
- # Optional Authorization: Bearer <key> for the primary embedding endpoint.
52
- EMBED_API_KEY = os.environ.get("L5_EMBED_API_KEY", "")
51
+ _embed: EmbedClient | None = None
52
+
53
+
54
+ def _embed_client() -> EmbedClient:
55
+ """Lazily build the shared EmbedClient for L5."""
56
+ global _embed
57
+ if _embed is None:
58
+ _embed = EmbedClient.from_env(
59
+ prefix="L5_",
60
+ default_url="http://localhost:8041/v1/embeddings",
61
+ )
62
+ return _embed
63
+
53
64
 
54
65
  def _embed_post(texts):
55
- """POST to the configured embedding endpoint. Tries OpenAI-compat
56
- shape first; falls back to Pentatonic-AI lambda-gateway native shape
57
- on any failure. When the gateway adds an /v1/embeddings alias the
58
- primary path will succeed and the fallback never fires.
66
+ """Embed a batch of texts via the shared EmbedClient.
67
+
68
+ Provider profile (auth scheme + URL path + body/response shape) is
69
+ chosen by L5_EMBED_PROVIDER env var (openai | pentatonic-gateway |
70
+ cohere | custom). Auto-detects on 401 unless L5_EMBED_AUTODETECT=false.
59
71
 
60
72
  Returns: list[list[float]] (one embedding per input text).
61
73
  """
62
- payload = {"input": texts, "model": EMBED_MODEL_NAME}
63
- try:
64
- r = httpx.post(
65
- NV_EMBED_URL,
66
- headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
67
- json=payload,
68
- timeout=120,
69
- )
70
- r.raise_for_status()
71
- return [d["embedding"] for d in r.json()["data"]]
72
- except Exception:
73
- pass
74
- fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
75
- r = httpx.post(
76
- fallback_url,
77
- headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
78
- json=payload,
79
- timeout=120,
80
- )
81
- r.raise_for_status()
82
- return r.json()["embeddings"]
74
+ return _embed_client().embed_batch(texts)
83
75
 
84
76
  # Ollama fallback path. URL/model can be overridden so the L5 container can
85
77
  # reach an Ollama instance running on the docker host (host.docker.internal)
@@ -3,7 +3,9 @@ WORKDIR /app
3
3
  RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
4
4
  RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy
5
5
  RUN python -m spacy download en_core_web_sm
6
- COPY l6-document-store.py /app/server.py
6
+ # Shared embed_provider module (build context is engine/services).
7
+ COPY _shared /app/_shared
8
+ COPY l6/l6-document-store.py /app/server.py
7
9
  RUN mkdir -p /data
8
10
  ENV L6_DATA_DIR=/data
9
11
  EXPOSE 8037
@@ -20,6 +20,7 @@ import logging
20
20
  import os
21
21
  import re
22
22
  import sqlite3
23
+ import sys
23
24
  import time
24
25
  from datetime import datetime, timezone
25
26
  from pathlib import Path
@@ -29,6 +30,10 @@ import httpx
29
30
  from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
30
31
  from pymilvus.milvus_client.index import IndexParams
31
32
 
33
+ # Shared embed client lives at engine/services/_shared/.
34
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
35
+ from _shared.embed_provider import EmbedClient # noqa: E402
36
+
32
37
  # ---------------------------------------------------------------------------
33
38
  # Config
34
39
  # ---------------------------------------------------------------------------
@@ -37,39 +42,29 @@ DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-sto
37
42
  MILVUS_DB = str(DATA_DIR / "documents.db")
38
43
  FTS_DB = str(DATA_DIR / "documents_fts.db")
39
44
  OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
40
- EMBED_MODEL = os.environ.get("L6_EMBED_MODEL", "nomic-embed-text")
41
- NV_EMBED_URL = os.environ.get("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
42
45
  NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
43
46
  EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))
44
- # Optional Authorization: Bearer <key> for the embedding endpoint.
45
- EMBED_API_KEY = os.environ.get("L6_EMBED_API_KEY", "")
46
47
 
47
- def _embed_post(texts):
48
- """POST to embedding endpoint. Tries OpenAI-compat shape first;
49
- falls back to Pentatonic-AI lambda-gateway native shape on failure.
50
- See L4 / L5 for the same pattern."""
51
- import httpx as _httpx
52
- payload = {"input": texts, "model": EMBED_MODEL}
53
- try:
54
- r = _httpx.post(
55
- NV_EMBED_URL,
56
- headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
57
- json=payload,
58
- timeout=120,
48
+ _embed: EmbedClient | None = None
49
+
50
+
51
+ def _embed_client() -> EmbedClient:
52
+ """Lazily build the shared EmbedClient for L6."""
53
+ global _embed
54
+ if _embed is None:
55
+ _embed = EmbedClient.from_env(
56
+ prefix="L6_",
57
+ default_url="http://localhost:8041/v1/embeddings",
58
+ default_model="nomic-embed-text",
59
59
  )
60
- r.raise_for_status()
61
- return [d["embedding"] for d in r.json()["data"]]
62
- except Exception:
63
- pass
64
- fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
65
- r = _httpx.post(
66
- fallback_url,
67
- headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
68
- json=payload,
69
- timeout=120,
70
- )
71
- r.raise_for_status()
72
- return r.json()["embeddings"]
60
+ return _embed
61
+
62
+
63
+ def _embed_post(texts):
64
+ """Embed a batch of texts via the shared EmbedClient. Provider profile
65
+ chosen by L6_EMBED_PROVIDER env (openai | pentatonic-gateway | cohere
66
+ | custom). See engine/services/_shared/embed_provider.py for details."""
67
+ return _embed_client().embed_batch(texts)
73
68
 
74
69
  COLLECTION_NAME = "documents"
75
70
  RRF_K = 60