@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.7.13",
3
+ "version": "0.8.0",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -2,7 +2,7 @@
2
2
  "id": "pentatonic-memory",
3
3
  "name": "Pentatonic Memory",
4
4
  "description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
5
- "version": "0.5.3",
5
+ "version": "0.8.4",
6
6
  "kind": "context-engine",
7
7
  "configSchema": {
8
8
  "type": "object",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/openclaw-memory-plugin",
3
- "version": "0.8.3",
3
+ "version": "0.8.4",
4
4
  "description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -88,8 +88,8 @@ services:
88
88
  l4:
89
89
  <<: *engine-base
90
90
  build:
91
- context: ./engine/services/l4
92
- dockerfile: Dockerfile
91
+ context: ./engine/services
92
+ dockerfile: l4/Dockerfile
93
93
  container_name: pme-l4
94
94
  # Default 18042 to avoid port collisions on 8042.
95
95
  # Override via PME_L4_PORT for bench setups that intentionally replace it.
@@ -98,6 +98,8 @@ services:
98
98
  L4_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
99
99
  L4_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
100
100
  L4_EMBED_API_KEY: ${EMBED_API_KEY:-}
101
+ L4_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
102
+ L4_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
101
103
  L4_EMBED_DIM: ${EMBED_DIM:-4096}
102
104
  L4_DB_PATH: /data/vec.db
103
105
  extra_hosts:
@@ -116,8 +118,8 @@ services:
116
118
  l5:
117
119
  <<: *engine-base
118
120
  build:
119
- context: ./engine/services/l5
120
- dockerfile: Dockerfile
121
+ context: ./engine/services
122
+ dockerfile: l5/Dockerfile
121
123
  container_name: pme-l5
122
124
  # Default 18034 to avoid port collisions on 8034.
123
125
  # Override via PME_L5_PORT for bench setups that intentionally replace it.
@@ -126,6 +128,8 @@ services:
126
128
  L5_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
127
129
  L5_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
128
130
  L5_EMBED_API_KEY: ${EMBED_API_KEY:-}
131
+ L5_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
132
+ L5_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
129
133
  L5_EMBED_DIM: ${EMBED_DIM:-4096}
130
134
  L5_OLLAMA_DIM: ${OLLAMA_DIM:-768}
131
135
  L5_OLLAMA_EMBED_URL: ${L5_OLLAMA_EMBED_URL:-http://host.docker.internal:11434/api/embed}
@@ -143,8 +147,8 @@ services:
143
147
  l6:
144
148
  <<: *engine-base
145
149
  build:
146
- context: ./engine/services/l6
147
- dockerfile: Dockerfile
150
+ context: ./engine/services
151
+ dockerfile: l6/Dockerfile
148
152
  container_name: pme-l6
149
153
  # Default 18037 to avoid colliding with Spark Core L6 doc-store on 8037.
150
154
  # Override via PME_L6_PORT for bench setups that intentionally replace it.
@@ -153,6 +157,8 @@ services:
153
157
  L6_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
154
158
  L6_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
155
159
  L6_EMBED_API_KEY: ${EMBED_API_KEY:-}
160
+ L6_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
161
+ L6_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
156
162
  L6_EMBED_DIM: ${EMBED_DIM:-4096}
157
163
  L6_DATA_DIR: /data
158
164
  extra_hosts:
@@ -166,12 +172,16 @@ services:
166
172
  l2:
167
173
  <<: *engine-base
168
174
  build:
169
- context: ./engine/services/l2
170
- dockerfile: Dockerfile
175
+ context: ./engine/services
176
+ dockerfile: l2/Dockerfile
171
177
  container_name: pme-l2
172
178
  ports: ["127.0.0.1:${PME_L2_PORT:-8131}:8031"]
173
179
  environment:
174
180
  PME_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
181
+ PME_EMBED_API_KEY: ${EMBED_API_KEY:-}
182
+ PME_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
183
+ PME_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
184
+ PME_NV_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
175
185
  PME_NEO4J_URI: bolt://l3:7687
176
186
  PME_NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
177
187
  NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
@@ -0,0 +1 @@
1
+ """Shared utilities used across the memory-engine layer services."""
@@ -0,0 +1,431 @@
1
+ """Embedding provider abstraction for memory-engine layer services.
2
+
3
+ Different embedding gateways disagree on four things:
4
+ 1. Auth scheme (Authorization: Bearer ... vs X-API-Key: ...)
5
+ 2. URL path (/v1/embeddings vs /v1/embed vs vendor-specific)
6
+ 3. Request shape (OpenAI {"input": [...]} vs Cohere {"texts": [...], "input_type": ...})
7
+ 4. Response shape ({"data": [{"embedding": ...}]} vs {"embeddings": [...]})
8
+
9
+ `EmbedProvider` captures all four for one gateway. Built-in profiles cover
10
+ OpenAI-compatible, Pentatonic AI Gateway, and Cohere. A "custom" profile lets
11
+ you override auth + path via env without code changes.
12
+
13
+ Auto-detect: when the configured provider returns 401, the client probes the
14
+ other built-in profiles once and caches the winner for the rest of the
15
+ process. Disabled with `EMBED_AUTODETECT=false` per layer.
16
+
17
+ Usage from a layer service:
18
+
19
+ from _shared.embed_provider import EmbedClient
20
+
21
+ embed = EmbedClient.from_env(prefix="L4_")
22
+ vectors = embed.embed_batch(["hello", "world"])
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import logging
28
+ import os
29
+ from dataclasses import dataclass
30
+ from typing import Any, Callable
31
+ from urllib.parse import urlparse, urlunparse
32
+
33
+ import httpx
34
+
35
+ log = logging.getLogger("embed_provider")
36
+
37
+
38
+ # ----------------------------------------------------------------------
39
+ # Body builders + response parsers
40
+ # ----------------------------------------------------------------------
41
+
42
+ def _openai_body(texts: list[str], model: str) -> dict[str, Any]:
43
+ return {"input": texts, "model": model}
44
+
45
+
46
+ def _openai_response(payload: dict[str, Any]) -> list[list[float]]:
47
+ return [d["embedding"] for d in payload["data"]]
48
+
49
+
50
+ def _pentatonic_response(payload: dict[str, Any]) -> list[list[float]]:
51
+ """Pentatonic AI Gateway returns {"embeddings": [...]} on /v1/embed
52
+ and OpenAI-shaped {"data": [{"embedding": ...}]} on /v1/embeddings.
53
+ Accept either."""
54
+ if "data" in payload:
55
+ return [d["embedding"] for d in payload["data"]]
56
+ return payload["embeddings"]
57
+
58
+
59
+ def _cohere_body(texts: list[str], model: str) -> dict[str, Any]:
60
+ return {"texts": texts, "model": model, "input_type": "search_document"}
61
+
62
+
63
+ def _cohere_response(payload: dict[str, Any]) -> list[list[float]]:
64
+ return payload["embeddings"]
65
+
66
+
67
+ _BODY_BUILDERS: dict[str, Callable[[list[str], str], dict[str, Any]]] = {
68
+ "openai": _openai_body,
69
+ "cohere": _cohere_body,
70
+ }
71
+
72
+ _RESPONSE_PARSERS: dict[str, Callable[[dict[str, Any]], list[list[float]]]] = {
73
+ "openai": _openai_response,
74
+ "pentatonic": _pentatonic_response,
75
+ "cohere": _cohere_response,
76
+ }
77
+
78
+
79
+ # ----------------------------------------------------------------------
80
+ # Provider profiles
81
+ # ----------------------------------------------------------------------
82
+
83
+ @dataclass(frozen=True)
84
+ class EmbedProvider:
85
+ name: str
86
+ auth_header: str
87
+ auth_format: str # f-string-ish; "{key}" placeholder substituted at request time
88
+ path_default: str
89
+ body_builder: Callable[[list[str], str], dict[str, Any]]
90
+ response_parser: Callable[[dict[str, Any]], list[list[float]]]
91
+
92
+
93
+ PROVIDERS: dict[str, EmbedProvider] = {
94
+ "openai": EmbedProvider(
95
+ name="openai",
96
+ auth_header="Authorization",
97
+ auth_format="Bearer {key}",
98
+ path_default="/v1/embeddings",
99
+ body_builder=_openai_body,
100
+ response_parser=_openai_response,
101
+ ),
102
+ "pentatonic-gateway": EmbedProvider(
103
+ name="pentatonic-gateway",
104
+ auth_header="X-API-Key",
105
+ auth_format="{key}",
106
+ path_default="/v1/embed",
107
+ body_builder=_openai_body,
108
+ response_parser=_pentatonic_response,
109
+ ),
110
+ "cohere": EmbedProvider(
111
+ name="cohere",
112
+ auth_header="Authorization",
113
+ auth_format="Bearer {key}",
114
+ path_default="/v1/embed",
115
+ body_builder=_cohere_body,
116
+ response_parser=_cohere_response,
117
+ ),
118
+ }
119
+
120
+
121
+ def _build_custom_provider(env_prefix: str) -> EmbedProvider:
122
+ """Build a custom provider from env vars.
123
+
124
+ Env vars (env_prefix is e.g. 'L4_'):
125
+ {prefix}EMBED_AUTH_HEADER default Authorization
126
+ {prefix}EMBED_AUTH_FORMAT default Bearer {key}
127
+ {prefix}EMBED_PATH_DEFAULT default /v1/embeddings
128
+ {prefix}EMBED_BODY_SHAPE "openai" | "cohere" default openai
129
+ {prefix}EMBED_RESPONSE_SHAPE "openai" | "pentatonic" | "cohere" default openai
130
+ """
131
+ body_shape = os.environ.get(f"{env_prefix}EMBED_BODY_SHAPE", "openai")
132
+ response_shape = os.environ.get(f"{env_prefix}EMBED_RESPONSE_SHAPE", "openai")
133
+ return EmbedProvider(
134
+ name="custom",
135
+ auth_header=os.environ.get(f"{env_prefix}EMBED_AUTH_HEADER", "Authorization"),
136
+ auth_format=os.environ.get(f"{env_prefix}EMBED_AUTH_FORMAT", "Bearer {key}"),
137
+ path_default=os.environ.get(f"{env_prefix}EMBED_PATH_DEFAULT", "/v1/embeddings"),
138
+ body_builder=_BODY_BUILDERS.get(body_shape, _openai_body),
139
+ response_parser=_RESPONSE_PARSERS.get(response_shape, _openai_response),
140
+ )
141
+
142
+
143
+ def resolve_provider(name: str, env_prefix: str = "") -> EmbedProvider:
144
+ """Look up a built-in provider, or build a custom one from env."""
145
+ if name in PROVIDERS:
146
+ return PROVIDERS[name]
147
+ if name == "custom":
148
+ return _build_custom_provider(env_prefix)
149
+ raise ValueError(
150
+ f"Unknown EMBED_PROVIDER {name!r}. "
151
+ f"Built-ins: {sorted(PROVIDERS)} or 'custom'."
152
+ )
153
+
154
+
155
+ # ----------------------------------------------------------------------
156
+ # URL helpers
157
+ # ----------------------------------------------------------------------
158
+
159
+ def _swap_path(url: str, new_path: str) -> str:
160
+ """Replace the path component of `url` with `new_path`. Empty path on the
161
+ input URL is handled the same way; path params are dropped, query and fragment are kept."""
162
+ p = urlparse(url)
163
+ return urlunparse((p.scheme, p.netloc, new_path, "", p.query, p.fragment))
164
+
165
+
166
+ def _resolved_url(url: str, provider: EmbedProvider) -> str:
167
+ """If `url` has no path, append the provider's default path; otherwise
168
+ leave as-is (operator chose a specific path)."""
169
+ p = urlparse(url)
170
+ if not p.path or p.path == "/":
171
+ return _swap_path(url, provider.path_default)
172
+ return url
173
+
174
+
175
+ # ----------------------------------------------------------------------
176
+ # Client
177
+ # ----------------------------------------------------------------------
178
+
179
+ class EmbedAuthError(RuntimeError):
180
+ """Raised on a 401 that auto-detect cannot resolve (disabled, already run, or no candidate succeeded)."""
181
+
182
+
183
+ class EmbedHTTPError(RuntimeError):
184
+ """Raised on non-401 HTTP error responses, and on transport failures (reported as status 0)."""
185
+
186
+ def __init__(self, status: int, body: str):
187
+ super().__init__(f"HTTP {status}: {body[:200]}")
188
+ self.status = status
189
+ self.body = body
190
+
191
+
192
+ class EmbedClient:
193
+ """Sync + async embedding client with provider auto-detection.
194
+
195
+ Construct via `EmbedClient.from_env(prefix="L4_")` so each layer service
196
+ reads its own env-var namespace; or call the constructor directly for
197
+ tests.
198
+
199
+ The client is stateful: after a successful auto-detect, the winning
200
+ provider is cached for the rest of the process lifetime. Set
201
+ `autodetect=False` (or env `{prefix}EMBED_AUTODETECT=false`) to disable.
202
+ """
203
+
204
+ def __init__(
205
+ self,
206
+ *,
207
+ url: str,
208
+ api_key: str,
209
+ model: str,
210
+ provider: EmbedProvider,
211
+ autodetect: bool = True,
212
+ timeout: float = 120.0,
213
+ env_prefix: str = "",
214
+ ) -> None:
215
+ self._configured_provider = provider
216
+ self._provider = provider
217
+ self._configured_url = url
218
+ self._url = _resolved_url(url, provider)
219
+ self._api_key = api_key
220
+ self._model = model
221
+ self._timeout = timeout
222
+ self._autodetect = autodetect
223
+ self._env_prefix = env_prefix
224
+ self._detected = False
225
+
226
+ # ------------------------------------------------------------------
227
+ # Construction
228
+ # ------------------------------------------------------------------
229
+
230
+ @classmethod
231
+ def from_env(
232
+ cls,
233
+ prefix: str,
234
+ *,
235
+ url_var: str | None = None,
236
+ key_var: str | None = None,
237
+ model_var: str | None = None,
238
+ default_url: str = "",
239
+ default_model: str = "nv-embed-v2",
240
+ ) -> "EmbedClient":
241
+ """Build an EmbedClient from layer-prefixed env vars.
242
+
243
+ Layer services use `EmbedClient.from_env(prefix="L4_")` and the
244
+ client reads:
245
+ {prefix}NV_EMBED_URL (override with `url_var=...`)
246
+ {prefix}EMBED_API_KEY
247
+ {prefix}EMBED_MODEL
248
+ {prefix}EMBED_PROVIDER default 'openai'
249
+ {prefix}EMBED_AUTODETECT default 'true'
250
+ {prefix}EMBED_TIMEOUT default '120'
251
+ """
252
+ url_var = url_var or f"{prefix}NV_EMBED_URL"
253
+ key_var = key_var or f"{prefix}EMBED_API_KEY"
254
+ model_var = model_var or f"{prefix}EMBED_MODEL"
255
+
256
+ url = os.environ.get(url_var, default_url)
257
+ api_key = os.environ.get(key_var, "")
258
+ model = os.environ.get(model_var, default_model)
259
+ provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
260
+ autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
261
+ timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
262
+
263
+ provider = resolve_provider(provider_name, env_prefix=prefix)
264
+ return cls(
265
+ url=url,
266
+ api_key=api_key,
267
+ model=model,
268
+ provider=provider,
269
+ autodetect=autodetect,
270
+ timeout=timeout,
271
+ env_prefix=prefix,
272
+ )
273
+
274
+ # ------------------------------------------------------------------
275
+ # Internals
276
+ # ------------------------------------------------------------------
277
+
278
+ def _headers(self, provider: EmbedProvider) -> dict[str, str]:
279
+ if not self._api_key:
280
+ return {}
281
+ return {provider.auth_header: provider.auth_format.format(key=self._api_key)}
282
+
283
+ def _candidate_url(self, provider: EmbedProvider) -> str:
284
+ """URL to try for this provider. The configured provider keeps the
285
+ operator's chosen URL; auto-detect candidates swap in their own
286
+ path_default since the operator's path was wrong for them."""
287
+ if provider.name == self._configured_provider.name:
288
+ return self._url
289
+ return _swap_path(self._configured_url, provider.path_default)
290
+
291
+ def _switch_to(self, provider: EmbedProvider) -> None:
292
+ """Make `provider` the active provider for future requests."""
293
+ log.warning(
294
+ "embed_provider auto-detect switched: configured=%s -> active=%s. "
295
+ "Set %sEMBED_PROVIDER=%s to silence this.",
296
+ self._configured_provider.name,
297
+ provider.name,
298
+ self._env_prefix,
299
+ provider.name,
300
+ )
301
+ self._provider = provider
302
+ self._url = self._candidate_url(provider)
303
+ self._detected = True
304
+
305
+ # ------------------------------------------------------------------
306
+ # Sync API
307
+ # ------------------------------------------------------------------
308
+
309
+ def embed_batch(self, texts: list[str]) -> list[list[float]]:
310
+ """Embed a list of texts. Empty list returns empty list."""
311
+ if not texts:
312
+ return []
313
+ return self._post_with_autodetect(texts, async_mode=False)
314
+
315
+ def embed_one(self, text: str) -> list[float]:
316
+ return self.embed_batch([text])[0]
317
+
318
+ # ------------------------------------------------------------------
319
+ # Async API
320
+ # ------------------------------------------------------------------
321
+
322
+ async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
323
+ if not texts:
324
+ return []
325
+ return await self._post_with_autodetect_async(texts)
326
+
327
+ async def embed_one_async(self, text: str) -> list[float]:
328
+ out = await self.embed_batch_async([text])
329
+ return out[0]
330
+
331
+ # ------------------------------------------------------------------
332
+ # Request paths
333
+ # ------------------------------------------------------------------
334
+
335
+ def _post_with_autodetect(self, texts: list[str], *, async_mode: bool) -> list[list[float]]:
336
+ del async_mode # kept for symmetry; sync path is its own method
337
+ body = self._provider.body_builder(texts, self._model)
338
+ headers = self._headers(self._provider)
339
+ try:
340
+ r = httpx.post(self._url, json=body, headers=headers, timeout=self._timeout)
341
+ except httpx.HTTPError as exc:
342
+ raise EmbedHTTPError(0, str(exc)) from exc
343
+
344
+ if r.status_code == 401 and self._autodetect and not self._detected:
345
+ return self._autodetect_and_retry(texts, last_body=r.text)
346
+
347
+ if r.status_code == 401:
348
+ raise EmbedAuthError(r.text)
349
+ if not r.is_success:
350
+ raise EmbedHTTPError(r.status_code, r.text)
351
+ return self._provider.response_parser(r.json())
352
+
353
+ async def _post_with_autodetect_async(self, texts: list[str]) -> list[list[float]]:
354
+ body = self._provider.body_builder(texts, self._model)
355
+ headers = self._headers(self._provider)
356
+ try:
357
+ async with httpx.AsyncClient(timeout=self._timeout) as client:
358
+ r = await client.post(self._url, json=body, headers=headers)
359
+ except httpx.HTTPError as exc:
360
+ raise EmbedHTTPError(0, str(exc)) from exc
361
+
362
+ if r.status_code == 401 and self._autodetect and not self._detected:
363
+ return await self._autodetect_and_retry_async(texts, last_body=r.text)
364
+
365
+ if r.status_code == 401:
366
+ raise EmbedAuthError(r.text)
367
+ if not r.is_success:
368
+ raise EmbedHTTPError(r.status_code, r.text)
369
+ return self._provider.response_parser(r.json())
370
+
371
+ # ------------------------------------------------------------------
372
+ # Auto-detect
373
+ # ------------------------------------------------------------------
374
+
375
+ def _candidates(self) -> list[EmbedProvider]:
376
+ return [p for p in PROVIDERS.values() if p.name != self._provider.name]
377
+
378
+ def _autodetect_and_retry(self, texts: list[str], *, last_body: str) -> list[list[float]]:
379
+ for candidate in self._candidates():
380
+ url = self._candidate_url(candidate)
381
+ body = candidate.body_builder(texts, self._model)
382
+ headers = (
383
+ {candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
384
+ if self._api_key
385
+ else {}
386
+ )
387
+ try:
388
+ r = httpx.post(url, json=body, headers=headers, timeout=self._timeout)
389
+ except httpx.HTTPError:
390
+ continue
391
+ if r.is_success:
392
+ self._switch_to(candidate)
393
+ return candidate.response_parser(r.json())
394
+ raise EmbedAuthError(
395
+ f"all providers returned auth or transport errors. "
396
+ f"last 401 body: {last_body[:200]}"
397
+ )
398
+
399
+ async def _autodetect_and_retry_async(self, texts: list[str], *, last_body: str) -> list[list[float]]:
400
+ for candidate in self._candidates():
401
+ url = self._candidate_url(candidate)
402
+ body = candidate.body_builder(texts, self._model)
403
+ headers = (
404
+ {candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
405
+ if self._api_key
406
+ else {}
407
+ )
408
+ try:
409
+ async with httpx.AsyncClient(timeout=self._timeout) as client:
410
+ r = await client.post(url, json=body, headers=headers)
411
+ except httpx.HTTPError:
412
+ continue
413
+ if r.is_success:
414
+ self._switch_to(candidate)
415
+ return candidate.response_parser(r.json())
416
+ raise EmbedAuthError(
417
+ f"all providers returned auth or transport errors. "
418
+ f"last 401 body: {last_body[:200]}"
419
+ )
420
+
421
+ # ------------------------------------------------------------------
422
+ # Introspection (handy for /health and tests)
423
+ # ------------------------------------------------------------------
424
+
425
+ @property
426
+ def active_provider(self) -> str:
427
+ return self._provider.name
428
+
429
+ @property
430
+ def active_url(self) -> str:
431
+ return self._url
@@ -15,8 +15,10 @@ RUN pip install --no-cache-dir \
15
15
  "sentence-transformers" \
16
16
  "torch" --extra-index-url https://download.pytorch.org/whl/cpu
17
17
 
18
- COPY l2-hybridrag-proxy.py /app/server.py
19
- COPY init_databases.py /app/init_databases.py
18
+ # Shared embed_provider module (build context is engine/services).
19
+ COPY _shared /app/_shared
20
+ COPY l2/l2-hybridrag-proxy.py /app/server.py
21
+ COPY l2/init_databases.py /app/init_databases.py
20
22
 
21
23
  # Pre-create empty L0 BM25 SQLite + empty workspace
22
24
  RUN mkdir -p /data/workspace /data/.pentatonic/memory /data/.cache && \
@@ -17,6 +17,7 @@ import json
17
17
  import logging
18
18
  import os
19
19
  import sqlite3
20
+ import sys
20
21
  import time
21
22
  from datetime import datetime
22
23
  from pathlib import Path
@@ -30,6 +31,10 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
30
31
  from pydantic import BaseModel
31
32
  import uvicorn
32
33
 
34
+ # Shared embed client lives at engine/services/_shared/.
35
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
36
+ from _shared.embed_provider import EmbedClient # noqa: E402
37
+
33
38
 
34
39
  def _serialize_neo4j_value(v: Any) -> Any:
35
40
  """Convert neo4j-specific types to JSON-serialisable equivalents.
@@ -93,10 +98,27 @@ QMD_DB_PATH = _resolve_qmd_db()
93
98
  OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
94
99
  EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
95
100
 
96
- # NV-Embed-v2 service (primary, 4096-dim)
97
- NV_EMBED_URL = os.environ.get("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
101
+ # NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
102
+ # managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
103
+ # selects auth scheme (Bearer vs X-API-Key) and request shape.
98
104
  NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
99
105
 
106
+ _embed: EmbedClient | None = None
107
+
108
+
109
+ def _embed_client() -> EmbedClient:
110
+ """Lazily build the shared EmbedClient for L2."""
111
+ global _embed
112
+ if _embed is None:
113
+ _embed = EmbedClient.from_env(
114
+ prefix="PME_",
115
+ url_var="PME_NV_EMBED_URL",
116
+ key_var="PME_EMBED_API_KEY",
117
+ model_var="PME_NV_EMBED_MODEL",
118
+ default_url="http://localhost:8041/v1/embeddings",
119
+ )
120
+ return _embed
121
+
100
122
  # Sequential processing weights - OPTIMIZED FOR QUALITY
101
123
  GRAPH_PRIORITY_BOOST = 0.5 # Extra score for graph-derived results (↑ for better entity/relationship context)
102
124
  VECTOR_BASE_WEIGHT = 0.5 # Base weight for vector results (↓ balanced for accuracy over speed)
@@ -389,12 +411,11 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
389
411
 
390
412
  def get_embedding(text: str) -> List[float]:
391
413
  """Get embedding — tries NV-Embed-v2 (4096-dim) first, falls back to Ollama."""
392
- # Try NV-Embed-v2 service first
414
+ # Try NV-Embed-v2 service first via the shared EmbedClient (handles
415
+ # provider selection, auth scheme, path, and 401 auto-detect).
393
416
  if NV_EMBED_ENABLED:
394
417
  try:
395
- r = requests.post(NV_EMBED_URL, json={"input": text}, timeout=30)
396
- r.raise_for_status()
397
- return r.json()["data"][0]["embedding"]
418
+ return _embed_client().embed_one(text)
398
419
  except Exception as e:
399
420
  log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
400
421
 
@@ -1073,17 +1094,23 @@ async def list_models() -> dict:
1073
1094
  @app.post("/v1/embeddings")
1074
1095
  async def create_embeddings(request: EmbeddingRequest) -> dict:
1075
1096
  """Pass-through to NV-Embed-v2 (4096-dim). Batch-native — forwards the full
1076
- input list in a single HTTP call instead of looping one-at-a-time."""
1097
+ input list in a single HTTP call instead of looping one-at-a-time.
1098
+
1099
+ Returns OpenAI-shaped response regardless of upstream provider, so
1100
+ callers (including L4 search and external clients) get a consistent
1101
+ contract from this proxy."""
1077
1102
  try:
1078
- import httpx
1079
1103
  inputs = [request.input] if isinstance(request.input, str) else request.input
1080
- async with httpx.AsyncClient(timeout=60) as client:
1081
- resp = await client.post(
1082
- NV_EMBED_URL,
1083
- json={"input": inputs, "model": request.model or "nv-embed-v2"}
1084
- )
1085
- resp.raise_for_status()
1086
- return resp.json()
1104
+ embeddings = await _embed_client().embed_batch_async(inputs)
1105
+ return {
1106
+ "object": "list",
1107
+ "model": request.model or "nv-embed-v2",
1108
+ "data": [
1109
+ {"object": "embedding", "embedding": e, "index": i}
1110
+ for i, e in enumerate(embeddings)
1111
+ ],
1112
+ "usage": {"prompt_tokens": 0, "total_tokens": 0},
1113
+ }
1087
1114
  except Exception as e:
1088
1115
  raise HTTPException(status_code=500, detail=str(e))
1089
1116
 
@@ -1319,17 +1346,11 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
1319
1346
 
1320
1347
 
1321
1348
  def _embed_batch_local(texts: List[str]) -> List[List[float]]:
1322
- """Batch embed via NV-Embed. Returns vectors in input order."""
1349
+ """Batch embed via the shared EmbedClient. Returns vectors in input order."""
1323
1350
  if not texts:
1324
1351
  return []
1325
1352
  try:
1326
- r = requests.post(NV_EMBED_URL,
1327
- json={"input": texts, "model": "nv-embed-v2"},
1328
- timeout=120)
1329
- r.raise_for_status()
1330
- data = r.json().get("data", [])
1331
- # NV-Embed returns [{embedding: [...]}, ...]
1332
- return [d["embedding"] for d in data]
1353
+ return _embed_client().embed_batch(texts)
1333
1354
  except Exception as e:
1334
1355
  log.warning(f"NV-Embed batch failed: {e}; trying singletons")
1335
1356
  return [get_embedding(t) for t in texts]
@@ -4,7 +4,11 @@ WORKDIR /app
4
4
 
5
5
  RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic
6
6
 
7
- COPY server.py /app/server.py
7
+ # Build context is engine/services so the shared embed_provider module is
8
+ # COPYable. server.py adds engine/services to sys.path at startup, then
9
+ # imports from `_shared.embed_provider`.
10
+ COPY _shared /app/_shared
11
+ COPY l4/server.py /app/server.py
8
12
 
9
13
  RUN mkdir -p /data
10
14
  ENV L4_DB_PATH=/data/vec.db
@@ -23,27 +23,25 @@ import hashlib
23
23
  import os
24
24
  import sqlite3
25
25
  import struct
26
+ import sys
26
27
  import time
27
28
  from pathlib import Path
28
29
  from typing import Any
29
30
 
30
- import httpx
31
31
  from fastapi import FastAPI, HTTPException
32
32
  from pydantic import BaseModel
33
33
 
34
+ # Shared embedding client lives at engine/services/_shared/. Add the parent of
35
+ # the service dir to sys.path so `from _shared.embed_provider import ...` works
36
+ # regardless of how the service is launched (uvicorn, python server.py, etc.).
37
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
38
+ from _shared.embed_provider import EmbedClient # noqa: E402
39
+
34
40
  # ----------------------------------------------------------------------
35
41
  # Config
36
42
  # ----------------------------------------------------------------------
37
43
 
38
44
  DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
39
- NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
40
- # Embedding model name sent in /v1/embeddings request body. Defaults to
41
- # the production NV-Embed-v2 name; override via env when pointing at a
42
- # different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
43
- EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
44
- # Optional Authorization: Bearer <key> for the embedding endpoint.
45
- # Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
46
- EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
47
45
  EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
48
46
 
49
47
 
@@ -96,59 +94,23 @@ def _get_db() -> sqlite3.Connection:
96
94
  # Embedding client
97
95
  # ----------------------------------------------------------------------
98
96
 
99
- _http: httpx.AsyncClient | None = None
97
+ _embed: EmbedClient | None = None
100
98
 
101
99
 
102
- def _client() -> httpx.AsyncClient:
103
- global _http
104
- if _http is None:
105
- _http = httpx.AsyncClient(timeout=120.0)
106
- return _http
100
+ def _embed_client() -> EmbedClient:
101
+ """Lazily build the embed client so env vars are read at first use."""
102
+ global _embed
103
+ if _embed is None:
104
+ _embed = EmbedClient.from_env(
105
+ prefix="L4_",
106
+ default_url="http://nv-embed:8041/v1/embeddings",
107
+ )
108
+ return _embed
107
109
 
108
110
 
109
111
  async def _embed_batch(texts: list[str]) -> list[list[float]]:
110
- """Embed a batch of texts.
111
-
112
- Tries OpenAI-compatible shape first (POST <url>, Bearer auth,
113
- response data[i].embedding). On failure, falls back to the
114
- Pentatonic-AI gateway's native shape (POST .../v1/embed, X-API-Key
115
- auth, response embeddings[i]). When the gateway eventually adds an
116
- OpenAI-compat /v1/embeddings alias, the primary path will succeed
117
- and the fallback will never fire — no code change needed.
118
- """
119
- if not texts:
120
- return []
121
- payload = {"input": texts, "model": EMBED_MODEL_NAME}
122
- # Primary: OpenAI-compat
123
- try:
124
- resp = await _client().post(
125
- NV_EMBED_URL,
126
- headers=_openai_headers(),
127
- json=payload,
128
- timeout=120.0,
129
- )
130
- resp.raise_for_status()
131
- return [d["embedding"] for d in resp.json()["data"]]
132
- except Exception:
133
- pass
134
- # Fallback: lambda-gateway native shape
135
- fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
136
- resp = await _client().post(
137
- fallback_url,
138
- headers=_lambda_headers(),
139
- json=payload,
140
- timeout=120.0,
141
- )
142
- resp.raise_for_status()
143
- return resp.json()["embeddings"]
144
-
145
-
146
- def _openai_headers() -> dict:
147
- return {"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {}
148
-
149
-
150
- def _lambda_headers() -> dict:
151
- return {"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {}
112
+ """Embed a batch of texts via the shared EmbedClient."""
113
+ return await _embed_client().embed_batch_async(texts)
152
114
 
153
115
 
154
116
  # ----------------------------------------------------------------------
@@ -1,7 +1,9 @@
1
1
  FROM python:3.12-slim
2
2
  WORKDIR /app
3
3
  RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
4
- COPY l5-comms-layer.py /app/server.py
4
+ # Shared embed_provider module (build context is engine/services).
5
+ COPY _shared /app/_shared
6
+ COPY l5/l5-comms-layer.py /app/server.py
5
7
  RUN mkdir -p /data
6
8
  ENV L5_DB_PATH=/data/comms.db
7
9
  EXPOSE 8034
@@ -23,6 +23,7 @@ import os
23
23
  import glob
24
24
  import hashlib
25
25
  import json
26
+ import sys
26
27
  import time
27
28
  from datetime import datetime
28
29
  from pathlib import Path
@@ -30,6 +31,10 @@ from pathlib import Path
30
31
  import httpx
31
32
  from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
32
33
 
34
+ # Shared embed client lives at engine/services/_shared/.
35
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
36
+ from _shared.embed_provider import EmbedClient # noqa: E402
37
+
33
38
  # --- Config ---
34
39
  DB_PATH = os.environ.get(
35
40
  "L5_DB_PATH",
@@ -43,43 +48,30 @@ PEOPLE_DIR = WORKSPACE / "memory" / "people"
43
48
  CONTACTS_DIR = WORKSPACE / "memory" / "contacts"
44
49
  MEMORY_DIR = WORKSPACE / "memory"
45
50
 
46
- NV_EMBED_URL = os.environ.get("L5_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
47
- # Embedding model name sent in /v1/embeddings request body. Defaults to
48
- # the production NV-Embed-v2 name; override when pointing at a different
49
- # OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
50
- EMBED_MODEL_NAME = os.environ.get("L5_EMBED_MODEL", "nv-embed-v2")
51
- # Optional Authorization: Bearer <key> for the primary embedding endpoint.
52
- EMBED_API_KEY = os.environ.get("L5_EMBED_API_KEY", "")
51
+ _embed: EmbedClient | None = None
52
+
53
+
54
def _embed_client() -> EmbedClient:
    """Return the module-wide EmbedClient for L5, creating it on first call.

    The client is built by ``EmbedClient.from_env`` with the ``L5_`` prefix
    and cached in the module-level ``_embed`` so later calls reuse it.
    """
    global _embed
    if _embed is not None:
        return _embed
    _embed = EmbedClient.from_env(
        default_url="http://localhost:8041/v1/embeddings",
        prefix="L5_",
    )
    return _embed
63
+
53
64
 
54
65
  def _embed_post(texts):
55
- """POST to the configured embedding endpoint. Tries OpenAI-compat
56
- shape first; falls back to Pentatonic-AI lambda-gateway native shape
57
- on any failure. When the gateway adds an /v1/embeddings alias the
58
- primary path will succeed and the fallback never fires.
66
+ """Embed a batch of texts via the shared EmbedClient.
67
+
68
+ Provider profile (auth scheme + URL path + body/response shape) is
69
+ chosen by L5_EMBED_PROVIDER env var (openai | pentatonic-gateway |
70
+ cohere | custom). Auto-detects on 401 unless L5_EMBED_AUTODETECT=false.
59
71
 
60
72
  Returns: list[list[float]] (one embedding per input text).
61
73
  """
62
- payload = {"input": texts, "model": EMBED_MODEL_NAME}
63
- try:
64
- r = httpx.post(
65
- NV_EMBED_URL,
66
- headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
67
- json=payload,
68
- timeout=120,
69
- )
70
- r.raise_for_status()
71
- return [d["embedding"] for d in r.json()["data"]]
72
- except Exception:
73
- pass
74
- fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
75
- r = httpx.post(
76
- fallback_url,
77
- headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
78
- json=payload,
79
- timeout=120,
80
- )
81
- r.raise_for_status()
82
- return r.json()["embeddings"]
74
+ return _embed_client().embed_batch(texts)
83
75
 
84
76
  # Ollama fallback path. URL/model can be overridden so the L5 container can
85
77
  # reach an Ollama instance running on the docker host (host.docker.internal)
@@ -3,7 +3,9 @@ WORKDIR /app
3
3
  RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
4
4
  RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy
5
5
  RUN python -m spacy download en_core_web_sm
6
- COPY l6-document-store.py /app/server.py
6
+ # Shared embed_provider module (build context is engine/services).
7
+ COPY _shared /app/_shared
8
+ COPY l6/l6-document-store.py /app/server.py
7
9
  RUN mkdir -p /data
8
10
  ENV L6_DATA_DIR=/data
9
11
  EXPOSE 8037
@@ -20,6 +20,7 @@ import logging
20
20
  import os
21
21
  import re
22
22
  import sqlite3
23
+ import sys
23
24
  import time
24
25
  from datetime import datetime, timezone
25
26
  from pathlib import Path
@@ -29,6 +30,10 @@ import httpx
29
30
  from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
30
31
  from pymilvus.milvus_client.index import IndexParams
31
32
 
33
+ # Shared embed client lives at engine/services/_shared/.
34
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
35
+ from _shared.embed_provider import EmbedClient # noqa: E402
36
+
32
37
  # ---------------------------------------------------------------------------
33
38
  # Config
34
39
  # ---------------------------------------------------------------------------
@@ -37,39 +42,29 @@ DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-sto
37
42
  MILVUS_DB = str(DATA_DIR / "documents.db")
38
43
  FTS_DB = str(DATA_DIR / "documents_fts.db")
39
44
  OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
40
- EMBED_MODEL = os.environ.get("L6_EMBED_MODEL", "nomic-embed-text")
41
- NV_EMBED_URL = os.environ.get("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
42
45
  NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
43
46
  EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))
44
- # Optional Authorization: Bearer <key> for the embedding endpoint.
45
- EMBED_API_KEY = os.environ.get("L6_EMBED_API_KEY", "")
46
47
 
47
- def _embed_post(texts):
48
- """POST to embedding endpoint. Tries OpenAI-compat shape first;
49
- falls back to Pentatonic-AI lambda-gateway native shape on failure.
50
- See L4 / L5 for the same pattern."""
51
- import httpx as _httpx
52
- payload = {"input": texts, "model": EMBED_MODEL}
53
- try:
54
- r = _httpx.post(
55
- NV_EMBED_URL,
56
- headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
57
- json=payload,
58
- timeout=120,
48
+ _embed: EmbedClient | None = None
49
+
50
+
51
+ def _embed_client() -> EmbedClient:
52
+ """Lazily build the shared EmbedClient for L6."""
53
+ global _embed
54
+ if _embed is None:
55
+ _embed = EmbedClient.from_env(
56
+ prefix="L6_",
57
+ default_url="http://localhost:8041/v1/embeddings",
58
+ default_model="nomic-embed-text",
59
59
  )
60
- r.raise_for_status()
61
- return [d["embedding"] for d in r.json()["data"]]
62
- except Exception:
63
- pass
64
- fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
65
- r = _httpx.post(
66
- fallback_url,
67
- headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
68
- json=payload,
69
- timeout=120,
70
- )
71
- r.raise_for_status()
72
- return r.json()["embeddings"]
60
+ return _embed
61
+
62
+
63
def _embed_post(texts):
    """Embed ``texts`` with the shared L6 EmbedClient and return the result.

    The provider profile is selected by the L6_EMBED_PROVIDER environment
    variable (openai | pentatonic-gateway | cohere | custom); see
    engine/services/_shared/embed_provider.py for details.
    """
    client = _embed_client()
    return client.embed_batch(texts)
73
68
 
74
69
  COLLECTION_NAME = "documents"
75
70
  RRF_K = 60
@@ -0,0 +1,354 @@
1
+ """Unit tests for engine/services/_shared/embed_provider.py.
2
+
3
+ Run with:
4
+ cd packages/memory-engine
5
+ python -m pytest tests/test_embed_provider.py -v
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ # Make the engine/services tree importable for tests without packaging it.
14
+ ROOT = Path(__file__).parent.parent / "engine" / "services"
15
+ sys.path.insert(0, str(ROOT))
16
+
17
+ import json # noqa: E402
18
+
19
+ import httpx # noqa: E402
20
+ import pytest # noqa: E402
21
+
22
+ from _shared.embed_provider import ( # noqa: E402
23
+ PROVIDERS,
24
+ EmbedAuthError,
25
+ EmbedClient,
26
+ EmbedHTTPError,
27
+ EmbedProvider,
28
+ resolve_provider,
29
+ )
30
+
31
+
32
+ # ----------------------------------------------------------------------
33
+ # Helpers — stub httpx so we can assert the request shape.
34
+ # ----------------------------------------------------------------------
35
+
36
+ class _FakeResponse:
37
+ def __init__(self, status_code: int, payload: dict | str = ""):
38
+ self.status_code = status_code
39
+ if isinstance(payload, dict):
40
+ self._json = payload
41
+ self.text = json.dumps(payload)
42
+ else:
43
+ self._json = None
44
+ self.text = payload
45
+
46
+ @property
47
+ def is_success(self) -> bool:
48
+ return 200 <= self.status_code < 300
49
+
50
+ def json(self) -> dict:
51
+ if self._json is None:
52
+ raise ValueError("not json")
53
+ return self._json
54
+
55
+
56
+ class _Recorder:
57
+ """Records every httpx.post call and returns canned responses keyed by URL."""
58
+
59
+ def __init__(self):
60
+ self.calls: list[dict] = []
61
+ self.responses: dict[str, _FakeResponse] = {}
62
+
63
+ def respond(self, url: str, response: _FakeResponse) -> None:
64
+ self.responses[url] = response
65
+
66
+ def __call__(self, url, *, json, headers, timeout):
67
+ self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
68
+ if url in self.responses:
69
+ return self.responses[url]
70
+ # default: 401 to flush out unmatched URLs
71
+ return _FakeResponse(401, "no stub for this url")
72
+
73
+
74
@pytest.fixture
def recorder(monkeypatch):
    """Swap ``httpx.post`` for a fresh ``_Recorder`` and hand it to the test."""
    stub = _Recorder()
    monkeypatch.setattr(httpx, "post", stub)
    return stub
79
+
80
+
81
+ # ----------------------------------------------------------------------
82
+ # Provider resolution
83
+ # ----------------------------------------------------------------------
84
+
85
def test_resolve_built_in_providers():
    """Each built-in provider name resolves to a profile carrying that name."""
    built_ins = ("openai", "pentatonic-gateway", "cohere")
    for expected_name in built_ins:
        assert resolve_provider(expected_name).name == expected_name
89
+
90
+
91
def test_resolve_unknown_provider_raises():
    """An unrecognised provider name is rejected with ValueError."""
    bogus_name = "not-a-provider"
    with pytest.raises(ValueError):
        resolve_provider(bogus_name)
94
+
95
+
96
def test_resolve_custom_provider_from_env(monkeypatch):
    """The ``custom`` provider is configured from prefixed environment variables."""
    overrides = {
        "L4_EMBED_AUTH_HEADER": "X-Custom-Auth",
        "L4_EMBED_AUTH_FORMAT": "Token {key}",
        "L4_EMBED_PATH_DEFAULT": "/embed",
        "L4_EMBED_BODY_SHAPE": "cohere",
        "L4_EMBED_RESPONSE_SHAPE": "cohere",
    }
    for var_name, var_value in overrides.items():
        monkeypatch.setenv(var_name, var_value)

    profile = resolve_provider("custom", env_prefix="L4_")

    assert profile.auth_header == "X-Custom-Auth"
    assert profile.auth_format == "Token {key}"
    assert profile.path_default == "/embed"
    # body shape produces Cohere-style "texts" field
    expected_body = {"texts": ["hi"], "model": "model-x", "input_type": "search_document"}
    assert profile.body_builder(["hi"], "model-x") == expected_body
109
+
110
+
111
+ # ----------------------------------------------------------------------
112
+ # Request shape
113
+ # ----------------------------------------------------------------------
114
+
115
def test_openai_provider_request_shape(recorder):
    """The openai profile posts ``input``/``model`` with Bearer auth to the given URL."""
    endpoint = "https://gw/v1/embeddings"
    recorder.respond(endpoint, _FakeResponse(200, {"data": [{"embedding": [0.1, 0.2]}]}))
    client = EmbedClient(
        url=endpoint,
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )

    vectors = client.embed_batch(["hello"])

    assert vectors == [[0.1, 0.2]]
    sent = recorder.calls[0]
    assert sent["url"] == "https://gw/v1/embeddings"
    assert sent["json"] == {"input": ["hello"], "model": "m"}
    assert sent["headers"] == {"Authorization": "Bearer k"}
132
+
133
+
134
def test_pentatonic_provider_request_shape(recorder):
    """The pentatonic-gateway profile posts ``input``/``model`` with X-API-Key auth."""
    endpoint = "https://lambda-gateway.pentatonic.com/v1/embed"
    recorder.respond(endpoint, _FakeResponse(200, {"data": [{"embedding": [1.0, 2.0]}]}))
    client = EmbedClient(
        url=endpoint,
        api_key="secret",
        model="nv-embed-v2",
        provider=PROVIDERS["pentatonic-gateway"],
    )

    vectors = client.embed_batch(["t1"])

    assert vectors == [[1.0, 2.0]]
    sent = recorder.calls[0]
    assert sent["url"] == "https://lambda-gateway.pentatonic.com/v1/embed"
    assert sent["json"] == {"input": ["t1"], "model": "nv-embed-v2"}
    assert sent["headers"] == {"X-API-Key": "secret"}
151
+
152
+
153
def test_pentatonic_response_parser_handles_both_shapes(recorder):
    """Pentatonic Gateway has historically returned both {"data":[...]} and
    {"embeddings":[...]} on different endpoints. Parser accepts either."""
    parse = PROVIDERS["pentatonic-gateway"].response_parser
    openai_style = {"data": [{"embedding": [1.0]}]}
    native_style = {"embeddings": [[1.0]]}
    assert parse(openai_style) == [[1.0]]
    assert parse(native_style) == [[1.0]]
159
+
160
+
161
def test_cohere_provider_request_shape(recorder):
    """The cohere profile posts ``texts``/``model``/``input_type`` with Bearer auth."""
    endpoint = "https://api.cohere.ai/v1/embed"
    recorder.respond(endpoint, _FakeResponse(200, {"embeddings": [[3.0, 4.0]]}))
    client = EmbedClient(
        url=endpoint,
        api_key="cohere-key",
        model="embed-english-v3.0",
        provider=PROVIDERS["cohere"],
    )

    vectors = client.embed_batch(["hi"])

    assert vectors == [[3.0, 4.0]]
    sent = recorder.calls[0]
    expected_body = {
        "texts": ["hi"],
        "model": "embed-english-v3.0",
        "input_type": "search_document",
    }
    assert sent["json"] == expected_body
    assert sent["headers"] == {"Authorization": "Bearer cohere-key"}
181
+
182
+
183
+ # ----------------------------------------------------------------------
184
+ # Auto-detect
185
+ # ----------------------------------------------------------------------
186
+
187
def test_autodetect_on_401_falls_back_to_pentatonic(recorder):
    """Operator configured openai, but the URL and key really belong to
    Pentatonic Gateway: the first call 401s, then auto-detect probes the
    pentatonic profile and succeeds."""
    configured_url = "https://lambda-gateway.pentatonic.com/v1/embeddings"
    native_url = "https://lambda-gateway.pentatonic.com/v1/embed"
    recorder.respond(
        configured_url,
        _FakeResponse(401, '{"error":"Invalid or missing API key"}'),
    )
    recorder.respond(native_url, _FakeResponse(200, {"data": [{"embedding": [9.0]}]}))
    client = EmbedClient(
        url=configured_url,
        api_key="k",
        model="nv-embed-v2",
        provider=PROVIDERS["openai"],
    )

    vectors = client.embed_batch(["x"])

    assert vectors == [[9.0]]
    assert client.active_provider == "pentatonic-gateway"
    # First call uses configured (openai) shape, second uses pentatonic
    first_call, second_call = recorder.calls[0], recorder.calls[1]
    assert first_call["headers"] == {"Authorization": "Bearer k"}
    assert second_call["headers"] == {"X-API-Key": "k"}
211
+
212
+
213
def test_autodetect_caches_after_first_success(recorder):
    """After auto-detect settles on a working profile, later calls use it
    directly instead of repeating the request that returned 401."""
    configured_url = "https://gw/v1/embeddings"
    working_url = "https://gw/v1/embed"
    recorder.respond(configured_url, _FakeResponse(401, "wrong scheme"))
    recorder.respond(working_url, _FakeResponse(200, {"data": [{"embedding": [1.0]}]}))
    client = EmbedClient(
        url=configured_url,
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )

    client.embed_batch(["a"])  # triggers detect
    calls_before = len(recorder.calls)
    client.embed_batch(["b"])  # should go straight to /v1/embed

    assert len(recorder.calls) - calls_before == 1
    assert recorder.calls[-1]["url"] == "https://gw/v1/embed"
235
+
236
+
237
def test_autodetect_disabled_raises(recorder):
    """With ``autodetect=False`` a 401 surfaces as EmbedAuthError without probing."""
    endpoint = "https://gw/v1/embeddings"
    recorder.respond(endpoint, _FakeResponse(401, "no auth"))
    client = EmbedClient(
        url=endpoint,
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
        autodetect=False,
    )

    with pytest.raises(EmbedAuthError):
        client.embed_batch(["x"])

    # Only one call: no probing happened.
    request_count = len(recorder.calls)
    assert request_count == 1
250
+
251
+
252
def test_autodetect_all_fail_raises(recorder):
    """Every candidate also 401s — raise EmbedAuthError.

    Also checks that more than one request was issued, so the test cannot
    pass merely because auto-detect was skipped and the first 401 was raised.
    """
    recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "x"))
    recorder.respond("https://gw/v1/embed", _FakeResponse(401, "x"))
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )
    with pytest.raises(EmbedAuthError):
        client.embed_batch(["x"])
    # The configured profile plus at least one probed alternative were tried.
    assert len(recorder.calls) >= 2
264
+
265
+
266
+ # ----------------------------------------------------------------------
267
+ # Error handling
268
+ # ----------------------------------------------------------------------
269
+
270
def test_non_401_http_error_does_not_trigger_autodetect(recorder):
    """A 503 is reported as EmbedHTTPError after a single request, with no probing."""
    endpoint = "https://gw/v1/embeddings"
    recorder.respond(endpoint, _FakeResponse(503, "upstream down"))
    client = EmbedClient(
        url=endpoint,
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )

    with pytest.raises(EmbedHTTPError) as caught:
        client.embed_batch(["x"])

    assert caught.value.status == 503
    assert len(recorder.calls) == 1
285
+
286
+
287
def test_empty_input_returns_empty(recorder):
    """Embedding an empty list returns an empty list and sends no request."""
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )

    vectors = client.embed_batch([])

    assert vectors == []
    assert recorder.calls == []
296
+
297
+
298
+ # ----------------------------------------------------------------------
299
+ # from_env construction
300
+ # ----------------------------------------------------------------------
301
+
302
def test_from_env_reads_layer_prefix(monkeypatch, recorder):
    """``from_env`` picks up URL, key, model and provider from ``L4_``-prefixed vars."""
    endpoint = "https://lambda-gateway.pentatonic.com/v1/embed"
    layer_env = {
        "L4_NV_EMBED_URL": endpoint,
        "L4_EMBED_API_KEY": "real-key",
        "L4_EMBED_MODEL": "nv-embed-v2",
        "L4_EMBED_PROVIDER": "pentatonic-gateway",
    }
    for var_name, var_value in layer_env.items():
        monkeypatch.setenv(var_name, var_value)
    recorder.respond(endpoint, _FakeResponse(200, {"data": [{"embedding": [42.0]}]}))

    client = EmbedClient.from_env(prefix="L4_")
    vectors = client.embed_batch(["t"])

    assert vectors == [[42.0]]
    assert client.active_provider == "pentatonic-gateway"
    assert recorder.calls[0]["headers"] == {"X-API-Key": "real-key"}
316
+
317
+
318
def test_from_env_default_provider_is_openai(monkeypatch):
    """With no provider configured, ``from_env`` falls back to the openai profile.

    L5_EMBED_PROVIDER is removed first so a value inherited from the
    developer's shell or CI environment cannot mask the default.
    """
    monkeypatch.delenv("L5_EMBED_PROVIDER", raising=False)
    monkeypatch.setenv("L5_NV_EMBED_URL", "https://gw/v1/embeddings")
    monkeypatch.setenv("L5_EMBED_API_KEY", "k")
    client = EmbedClient.from_env(prefix="L5_")
    assert client.active_provider == "openai"
323
+
324
+
325
def test_from_env_autodetect_opt_out(monkeypatch, recorder):
    """``L4_EMBED_AUTODETECT=false`` disables probing: a 401 raises after one call.

    L4_EMBED_PROVIDER is removed first so an inherited value cannot change
    which profile the client is built with.
    """
    monkeypatch.delenv("L4_EMBED_PROVIDER", raising=False)
    monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
    monkeypatch.setenv("L4_EMBED_API_KEY", "k")
    monkeypatch.setenv("L4_EMBED_AUTODETECT", "false")
    recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "x"))
    client = EmbedClient.from_env(prefix="L4_")
    with pytest.raises(EmbedAuthError):
        client.embed_batch(["x"])
    # Exactly one request: the configured profile, with no fallback probes.
    assert len(recorder.calls) == 1
334
+
335
+
336
+ # ----------------------------------------------------------------------
337
+ # URL handling
338
+ # ----------------------------------------------------------------------
339
+
340
def test_url_without_path_gets_provider_default(recorder):
    """When only a base URL is configured, the provider's ``path_default``
    is appended to it before the request is sent."""
    full_url = "https://lambda-gateway.pentatonic.com/v1/embed"
    recorder.respond(full_url, _FakeResponse(200, {"data": [{"embedding": [0.0]}]}))
    client = EmbedClient(
        url="https://lambda-gateway.pentatonic.com",
        api_key="k",
        model="m",
        provider=PROVIDERS["pentatonic-gateway"],
    )

    client.embed_batch(["x"])

    requested_url = recorder.calls[0]["url"]
    assert requested_url == "https://lambda-gateway.pentatonic.com/v1/embed"