@pentatonic-ai/ai-agent-sdk 0.7.12 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.7.12",
+  "version": "0.8.0",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
@@ -98,7 +98,7 @@ describe("openclaw-memory-plugin — hosted CHAT_TURN via afterTurn", () => {
   });

   const storeMemory = calls.filter(
-    (c) => c.body?.variables?.moduleId === "deep-memory"
+    (c) => c.body?.variables?.moduleId === "pentatonic-memory"
   );
   const chatTurn = calls.filter(
     (c) => c.body?.variables?.moduleId === "conversation-analytics"
@@ -227,7 +227,7 @@ async function hostedStore(config, content, metadata = {}) {
         createModuleEvent(moduleId: $moduleId, input: $input) { success eventId }
       }`,
       variables: {
-        moduleId: "deep-memory",
+        moduleId: "pentatonic-memory",
         input: {
           eventType: "STORE_MEMORY",
           data: {
@@ -2,7 +2,7 @@
   "id": "pentatonic-memory",
   "name": "Pentatonic Memory",
   "description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
-  "version": "0.5.3",
+  "version": "0.8.4",
   "kind": "context-engine",
   "configSchema": {
     "type": "object",
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/openclaw-memory-plugin",
-  "version": "0.8.3",
+  "version": "0.8.4",
   "description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
   "type": "module",
   "main": "index.js",
@@ -220,7 +220,7 @@ describe("hostedEmitChatTurn", () => {
 // =============================================================================

 describe("hostedStoreMemory", () => {
-  it("emits STORE_MEMORY against deep-memory", async () => {
+  it("emits STORE_MEMORY against pentatonic-memory", async () => {
     stubFetch(() =>
       new Response(
         JSON.stringify({
@@ -236,7 +236,7 @@ describe("hostedStoreMemory", () => {
       { source: "my-app" }
     );
     expect(out.ok).toBe(true);
-    expect(lastCall.body.variables.moduleId).toBe("deep-memory");
+    expect(lastCall.body.variables.moduleId).toBe("pentatonic-memory");
     expect(lastCall.body.variables.input.eventType).toBe("STORE_MEMORY");
     expect(lastCall.body.variables.input.data.attributes.content).toBe(
       "User owns a Subaru"
@@ -324,7 +324,7 @@ export async function hostedStoreMemory(
     body: JSON.stringify({
       query: CREATE_MODULE_EVENT_MUTATION,
       variables: {
-        moduleId: "deep-memory",
+        moduleId: "pentatonic-memory",
        input: { eventType: "STORE_MEMORY", data },
       },
     }),
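The substantive change in the hunks above is the module id: hosted STORE_MEMORY events are now emitted against "pentatonic-memory" instead of "deep-memory". As a rough illustration only (not code shipped in this package), the wire-level request looks roughly like the Python sketch below; the gateway URL, auth header, and GraphQL variable type names are placeholder assumptions, while the createModuleEvent selection, the moduleId, and the input.data.attributes.content shape come from the diff itself.

import httpx

def store_memory_sketch(gateway_url: str, api_key: str, content: str) -> dict:
    # Hypothetical endpoint and auth header; only the mutation shape and the
    # new "pentatonic-memory" moduleId are taken from the diff above.
    mutation = """
    mutation ($moduleId: String!, $input: ModuleEventInput!) {
      createModuleEvent(moduleId: $moduleId, input: $input) { success eventId }
    }
    """
    resp = httpx.post(
        gateway_url,
        headers={"Authorization": f"Bearer {api_key}"},
        json={
            "query": mutation,
            "variables": {
                "moduleId": "pentatonic-memory",  # previously "deep-memory"
                "input": {
                    "eventType": "STORE_MEMORY",
                    "data": {"attributes": {"content": content}},
                },
            },
        },
        timeout=30.0,
    )
    resp.raise_for_status()
    return resp.json()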
@@ -88,8 +88,8 @@ services:
   l4:
     <<: *engine-base
     build:
-      context: ./engine/services/l4
-      dockerfile: Dockerfile
+      context: ./engine/services
+      dockerfile: l4/Dockerfile
     container_name: pme-l4
     # Default 18042 to avoid port collisions on 8042.
     # Override via PME_L4_PORT for bench setups that intentionally replace it.
@@ -98,6 +98,8 @@ services:
       L4_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
       L4_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
       L4_EMBED_API_KEY: ${EMBED_API_KEY:-}
+      L4_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
+      L4_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
       L4_EMBED_DIM: ${EMBED_DIM:-4096}
       L4_DB_PATH: /data/vec.db
     extra_hosts:
@@ -116,8 +118,8 @@ services:
   l5:
     <<: *engine-base
     build:
-      context: ./engine/services/l5
-      dockerfile: Dockerfile
+      context: ./engine/services
+      dockerfile: l5/Dockerfile
     container_name: pme-l5
     # Default 18034 to avoid port collisions on 8034.
     # Override via PME_L5_PORT for bench setups that intentionally replace it.
@@ -126,6 +128,8 @@ services:
       L5_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
       L5_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
       L5_EMBED_API_KEY: ${EMBED_API_KEY:-}
+      L5_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
+      L5_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
       L5_EMBED_DIM: ${EMBED_DIM:-4096}
       L5_OLLAMA_DIM: ${OLLAMA_DIM:-768}
       L5_OLLAMA_EMBED_URL: ${L5_OLLAMA_EMBED_URL:-http://host.docker.internal:11434/api/embed}
@@ -143,8 +147,8 @@ services:
   l6:
     <<: *engine-base
     build:
-      context: ./engine/services/l6
-      dockerfile: Dockerfile
+      context: ./engine/services
+      dockerfile: l6/Dockerfile
     container_name: pme-l6
     # Default 18037 to avoid colliding with Spark Core L6 doc-store on 8037.
     # Override via PME_L6_PORT for bench setups that intentionally replace it.
@@ -153,6 +157,8 @@ services:
       L6_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
       L6_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
       L6_EMBED_API_KEY: ${EMBED_API_KEY:-}
+      L6_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
+      L6_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
       L6_EMBED_DIM: ${EMBED_DIM:-4096}
       L6_DATA_DIR: /data
     extra_hosts:
@@ -166,12 +172,16 @@ services:
   l2:
     <<: *engine-base
     build:
-      context: ./engine/services/l2
-      dockerfile: Dockerfile
+      context: ./engine/services
+      dockerfile: l2/Dockerfile
     container_name: pme-l2
     ports: ["127.0.0.1:${PME_L2_PORT:-8131}:8031"]
     environment:
       PME_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
+      PME_EMBED_API_KEY: ${EMBED_API_KEY:-}
+      PME_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
+      PME_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
+      PME_NV_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
       PME_NEO4J_URI: bolt://l3:7687
       PME_NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
       NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
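All engine layers (l4, l5, l6, l2) now receive an EMBED_PROVIDER and EMBED_AUTODETECT setting, and l2 additionally gets PME_EMBED_API_KEY and PME_NV_EMBED_MODEL. These variables are consumed by the shared embed_provider module introduced below. A minimal sketch of how a layer service picks them up, assuming the from_env defaults documented in that module (the literal values shown are only the compose defaults above, not new configuration):

import os

from _shared.embed_provider import EmbedClient

# Illustrative values matching the compose defaults above.
os.environ.setdefault("L4_NV_EMBED_URL", "http://host.docker.internal:8041/v1/embeddings")
os.environ.setdefault("L4_EMBED_MODEL", "nv-embed-v2")
os.environ.setdefault("L4_EMBED_PROVIDER", "openai")
os.environ.setdefault("L4_EMBED_AUTODETECT", "true")

embed = EmbedClient.from_env(prefix="L4_")       # reads the L4_* namespace
vectors = embed.embed_batch(["hello", "world"])  # list of embedding vectors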
@@ -0,0 +1 @@
+"""Shared utilities used across the memory-engine layer services."""
@@ -0,0 +1,431 @@
+"""Embedding provider abstraction for memory-engine layer services.
+
+Different embedding gateways disagree on four things:
+1. Auth scheme (Authorization: Bearer ... vs X-API-Key: ...)
+2. URL path (/v1/embeddings vs /v1/embed vs vendor-specific)
+3. Request shape (OpenAI {"input": [...]} vs Cohere {"texts": [...], "input_type": ...})
+4. Response shape ({"data": [{"embedding": ...}]} vs {"embeddings": [...]})
+
+`EmbedProvider` captures all four for one gateway. Built-in profiles cover
+OpenAI-compatible, Pentatonic AI Gateway, and Cohere. A "custom" profile lets
+you override auth + path via env without code changes.
+
+Auto-detect: when the configured provider returns 401, the client probes the
+other built-in profiles once and caches the winner for the rest of the
+process. Disabled with `EMBED_AUTODETECT=false` per layer.
+
+Usage from a layer service:
+
+    from _shared.embed_provider import EmbedClient
+
+    embed = EmbedClient.from_env(prefix="L4_")
+    vectors = embed.embed_batch(["hello", "world"])
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from dataclasses import dataclass
+from typing import Any, Callable
+from urllib.parse import urlparse, urlunparse
+
+import httpx
+
+log = logging.getLogger("embed_provider")
+
+
+# ----------------------------------------------------------------------
+# Body builders + response parsers
+# ----------------------------------------------------------------------
+
+def _openai_body(texts: list[str], model: str) -> dict[str, Any]:
+    return {"input": texts, "model": model}
+
+
+def _openai_response(payload: dict[str, Any]) -> list[list[float]]:
+    return [d["embedding"] for d in payload["data"]]
+
+
+def _pentatonic_response(payload: dict[str, Any]) -> list[list[float]]:
+    """Pentatonic AI Gateway returns {"embeddings": [...]} on /v1/embed
+    and OpenAI-shaped {"data": [{"embedding": ...}]} on /v1/embeddings.
+    Accept either."""
+    if "data" in payload:
+        return [d["embedding"] for d in payload["data"]]
+    return payload["embeddings"]
+
+
+def _cohere_body(texts: list[str], model: str) -> dict[str, Any]:
+    return {"texts": texts, "model": model, "input_type": "search_document"}
+
+
+def _cohere_response(payload: dict[str, Any]) -> list[list[float]]:
+    return payload["embeddings"]
+
+
+_BODY_BUILDERS: dict[str, Callable[[list[str], str], dict[str, Any]]] = {
+    "openai": _openai_body,
+    "cohere": _cohere_body,
+}
+
+_RESPONSE_PARSERS: dict[str, Callable[[dict[str, Any]], list[list[float]]]] = {
+    "openai": _openai_response,
+    "pentatonic": _pentatonic_response,
+    "cohere": _cohere_response,
+}
+
+
+# ----------------------------------------------------------------------
+# Provider profiles
+# ----------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class EmbedProvider:
+    name: str
+    auth_header: str
+    auth_format: str  # f-string-ish; "{key}" placeholder substituted at request time
+    path_default: str
+    body_builder: Callable[[list[str], str], dict[str, Any]]
+    response_parser: Callable[[dict[str, Any]], list[list[float]]]
+
+
+PROVIDERS: dict[str, EmbedProvider] = {
+    "openai": EmbedProvider(
+        name="openai",
+        auth_header="Authorization",
+        auth_format="Bearer {key}",
+        path_default="/v1/embeddings",
+        body_builder=_openai_body,
+        response_parser=_openai_response,
+    ),
+    "pentatonic-gateway": EmbedProvider(
+        name="pentatonic-gateway",
+        auth_header="X-API-Key",
+        auth_format="{key}",
+        path_default="/v1/embed",
+        body_builder=_openai_body,
+        response_parser=_pentatonic_response,
+    ),
+    "cohere": EmbedProvider(
+        name="cohere",
+        auth_header="Authorization",
+        auth_format="Bearer {key}",
+        path_default="/v1/embed",
+        body_builder=_cohere_body,
+        response_parser=_cohere_response,
+    ),
+}
+
+
+def _build_custom_provider(env_prefix: str) -> EmbedProvider:
+    """Build a custom provider from env vars.
+
+    Env vars (env_prefix is e.g. 'L4_'):
+      {prefix}EMBED_AUTH_HEADER     default Authorization
+      {prefix}EMBED_AUTH_FORMAT     default Bearer {key}
+      {prefix}EMBED_PATH_DEFAULT    default /v1/embeddings
+      {prefix}EMBED_BODY_SHAPE      "openai" | "cohere"                 default openai
+      {prefix}EMBED_RESPONSE_SHAPE  "openai" | "pentatonic" | "cohere"  default openai
+    """
+    body_shape = os.environ.get(f"{env_prefix}EMBED_BODY_SHAPE", "openai")
+    response_shape = os.environ.get(f"{env_prefix}EMBED_RESPONSE_SHAPE", "openai")
+    return EmbedProvider(
+        name="custom",
+        auth_header=os.environ.get(f"{env_prefix}EMBED_AUTH_HEADER", "Authorization"),
+        auth_format=os.environ.get(f"{env_prefix}EMBED_AUTH_FORMAT", "Bearer {key}"),
+        path_default=os.environ.get(f"{env_prefix}EMBED_PATH_DEFAULT", "/v1/embeddings"),
+        body_builder=_BODY_BUILDERS.get(body_shape, _openai_body),
+        response_parser=_RESPONSE_PARSERS.get(response_shape, _openai_response),
+    )
+
+
+def resolve_provider(name: str, env_prefix: str = "") -> EmbedProvider:
+    """Look up a built-in provider, or build a custom one from env."""
+    if name in PROVIDERS:
+        return PROVIDERS[name]
+    if name == "custom":
+        return _build_custom_provider(env_prefix)
+    raise ValueError(
+        f"Unknown EMBED_PROVIDER {name!r}. "
+        f"Built-ins: {sorted(PROVIDERS)} or 'custom'."
+    )
+
+
+# ----------------------------------------------------------------------
+# URL helpers
+# ----------------------------------------------------------------------
+
+def _swap_path(url: str, new_path: str) -> str:
+    """Replace the path component of `url` with `new_path`. Empty path on the
+    input URL is treated as 'no path provided' and gets `new_path` appended."""
+    p = urlparse(url)
+    return urlunparse((p.scheme, p.netloc, new_path, "", p.query, p.fragment))
+
+
+def _resolved_url(url: str, provider: EmbedProvider) -> str:
+    """If `url` has no path, append the provider's default path; otherwise
+    leave as-is (operator chose a specific path)."""
+    p = urlparse(url)
+    if not p.path or p.path == "/":
+        return _swap_path(url, provider.path_default)
+    return url
+
+
+# ----------------------------------------------------------------------
+# Client
+# ----------------------------------------------------------------------
+
+class EmbedAuthError(RuntimeError):
+    """Raised when every candidate provider returns 401."""
+
+
+class EmbedHTTPError(RuntimeError):
+    """Raised on non-401 HTTP errors from the embedding endpoint."""
+
+    def __init__(self, status: int, body: str):
+        super().__init__(f"HTTP {status}: {body[:200]}")
+        self.status = status
+        self.body = body
+
+
+class EmbedClient:
+    """Sync + async embedding client with provider auto-detection.
+
+    Construct via `EmbedClient.from_env(prefix="L4_")` so each layer service
+    reads its own env-var namespace; or call the constructor directly for
+    tests.
+
+    The client is stateful: after a successful auto-detect, the winning
+    provider is cached for the rest of the process lifetime. Set
+    `autodetect=False` (or env `{prefix}EMBED_AUTODETECT=false`) to disable.
+    """
+
+    def __init__(
+        self,
+        *,
+        url: str,
+        api_key: str,
+        model: str,
+        provider: EmbedProvider,
+        autodetect: bool = True,
+        timeout: float = 120.0,
+        env_prefix: str = "",
+    ) -> None:
+        self._configured_provider = provider
+        self._provider = provider
+        self._configured_url = url
+        self._url = _resolved_url(url, provider)
+        self._api_key = api_key
+        self._model = model
+        self._timeout = timeout
+        self._autodetect = autodetect
+        self._env_prefix = env_prefix
+        self._detected = False
+
+    # ------------------------------------------------------------------
+    # Construction
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def from_env(
+        cls,
+        prefix: str,
+        *,
+        url_var: str | None = None,
+        key_var: str | None = None,
+        model_var: str | None = None,
+        default_url: str = "",
+        default_model: str = "nv-embed-v2",
+    ) -> "EmbedClient":
+        """Build an EmbedClient from layer-prefixed env vars.
+
+        Layer services use `EmbedClient.from_env(prefix="L4_")` and the
+        client reads:
+          {prefix}NV_EMBED_URL      (override with `url_var=...`)
+          {prefix}EMBED_API_KEY
+          {prefix}EMBED_MODEL
+          {prefix}EMBED_PROVIDER    default 'openai'
+          {prefix}EMBED_AUTODETECT  default 'true'
+          {prefix}EMBED_TIMEOUT     default '120'
+        """
+        url_var = url_var or f"{prefix}NV_EMBED_URL"
+        key_var = key_var or f"{prefix}EMBED_API_KEY"
+        model_var = model_var or f"{prefix}EMBED_MODEL"
+
+        url = os.environ.get(url_var, default_url)
+        api_key = os.environ.get(key_var, "")
+        model = os.environ.get(model_var, default_model)
+        provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
+        autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
+        timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
+
+        provider = resolve_provider(provider_name, env_prefix=prefix)
+        return cls(
+            url=url,
+            api_key=api_key,
+            model=model,
+            provider=provider,
+            autodetect=autodetect,
+            timeout=timeout,
+            env_prefix=prefix,
+        )
+
+    # ------------------------------------------------------------------
+    # Internals
+    # ------------------------------------------------------------------
+
+    def _headers(self, provider: EmbedProvider) -> dict[str, str]:
+        if not self._api_key:
+            return {}
+        return {provider.auth_header: provider.auth_format.format(key=self._api_key)}
+
+    def _candidate_url(self, provider: EmbedProvider) -> str:
+        """URL to try for this provider. The configured provider keeps the
+        operator's chosen URL; auto-detect candidates swap in their own
+        path_default since the operator's path was wrong for them."""
+        if provider.name == self._configured_provider.name:
+            return self._url
+        return _swap_path(self._configured_url, provider.path_default)
+
+    def _switch_to(self, provider: EmbedProvider) -> None:
+        """Make `provider` the active provider for future requests."""
+        log.warning(
+            "embed_provider auto-detect switched: configured=%s -> active=%s. "
+            "Set %sEMBED_PROVIDER=%s to silence this.",
+            self._configured_provider.name,
+            provider.name,
+            self._env_prefix,
+            provider.name,
+        )
+        self._provider = provider
+        self._url = self._candidate_url(provider)
+        self._detected = True
+
+    # ------------------------------------------------------------------
+    # Sync API
+    # ------------------------------------------------------------------
+
+    def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Embed a list of texts. Empty list returns empty list."""
+        if not texts:
+            return []
+        return self._post_with_autodetect(texts, async_mode=False)
+
+    def embed_one(self, text: str) -> list[float]:
+        return self.embed_batch([text])[0]
+
+    # ------------------------------------------------------------------
+    # Async API
+    # ------------------------------------------------------------------
+
+    async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
+        if not texts:
+            return []
+        return await self._post_with_autodetect_async(texts)
+
+    async def embed_one_async(self, text: str) -> list[float]:
+        out = await self.embed_batch_async([text])
+        return out[0]
+
+    # ------------------------------------------------------------------
+    # Request paths
+    # ------------------------------------------------------------------
+
+    def _post_with_autodetect(self, texts: list[str], *, async_mode: bool) -> list[list[float]]:
+        del async_mode  # kept for symmetry; sync path is its own method
+        body = self._provider.body_builder(texts, self._model)
+        headers = self._headers(self._provider)
+        try:
+            r = httpx.post(self._url, json=body, headers=headers, timeout=self._timeout)
+        except httpx.HTTPError as exc:
+            raise EmbedHTTPError(0, str(exc)) from exc
+
+        if r.status_code == 401 and self._autodetect and not self._detected:
+            return self._autodetect_and_retry(texts, last_body=r.text)
+
+        if r.status_code == 401:
+            raise EmbedAuthError(r.text)
+        if not r.is_success:
+            raise EmbedHTTPError(r.status_code, r.text)
+        return self._provider.response_parser(r.json())
+
+    async def _post_with_autodetect_async(self, texts: list[str]) -> list[list[float]]:
+        body = self._provider.body_builder(texts, self._model)
+        headers = self._headers(self._provider)
+        try:
+            async with httpx.AsyncClient(timeout=self._timeout) as client:
+                r = await client.post(self._url, json=body, headers=headers)
+        except httpx.HTTPError as exc:
+            raise EmbedHTTPError(0, str(exc)) from exc
+
+        if r.status_code == 401 and self._autodetect and not self._detected:
+            return await self._autodetect_and_retry_async(texts, last_body=r.text)
+
+        if r.status_code == 401:
+            raise EmbedAuthError(r.text)
+        if not r.is_success:
+            raise EmbedHTTPError(r.status_code, r.text)
+        return self._provider.response_parser(r.json())
+
+    # ------------------------------------------------------------------
+    # Auto-detect
+    # ------------------------------------------------------------------
+
+    def _candidates(self) -> list[EmbedProvider]:
+        return [p for p in PROVIDERS.values() if p.name != self._provider.name]
+
+    def _autodetect_and_retry(self, texts: list[str], *, last_body: str) -> list[list[float]]:
+        for candidate in self._candidates():
+            url = self._candidate_url(candidate)
+            body = candidate.body_builder(texts, self._model)
+            headers = (
+                {candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
+                if self._api_key
+                else {}
+            )
+            try:
+                r = httpx.post(url, json=body, headers=headers, timeout=self._timeout)
+            except httpx.HTTPError:
+                continue
+            if r.is_success:
+                self._switch_to(candidate)
+                return candidate.response_parser(r.json())
+        raise EmbedAuthError(
+            f"all providers returned auth or transport errors. "
+            f"last 401 body: {last_body[:200]}"
+        )
+
+    async def _autodetect_and_retry_async(self, texts: list[str], *, last_body: str) -> list[list[float]]:
+        for candidate in self._candidates():
+            url = self._candidate_url(candidate)
+            body = candidate.body_builder(texts, self._model)
+            headers = (
+                {candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
+                if self._api_key
+                else {}
+            )
+            try:
+                async with httpx.AsyncClient(timeout=self._timeout) as client:
+                    r = await client.post(url, json=body, headers=headers)
+            except httpx.HTTPError:
+                continue
+            if r.is_success:
+                self._switch_to(candidate)
+                return candidate.response_parser(r.json())
+        raise EmbedAuthError(
+            f"all providers returned auth or transport errors. "
+            f"last 401 body: {last_body[:200]}"
+        )
+
+    # ------------------------------------------------------------------
+    # Introspection (handy for /health and tests)
+    # ------------------------------------------------------------------
+
+    @property
+    def active_provider(self) -> str:
+        return self._provider.name
+
+    @property
+    def active_url(self) -> str:
+        return self._url
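For tests or a /health endpoint, the client can also be constructed directly instead of via from_env. A small sketch, with a placeholder local URL and key, showing how the path default and the introspection properties behave:

from _shared.embed_provider import PROVIDERS, EmbedClient

# Placeholder URL/key; a bare host gets the profile's default path appended.
client = EmbedClient(
    url="http://localhost:8041",
    api_key="test-key",
    model="nv-embed-v2",
    provider=PROVIDERS["pentatonic-gateway"],
    autodetect=True,
)
assert client.active_provider == "pentatonic-gateway"
assert client.active_url == "http://localhost:8041/v1/embed"

If that gateway answered 401, the first embed_batch call would probe the other built-in profiles once, and the winner would then be reported by active_provider for the rest of the process.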
@@ -15,8 +15,10 @@ RUN pip install --no-cache-dir \
     "sentence-transformers" \
     "torch" --extra-index-url https://download.pytorch.org/whl/cpu

-COPY l2-hybridrag-proxy.py /app/server.py
-COPY init_databases.py /app/init_databases.py
+# Shared embed_provider module (build context is engine/services).
+COPY _shared /app/_shared
+COPY l2/l2-hybridrag-proxy.py /app/server.py
+COPY l2/init_databases.py /app/init_databases.py

 # Pre-create empty L0 BM25 SQLite + empty workspace
 RUN mkdir -p /data/workspace /data/.pentatonic/memory /data/.cache && \
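The l2 build context (and, per the compose change above, the l4/l5/l6 contexts) is now the parent engine/services directory, so each layer's Dockerfile can copy the shared module alongside its own service code. Inside the image, /app/_shared sits next to /app/server.py, which is what makes the absolute import used throughout the module resolve, assuming the service process starts with /app on its import path:

# Resolves because /app/_shared was copied next to /app/server.py
# (assumption: the service runs with /app as its working directory).
from _shared.embed_provider import EmbedClient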