@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/package.json +1 -1
  2. package/packages/memory/openclaw-plugin/index.js +7 -0
  3. package/packages/memory/openclaw-plugin/openclaw.plugin.json +9 -1
  4. package/packages/memory/openclaw-plugin/package.json +1 -1
  5. package/packages/memory/src/__tests__/engine.test.js +142 -0
  6. package/packages/memory/src/engine.js +65 -0
  7. package/packages/memory-engine/compat/server.py +90 -5
  8. package/packages/memory-engine/docker-compose.yml +18 -8
  9. package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
  10. package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
  11. package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
  12. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +640 -81
  13. package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
  14. package/packages/memory-engine/engine/services/l4/server.py +19 -57
  15. package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
  16. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
  17. package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
  18. package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
  19. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +128 -0
  20. package/packages/memory-engine/tests/e2e_arena.sh +28 -4
  21. package/packages/memory-engine/tests/test_aggregate.py +333 -0
  22. package/packages/memory-engine/tests/test_arena_safety.py +232 -0
  23. package/packages/memory-engine/tests/test_channel_stat_reader.py +437 -0
  24. package/packages/memory-engine/tests/test_channel_stat_rollups.py +308 -0
  25. package/packages/memory-engine/tests/test_embed_provider.py +354 -0
  26. package/packages/memory-engine/tests/test_l3_arena_isolation.py +412 -0
@@ -0,0 +1,431 @@
"""Embedding provider abstraction for memory-engine layer services.

Different embedding gateways disagree on four things:
1. Auth scheme (Authorization: Bearer ... vs X-API-Key: ...)
2. URL path (/v1/embeddings vs /v1/embed vs vendor-specific)
3. Request shape (OpenAI {"input": [...]} vs Cohere {"texts": [...], "input_type": ...})
4. Response shape ({"data": [{"embedding": ...}]} vs {"embeddings": [...]})

`EmbedProvider` captures all four for one gateway. Built-in profiles cover
OpenAI-compatible, Pentatonic AI Gateway, and Cohere. A "custom" profile lets
you override auth + path via env without code changes.

Auto-detect: when the configured provider returns 401, the client probes the
other built-in profiles once and caches the winner for the rest of the
process. Disabled with `EMBED_AUTODETECT=false` per layer.

Usage from a layer service:

    from _shared.embed_provider import EmbedClient

    embed = EmbedClient.from_env(prefix="L4_")
    vectors = embed.embed_batch(["hello", "world"])
"""

from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from typing import Any, Callable
from urllib.parse import urlparse, urlunparse

import httpx

log = logging.getLogger("embed_provider")
38
+ # ----------------------------------------------------------------------
39
+ # Body builders + response parsers
40
+ # ----------------------------------------------------------------------
41
+
42
+ def _openai_body(texts: list[str], model: str) -> dict[str, Any]:
43
+ return {"input": texts, "model": model}
44
+
45
+
46
+ def _openai_response(payload: dict[str, Any]) -> list[list[float]]:
47
+ return [d["embedding"] for d in payload["data"]]
48
+
49
+
50
+ def _pentatonic_response(payload: dict[str, Any]) -> list[list[float]]:
51
+ """Pentatonic AI Gateway returns {"embeddings": [...]} on /v1/embed
52
+ and OpenAI-shaped {"data": [{"embedding": ...}]} on /v1/embeddings.
53
+ Accept either."""
54
+ if "data" in payload:
55
+ return [d["embedding"] for d in payload["data"]]
56
+ return payload["embeddings"]
57
+
58
+
59
+ def _cohere_body(texts: list[str], model: str) -> dict[str, Any]:
60
+ return {"texts": texts, "model": model, "input_type": "search_document"}
61
+
62
+
63
+ def _cohere_response(payload: dict[str, Any]) -> list[list[float]]:
64
+ return payload["embeddings"]
65
+
# Shape names accepted by the "custom" provider's env configuration
# ({prefix}EMBED_BODY_SHAPE / {prefix}EMBED_RESPONSE_SHAPE).
_BODY_BUILDERS: dict[str, Callable[[list[str], str], dict[str, Any]]] = {
    "openai": _openai_body,
    "cohere": _cohere_body,
}

# Note: "pentatonic" is response-only — its request body is OpenAI-shaped.
_RESPONSE_PARSERS: dict[str, Callable[[dict[str, Any]], list[list[float]]]] = {
    "openai": _openai_response,
    "pentatonic": _pentatonic_response,
    "cohere": _cohere_response,
}
# ----------------------------------------------------------------------
# Provider profiles
# ----------------------------------------------------------------------

@dataclass(frozen=True)
class EmbedProvider:
    """Immutable description of one embedding gateway's wire protocol:
    how to authenticate, which path to hit, and how to shape the
    request body and parse the response."""

    name: str
    auth_header: str
    auth_format: str  # f-string-ish; "{key}" placeholder substituted at request time
    path_default: str  # appended when the configured URL has no path
    body_builder: Callable[[list[str], str], dict[str, Any]]
    response_parser: Callable[[dict[str, Any]], list[list[float]]]
92
+
93
+ PROVIDERS: dict[str, EmbedProvider] = {
94
+ "openai": EmbedProvider(
95
+ name="openai",
96
+ auth_header="Authorization",
97
+ auth_format="Bearer {key}",
98
+ path_default="/v1/embeddings",
99
+ body_builder=_openai_body,
100
+ response_parser=_openai_response,
101
+ ),
102
+ "pentatonic-gateway": EmbedProvider(
103
+ name="pentatonic-gateway",
104
+ auth_header="X-API-Key",
105
+ auth_format="{key}",
106
+ path_default="/v1/embed",
107
+ body_builder=_openai_body,
108
+ response_parser=_pentatonic_response,
109
+ ),
110
+ "cohere": EmbedProvider(
111
+ name="cohere",
112
+ auth_header="Authorization",
113
+ auth_format="Bearer {key}",
114
+ path_default="/v1/embed",
115
+ body_builder=_cohere_body,
116
+ response_parser=_cohere_response,
117
+ ),
118
+ }
119
+
120
+
121
+ def _build_custom_provider(env_prefix: str) -> EmbedProvider:
122
+ """Build a custom provider from env vars.
123
+
124
+ Env vars (env_prefix is e.g. 'L4_'):
125
+ {prefix}EMBED_AUTH_HEADER default Authorization
126
+ {prefix}EMBED_AUTH_FORMAT default Bearer {key}
127
+ {prefix}EMBED_PATH_DEFAULT default /v1/embeddings
128
+ {prefix}EMBED_BODY_SHAPE "openai" | "cohere" default openai
129
+ {prefix}EMBED_RESPONSE_SHAPE "openai" | "pentatonic" | "cohere" default openai
130
+ """
131
+ body_shape = os.environ.get(f"{env_prefix}EMBED_BODY_SHAPE", "openai")
132
+ response_shape = os.environ.get(f"{env_prefix}EMBED_RESPONSE_SHAPE", "openai")
133
+ return EmbedProvider(
134
+ name="custom",
135
+ auth_header=os.environ.get(f"{env_prefix}EMBED_AUTH_HEADER", "Authorization"),
136
+ auth_format=os.environ.get(f"{env_prefix}EMBED_AUTH_FORMAT", "Bearer {key}"),
137
+ path_default=os.environ.get(f"{env_prefix}EMBED_PATH_DEFAULT", "/v1/embeddings"),
138
+ body_builder=_BODY_BUILDERS.get(body_shape, _openai_body),
139
+ response_parser=_RESPONSE_PARSERS.get(response_shape, _openai_response),
140
+ )
141
+
142
+
143
+ def resolve_provider(name: str, env_prefix: str = "") -> EmbedProvider:
144
+ """Look up a built-in provider, or build a custom one from env."""
145
+ if name in PROVIDERS:
146
+ return PROVIDERS[name]
147
+ if name == "custom":
148
+ return _build_custom_provider(env_prefix)
149
+ raise ValueError(
150
+ f"Unknown EMBED_PROVIDER {name!r}. "
151
+ f"Built-ins: {sorted(PROVIDERS)} or 'custom'."
152
+ )
153
+
154
+
155
+ # ----------------------------------------------------------------------
156
+ # URL helpers
157
+ # ----------------------------------------------------------------------
158
+
159
+ def _swap_path(url: str, new_path: str) -> str:
160
+ """Replace the path component of `url` with `new_path`. Empty path on the
161
+ input URL is treated as 'no path provided' and gets `new_path` appended."""
162
+ p = urlparse(url)
163
+ return urlunparse((p.scheme, p.netloc, new_path, "", p.query, p.fragment))
164
+
165
+
166
+ def _resolved_url(url: str, provider: EmbedProvider) -> str:
167
+ """If `url` has no path, append the provider's default path; otherwise
168
+ leave as-is (operator chose a specific path)."""
169
+ p = urlparse(url)
170
+ if not p.path or p.path == "/":
171
+ return _swap_path(url, provider.path_default)
172
+ return url
173
+
174
+
175
+ # ----------------------------------------------------------------------
176
+ # Client
177
+ # ----------------------------------------------------------------------
178
+
179
+ class EmbedAuthError(RuntimeError):
180
+ """Raised when every candidate provider returns 401."""
181
+
182
+
183
+ class EmbedHTTPError(RuntimeError):
184
+ """Raised on non-401 HTTP errors from the embedding endpoint."""
185
+
186
+ def __init__(self, status: int, body: str):
187
+ super().__init__(f"HTTP {status}: {body[:200]}")
188
+ self.status = status
189
+ self.body = body
190
+
191
+
192
+ class EmbedClient:
193
+ """Sync + async embedding client with provider auto-detection.
194
+
195
+ Construct via `EmbedClient.from_env(prefix="L4_")` so each layer service
196
+ reads its own env-var namespace; or call the constructor directly for
197
+ tests.
198
+
199
+ The client is stateful: after a successful auto-detect, the winning
200
+ provider is cached for the rest of the process lifetime. Set
201
+ `autodetect=False` (or env `{prefix}EMBED_AUTODETECT=false`) to disable.
202
+ """
203
+
204
+ def __init__(
205
+ self,
206
+ *,
207
+ url: str,
208
+ api_key: str,
209
+ model: str,
210
+ provider: EmbedProvider,
211
+ autodetect: bool = True,
212
+ timeout: float = 120.0,
213
+ env_prefix: str = "",
214
+ ) -> None:
215
+ self._configured_provider = provider
216
+ self._provider = provider
217
+ self._configured_url = url
218
+ self._url = _resolved_url(url, provider)
219
+ self._api_key = api_key
220
+ self._model = model
221
+ self._timeout = timeout
222
+ self._autodetect = autodetect
223
+ self._env_prefix = env_prefix
224
+ self._detected = False
225
+
226
+ # ------------------------------------------------------------------
227
+ # Construction
228
+ # ------------------------------------------------------------------
229
+
230
+ @classmethod
231
+ def from_env(
232
+ cls,
233
+ prefix: str,
234
+ *,
235
+ url_var: str | None = None,
236
+ key_var: str | None = None,
237
+ model_var: str | None = None,
238
+ default_url: str = "",
239
+ default_model: str = "nv-embed-v2",
240
+ ) -> "EmbedClient":
241
+ """Build an EmbedClient from layer-prefixed env vars.
242
+
243
+ Layer services use `EmbedClient.from_env(prefix="L4_")` and the
244
+ client reads:
245
+ {prefix}NV_EMBED_URL (override with `url_var=...`)
246
+ {prefix}EMBED_API_KEY
247
+ {prefix}EMBED_MODEL
248
+ {prefix}EMBED_PROVIDER default 'openai'
249
+ {prefix}EMBED_AUTODETECT default 'true'
250
+ {prefix}EMBED_TIMEOUT default '120'
251
+ """
252
+ url_var = url_var or f"{prefix}NV_EMBED_URL"
253
+ key_var = key_var or f"{prefix}EMBED_API_KEY"
254
+ model_var = model_var or f"{prefix}EMBED_MODEL"
255
+
256
+ url = os.environ.get(url_var, default_url)
257
+ api_key = os.environ.get(key_var, "")
258
+ model = os.environ.get(model_var, default_model)
259
+ provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
260
+ autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
261
+ timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
262
+
263
+ provider = resolve_provider(provider_name, env_prefix=prefix)
264
+ return cls(
265
+ url=url,
266
+ api_key=api_key,
267
+ model=model,
268
+ provider=provider,
269
+ autodetect=autodetect,
270
+ timeout=timeout,
271
+ env_prefix=prefix,
272
+ )
273
+
274
+ # ------------------------------------------------------------------
275
+ # Internals
276
+ # ------------------------------------------------------------------
277
+
278
+ def _headers(self, provider: EmbedProvider) -> dict[str, str]:
279
+ if not self._api_key:
280
+ return {}
281
+ return {provider.auth_header: provider.auth_format.format(key=self._api_key)}
282
+
283
+ def _candidate_url(self, provider: EmbedProvider) -> str:
284
+ """URL to try for this provider. The configured provider keeps the
285
+ operator's chosen URL; auto-detect candidates swap in their own
286
+ path_default since the operator's path was wrong for them."""
287
+ if provider.name == self._configured_provider.name:
288
+ return self._url
289
+ return _swap_path(self._configured_url, provider.path_default)
290
+
291
+ def _switch_to(self, provider: EmbedProvider) -> None:
292
+ """Make `provider` the active provider for future requests."""
293
+ log.warning(
294
+ "embed_provider auto-detect switched: configured=%s -> active=%s. "
295
+ "Set %sEMBED_PROVIDER=%s to silence this.",
296
+ self._configured_provider.name,
297
+ provider.name,
298
+ self._env_prefix,
299
+ provider.name,
300
+ )
301
+ self._provider = provider
302
+ self._url = self._candidate_url(provider)
303
+ self._detected = True
304
+
305
+ # ------------------------------------------------------------------
306
+ # Sync API
307
+ # ------------------------------------------------------------------
308
+
309
+ def embed_batch(self, texts: list[str]) -> list[list[float]]:
310
+ """Embed a list of texts. Empty list returns empty list."""
311
+ if not texts:
312
+ return []
313
+ return self._post_with_autodetect(texts, async_mode=False)
314
+
315
+ def embed_one(self, text: str) -> list[float]:
316
+ return self.embed_batch([text])[0]
317
+
318
+ # ------------------------------------------------------------------
319
+ # Async API
320
+ # ------------------------------------------------------------------
321
+
322
+ async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
323
+ if not texts:
324
+ return []
325
+ return await self._post_with_autodetect_async(texts)
326
+
327
+ async def embed_one_async(self, text: str) -> list[float]:
328
+ out = await self.embed_batch_async([text])
329
+ return out[0]
330
+
331
+ # ------------------------------------------------------------------
332
+ # Request paths
333
+ # ------------------------------------------------------------------
334
+
335
+ def _post_with_autodetect(self, texts: list[str], *, async_mode: bool) -> list[list[float]]:
336
+ del async_mode # kept for symmetry; sync path is its own method
337
+ body = self._provider.body_builder(texts, self._model)
338
+ headers = self._headers(self._provider)
339
+ try:
340
+ r = httpx.post(self._url, json=body, headers=headers, timeout=self._timeout)
341
+ except httpx.HTTPError as exc:
342
+ raise EmbedHTTPError(0, str(exc)) from exc
343
+
344
+ if r.status_code == 401 and self._autodetect and not self._detected:
345
+ return self._autodetect_and_retry(texts, last_body=r.text)
346
+
347
+ if r.status_code == 401:
348
+ raise EmbedAuthError(r.text)
349
+ if not r.is_success:
350
+ raise EmbedHTTPError(r.status_code, r.text)
351
+ return self._provider.response_parser(r.json())
352
+
353
+ async def _post_with_autodetect_async(self, texts: list[str]) -> list[list[float]]:
354
+ body = self._provider.body_builder(texts, self._model)
355
+ headers = self._headers(self._provider)
356
+ try:
357
+ async with httpx.AsyncClient(timeout=self._timeout) as client:
358
+ r = await client.post(self._url, json=body, headers=headers)
359
+ except httpx.HTTPError as exc:
360
+ raise EmbedHTTPError(0, str(exc)) from exc
361
+
362
+ if r.status_code == 401 and self._autodetect and not self._detected:
363
+ return await self._autodetect_and_retry_async(texts, last_body=r.text)
364
+
365
+ if r.status_code == 401:
366
+ raise EmbedAuthError(r.text)
367
+ if not r.is_success:
368
+ raise EmbedHTTPError(r.status_code, r.text)
369
+ return self._provider.response_parser(r.json())
370
+
371
+ # ------------------------------------------------------------------
372
+ # Auto-detect
373
+ # ------------------------------------------------------------------
374
+
375
+ def _candidates(self) -> list[EmbedProvider]:
376
+ return [p for p in PROVIDERS.values() if p.name != self._provider.name]
377
+
378
+ def _autodetect_and_retry(self, texts: list[str], *, last_body: str) -> list[list[float]]:
379
+ for candidate in self._candidates():
380
+ url = self._candidate_url(candidate)
381
+ body = candidate.body_builder(texts, self._model)
382
+ headers = (
383
+ {candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
384
+ if self._api_key
385
+ else {}
386
+ )
387
+ try:
388
+ r = httpx.post(url, json=body, headers=headers, timeout=self._timeout)
389
+ except httpx.HTTPError:
390
+ continue
391
+ if r.is_success:
392
+ self._switch_to(candidate)
393
+ return candidate.response_parser(r.json())
394
+ raise EmbedAuthError(
395
+ f"all providers returned auth or transport errors. "
396
+ f"last 401 body: {last_body[:200]}"
397
+ )
398
+
399
+ async def _autodetect_and_retry_async(self, texts: list[str], *, last_body: str) -> list[list[float]]:
400
+ for candidate in self._candidates():
401
+ url = self._candidate_url(candidate)
402
+ body = candidate.body_builder(texts, self._model)
403
+ headers = (
404
+ {candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
405
+ if self._api_key
406
+ else {}
407
+ )
408
+ try:
409
+ async with httpx.AsyncClient(timeout=self._timeout) as client:
410
+ r = await client.post(url, json=body, headers=headers)
411
+ except httpx.HTTPError:
412
+ continue
413
+ if r.is_success:
414
+ self._switch_to(candidate)
415
+ return candidate.response_parser(r.json())
416
+ raise EmbedAuthError(
417
+ f"all providers returned auth or transport errors. "
418
+ f"last 401 body: {last_body[:200]}"
419
+ )
420
+
421
+ # ------------------------------------------------------------------
422
+ # Introspection (handy for /health and tests)
423
+ # ------------------------------------------------------------------
424
+
425
+ @property
426
+ def active_provider(self) -> str:
427
+ return self._provider.name
428
+
429
+ @property
430
+ def active_url(self) -> str:
431
+ return self._url
@@ -15,8 +15,10 @@ RUN pip install --no-cache-dir \
15
15
  "sentence-transformers" \
16
16
  "torch" --extra-index-url https://download.pytorch.org/whl/cpu
17
17
 
18
- COPY l2-hybridrag-proxy.py /app/server.py
19
- COPY init_databases.py /app/init_databases.py
18
+ # Shared embed_provider module (build context is engine/services).
19
+ COPY _shared /app/_shared
20
+ COPY l2/l2-hybridrag-proxy.py /app/server.py
21
+ COPY l2/init_databases.py /app/init_databases.py
20
22
 
21
23
  # Pre-create empty L0 BM25 SQLite + empty workspace
22
24
  RUN mkdir -p /data/workspace /data/.pentatonic/memory /data/.cache && \