@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory-engine/docker-compose.yml +18 -8
- package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +44 -23
- package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
- package/packages/memory-engine/engine/services/l4/server.py +19 -57
- package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
- package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
- package/packages/memory-engine/tests/test_embed_provider.py +354 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.7.13",
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "pentatonic-memory",
|
|
3
3
|
"name": "Pentatonic Memory",
|
|
4
4
|
"description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.8.4",
|
|
6
6
|
"kind": "context-engine",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/openclaw-memory-plugin",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.4",
|
|
4
4
|
"description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -88,8 +88,8 @@ services:
|
|
|
88
88
|
l4:
|
|
89
89
|
<<: *engine-base
|
|
90
90
|
build:
|
|
91
|
-
context: ./engine/services
|
|
92
|
-
dockerfile: Dockerfile
|
|
91
|
+
context: ./engine/services
|
|
92
|
+
dockerfile: l4/Dockerfile
|
|
93
93
|
container_name: pme-l4
|
|
94
94
|
# Default 18042 to avoid port collisions on 8042.
|
|
95
95
|
# Override via PME_L4_PORT for bench setups that intentionally replace it.
|
|
@@ -98,6 +98,8 @@ services:
|
|
|
98
98
|
L4_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
99
99
|
L4_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
100
100
|
L4_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
101
|
+
L4_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
102
|
+
L4_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
101
103
|
L4_EMBED_DIM: ${EMBED_DIM:-4096}
|
|
102
104
|
L4_DB_PATH: /data/vec.db
|
|
103
105
|
extra_hosts:
|
|
@@ -116,8 +118,8 @@ services:
|
|
|
116
118
|
l5:
|
|
117
119
|
<<: *engine-base
|
|
118
120
|
build:
|
|
119
|
-
context: ./engine/services
|
|
120
|
-
dockerfile: Dockerfile
|
|
121
|
+
context: ./engine/services
|
|
122
|
+
dockerfile: l5/Dockerfile
|
|
121
123
|
container_name: pme-l5
|
|
122
124
|
# Default 18034 to avoid port collisions on 8034.
|
|
123
125
|
# Override via PME_L5_PORT for bench setups that intentionally replace it.
|
|
@@ -126,6 +128,8 @@ services:
|
|
|
126
128
|
L5_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
127
129
|
L5_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
128
130
|
L5_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
131
|
+
L5_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
132
|
+
L5_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
129
133
|
L5_EMBED_DIM: ${EMBED_DIM:-4096}
|
|
130
134
|
L5_OLLAMA_DIM: ${OLLAMA_DIM:-768}
|
|
131
135
|
L5_OLLAMA_EMBED_URL: ${L5_OLLAMA_EMBED_URL:-http://host.docker.internal:11434/api/embed}
|
|
@@ -143,8 +147,8 @@ services:
|
|
|
143
147
|
l6:
|
|
144
148
|
<<: *engine-base
|
|
145
149
|
build:
|
|
146
|
-
context: ./engine/services
|
|
147
|
-
dockerfile: Dockerfile
|
|
150
|
+
context: ./engine/services
|
|
151
|
+
dockerfile: l6/Dockerfile
|
|
148
152
|
container_name: pme-l6
|
|
149
153
|
# Default 18037 to avoid colliding with Spark Core L6 doc-store on 8037.
|
|
150
154
|
# Override via PME_L6_PORT for bench setups that intentionally replace it.
|
|
@@ -153,6 +157,8 @@ services:
|
|
|
153
157
|
L6_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
154
158
|
L6_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
155
159
|
L6_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
160
|
+
L6_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
161
|
+
L6_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
156
162
|
L6_EMBED_DIM: ${EMBED_DIM:-4096}
|
|
157
163
|
L6_DATA_DIR: /data
|
|
158
164
|
extra_hosts:
|
|
@@ -166,12 +172,16 @@ services:
|
|
|
166
172
|
l2:
|
|
167
173
|
<<: *engine-base
|
|
168
174
|
build:
|
|
169
|
-
context: ./engine/services
|
|
170
|
-
dockerfile: Dockerfile
|
|
175
|
+
context: ./engine/services
|
|
176
|
+
dockerfile: l2/Dockerfile
|
|
171
177
|
container_name: pme-l2
|
|
172
178
|
ports: ["127.0.0.1:${PME_L2_PORT:-8131}:8031"]
|
|
173
179
|
environment:
|
|
174
180
|
PME_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
181
|
+
PME_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
182
|
+
PME_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
183
|
+
PME_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
184
|
+
PME_NV_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
175
185
|
PME_NEO4J_URI: bolt://l3:7687
|
|
176
186
|
PME_NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
|
|
177
187
|
NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Shared utilities used across the memory-engine layer services."""
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
"""Embedding provider abstraction for memory-engine layer services.
|
|
2
|
+
|
|
3
|
+
Different embedding gateways disagree on four things:
|
|
4
|
+
1. Auth scheme (Authorization: Bearer ... vs X-API-Key: ...)
|
|
5
|
+
2. URL path (/v1/embeddings vs /v1/embed vs vendor-specific)
|
|
6
|
+
3. Request shape (OpenAI {"input": [...]} vs Cohere {"texts": [...], "input_type": ...})
|
|
7
|
+
4. Response shape ({"data": [{"embedding": ...}]} vs {"embeddings": [...]})
|
|
8
|
+
|
|
9
|
+
`EmbedProvider` captures all four for one gateway. Built-in profiles cover
|
|
10
|
+
OpenAI-compatible, Pentatonic AI Gateway, and Cohere. A "custom" profile lets
|
|
11
|
+
you override auth + path via env without code changes.
|
|
12
|
+
|
|
13
|
+
Auto-detect: when the configured provider returns 401, the client probes the
|
|
14
|
+
other built-in profiles once and caches the winner for the rest of the
|
|
15
|
+
process. Disabled with `EMBED_AUTODETECT=false` per layer.
|
|
16
|
+
|
|
17
|
+
Usage from a layer service:
|
|
18
|
+
|
|
19
|
+
from _shared.embed_provider import EmbedClient
|
|
20
|
+
|
|
21
|
+
embed = EmbedClient.from_env(prefix="L4_")
|
|
22
|
+
vectors = embed.embed_batch(["hello", "world"])
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import logging
|
|
28
|
+
import os
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from typing import Any, Callable
|
|
31
|
+
from urllib.parse import urlparse, urlunparse
|
|
32
|
+
|
|
33
|
+
import httpx
|
|
34
|
+
|
|
35
|
+
log = logging.getLogger("embed_provider")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ----------------------------------------------------------------------
|
|
39
|
+
# Body builders + response parsers
|
|
40
|
+
# ----------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
def _openai_body(texts: list[str], model: str) -> dict[str, Any]:
|
|
43
|
+
return {"input": texts, "model": model}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _openai_response(payload: dict[str, Any]) -> list[list[float]]:
|
|
47
|
+
return [d["embedding"] for d in payload["data"]]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _pentatonic_response(payload: dict[str, Any]) -> list[list[float]]:
|
|
51
|
+
"""Pentatonic AI Gateway returns {"embeddings": [...]} on /v1/embed
|
|
52
|
+
and OpenAI-shaped {"data": [{"embedding": ...}]} on /v1/embeddings.
|
|
53
|
+
Accept either."""
|
|
54
|
+
if "data" in payload:
|
|
55
|
+
return [d["embedding"] for d in payload["data"]]
|
|
56
|
+
return payload["embeddings"]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _cohere_body(texts: list[str], model: str) -> dict[str, Any]:
|
|
60
|
+
return {"texts": texts, "model": model, "input_type": "search_document"}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _cohere_response(payload: dict[str, Any]) -> list[list[float]]:
|
|
64
|
+
return payload["embeddings"]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
_BODY_BUILDERS: dict[str, Callable[[list[str], str], dict[str, Any]]] = {
|
|
68
|
+
"openai": _openai_body,
|
|
69
|
+
"cohere": _cohere_body,
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
_RESPONSE_PARSERS: dict[str, Callable[[dict[str, Any]], list[list[float]]]] = {
|
|
73
|
+
"openai": _openai_response,
|
|
74
|
+
"pentatonic": _pentatonic_response,
|
|
75
|
+
"cohere": _cohere_response,
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ----------------------------------------------------------------------
|
|
80
|
+
# Provider profiles
|
|
81
|
+
# ----------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
@dataclass(frozen=True)
class EmbedProvider:
    """Immutable description of how to talk to one embedding gateway.

    A profile bundles the auth header, the default URL path, and the pair of
    callables that build the request body and parse the response.
    """

    # Profile identifier, e.g. "openai" or "custom".
    name: str
    # HTTP header that carries the credential.
    auth_header: str
    # f-string-ish; "{key}" placeholder substituted at request time
    auth_format: str
    # Path used when the configured URL has no path of its own.
    path_default: str
    # (texts, model) -> JSON-serialisable request body.
    body_builder: Callable[[list[str], str], dict[str, Any]]
    # Decoded JSON response -> one vector per input text.
    response_parser: Callable[[dict[str, Any]], list[list[float]]]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
PROVIDERS: dict[str, EmbedProvider] = {
|
|
94
|
+
"openai": EmbedProvider(
|
|
95
|
+
name="openai",
|
|
96
|
+
auth_header="Authorization",
|
|
97
|
+
auth_format="Bearer {key}",
|
|
98
|
+
path_default="/v1/embeddings",
|
|
99
|
+
body_builder=_openai_body,
|
|
100
|
+
response_parser=_openai_response,
|
|
101
|
+
),
|
|
102
|
+
"pentatonic-gateway": EmbedProvider(
|
|
103
|
+
name="pentatonic-gateway",
|
|
104
|
+
auth_header="X-API-Key",
|
|
105
|
+
auth_format="{key}",
|
|
106
|
+
path_default="/v1/embed",
|
|
107
|
+
body_builder=_openai_body,
|
|
108
|
+
response_parser=_pentatonic_response,
|
|
109
|
+
),
|
|
110
|
+
"cohere": EmbedProvider(
|
|
111
|
+
name="cohere",
|
|
112
|
+
auth_header="Authorization",
|
|
113
|
+
auth_format="Bearer {key}",
|
|
114
|
+
path_default="/v1/embed",
|
|
115
|
+
body_builder=_cohere_body,
|
|
116
|
+
response_parser=_cohere_response,
|
|
117
|
+
),
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _build_custom_provider(env_prefix: str) -> EmbedProvider:
    """Build the "custom" provider profile from env vars.

    Env vars (env_prefix is e.g. 'L4_'):
      {prefix}EMBED_AUTH_HEADER     default Authorization
      {prefix}EMBED_AUTH_FORMAT     default Bearer {key}
      {prefix}EMBED_PATH_DEFAULT    default /v1/embeddings
      {prefix}EMBED_BODY_SHAPE      "openai" | "cohere"                default openai
      {prefix}EMBED_RESPONSE_SHAPE  "openai" | "pentatonic" | "cohere" default openai

    An unrecognised shape still falls back to the OpenAI builder/parser, but
    a warning is logged so a typo in the env does not go unnoticed.
    """

    def _env(suffix: str, default: str) -> str:
        return os.environ.get(f"{env_prefix}{suffix}", default)

    body_shape = _env("EMBED_BODY_SHAPE", "openai")
    response_shape = _env("EMBED_RESPONSE_SHAPE", "openai")

    if body_shape not in _BODY_BUILDERS:
        log.warning(
            "%sEMBED_BODY_SHAPE=%r is not one of %s; falling back to 'openai'.",
            env_prefix,
            body_shape,
            sorted(_BODY_BUILDERS),
        )
    if response_shape not in _RESPONSE_PARSERS:
        log.warning(
            "%sEMBED_RESPONSE_SHAPE=%r is not one of %s; falling back to 'openai'.",
            env_prefix,
            response_shape,
            sorted(_RESPONSE_PARSERS),
        )

    return EmbedProvider(
        name="custom",
        auth_header=_env("EMBED_AUTH_HEADER", "Authorization"),
        auth_format=_env("EMBED_AUTH_FORMAT", "Bearer {key}"),
        path_default=_env("EMBED_PATH_DEFAULT", "/v1/embeddings"),
        body_builder=_BODY_BUILDERS.get(body_shape, _openai_body),
        response_parser=_RESPONSE_PARSERS.get(response_shape, _openai_response),
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def resolve_provider(name: str, env_prefix: str = "") -> EmbedProvider:
    """Look up a built-in provider, or build a custom one from env.

    Args:
        name: Provider name, normally the value of ``{prefix}EMBED_PROVIDER``.
            An exact match against ``PROVIDERS`` is tried first; if that
            fails the name is retried with surrounding whitespace stripped
            and lower-cased, so ``"OpenAI"`` or ``" cohere "`` resolve too.
        env_prefix: Env-var prefix handed to the custom-provider builder when
            ``name`` is ``"custom"``.

    Returns:
        The matching built-in profile, or a freshly built custom profile.

    Raises:
        ValueError: ``name`` is neither a built-in profile nor ``"custom"``.
    """
    # Exact match first so existing configurations resolve exactly as before.
    if name in PROVIDERS:
        return PROVIDERS[name]

    normalized = name.strip().lower()
    if normalized in PROVIDERS:
        return PROVIDERS[normalized]
    if normalized == "custom":
        return _build_custom_provider(env_prefix)

    raise ValueError(
        f"Unknown EMBED_PROVIDER {name!r}. "
        f"Built-ins: {sorted(PROVIDERS)} or 'custom'."
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ----------------------------------------------------------------------
|
|
156
|
+
# URL helpers
|
|
157
|
+
# ----------------------------------------------------------------------
|
|
158
|
+
|
|
159
|
+
def _swap_path(url: str, new_path: str) -> str:
|
|
160
|
+
"""Replace the path component of `url` with `new_path`. Empty path on the
|
|
161
|
+
input URL is treated as 'no path provided' and gets `new_path` appended."""
|
|
162
|
+
p = urlparse(url)
|
|
163
|
+
return urlunparse((p.scheme, p.netloc, new_path, "", p.query, p.fragment))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _resolved_url(url: str, provider: EmbedProvider) -> str:
    """Fill in the provider's default path when `url` does not carry one.

    A URL whose path is empty or just "/" gets `provider.path_default`; any
    other URL is returned untouched because the operator chose that path.
    """
    path = urlparse(url).path
    if path not in ("", "/"):
        return url
    return _swap_path(url, provider.path_default)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ----------------------------------------------------------------------
|
|
176
|
+
# Client
|
|
177
|
+
# ----------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
class EmbedAuthError(RuntimeError):
    """Raised when the embedding endpoint rejects the credentials.

    Covers a 401 from the active provider when no auto-detect probe is
    attempted, and an auto-detect pass in which no candidate provider
    returned a successful response.
    """
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class EmbedHTTPError(RuntimeError):
    """Raised on non-401 HTTP errors from the embedding endpoint.

    The exception message carries the status and the first 200 characters of
    the body; the full values stay available on `status` and `body`.
    """

    def __init__(self, status: int, body: str):
        preview = body[:200]
        message = f"HTTP {status}: {preview}"
        super().__init__(message)
        self.status = status
        self.body = body
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class EmbedClient:
    """Sync + async embedding client with provider auto-detection.

    Construct via `EmbedClient.from_env(prefix="L4_")` so each layer service
    reads its own env-var namespace; or call the constructor directly for
    tests.

    The client is stateful: after a successful auto-detect, the winning
    provider is cached on this client for its remaining lifetime. Set
    `autodetect=False` (or env `{prefix}EMBED_AUTODETECT=false`) to disable.

    Auto-detect adopts a candidate only when its response both succeeds and
    parses with that candidate's response parser, so an endpoint that answers
    2xx with an unexpected body can never become the cached provider.
    """

    # Case-insensitive values of {prefix}EMBED_AUTODETECT that enable
    # auto-detect; anything else disables it.
    _TRUTHY_FLAGS = frozenset({"1", "true", "yes", "on"})

    def __init__(
        self,
        *,
        url: str,
        api_key: str,
        model: str,
        provider: EmbedProvider,
        autodetect: bool = True,
        timeout: float = 120.0,
        env_prefix: str = "",
    ) -> None:
        """Create a client.

        Args:
            url: Endpoint URL as configured by the operator. A URL without a
                path gets the provider's default path.
            api_key: Credential; an empty string sends no auth header.
            model: Model name placed in every request body.
            provider: Profile used until (and unless) auto-detect switches.
            autodetect: Probe the other built-in profiles after a 401.
            timeout: Per-request timeout in seconds.
            env_prefix: Only used to word the "set this env var" log hint.
        """
        # The configured provider/URL are kept separately from the active
        # ones so auto-detect can derive candidate URLs from what the
        # operator originally supplied.
        self._configured_provider = provider
        self._provider = provider
        self._configured_url = url
        self._url = _resolved_url(url, provider)
        self._api_key = api_key
        self._model = model
        self._timeout = timeout
        self._autodetect = autodetect
        self._env_prefix = env_prefix
        self._detected = False

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    @classmethod
    def from_env(
        cls,
        prefix: str,
        *,
        url_var: str | None = None,
        key_var: str | None = None,
        model_var: str | None = None,
        default_url: str = "",
        default_model: str = "nv-embed-v2",
    ) -> "EmbedClient":
        """Build an EmbedClient from layer-prefixed env vars.

        Layer services use `EmbedClient.from_env(prefix="L4_")` and the
        client reads:
          {prefix}NV_EMBED_URL         (override with `url_var=...`)
          {prefix}EMBED_API_KEY        (override with `key_var=...`)
          {prefix}EMBED_MODEL          (override with `model_var=...`)
          {prefix}EMBED_PROVIDER       default 'openai'
          {prefix}EMBED_AUTODETECT     default 'true'; 1/true/yes/on enable
          {prefix}EMBED_TIMEOUT        default '120'

        Raises:
            ValueError: the provider name is unknown, or the timeout is not
                a number.
        """
        url_var = url_var or f"{prefix}NV_EMBED_URL"
        key_var = key_var or f"{prefix}EMBED_API_KEY"
        model_var = model_var or f"{prefix}EMBED_MODEL"

        provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
        return cls(
            url=os.environ.get(url_var, default_url),
            api_key=os.environ.get(key_var, ""),
            model=os.environ.get(model_var, default_model),
            provider=resolve_provider(provider_name, env_prefix=prefix),
            autodetect=cls._env_flag(
                os.environ.get(f"{prefix}EMBED_AUTODETECT", "true")
            ),
            timeout=float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120")),
            env_prefix=prefix,
        )

    @classmethod
    def _env_flag(cls, raw: str) -> bool:
        """Interpret an env-var string as a boolean switch.

        Accepts the usual truthy spellings instead of only the literal
        "true", so `EMBED_AUTODETECT=1` does not silently disable the feature.
        """
        return raw.strip().lower() in cls._TRUTHY_FLAGS

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _headers(self, provider: EmbedProvider) -> dict[str, str]:
        """Auth header for `provider`, or no headers when no key is set."""
        if not self._api_key:
            return {}
        return {provider.auth_header: provider.auth_format.format(key=self._api_key)}

    def _candidate_url(self, provider: EmbedProvider) -> str:
        """URL to try for this provider. The configured provider keeps the
        operator's chosen URL; auto-detect candidates swap in their own
        path_default since the operator's path was wrong for them."""
        if provider.name == self._configured_provider.name:
            return self._url
        return _swap_path(self._configured_url, provider.path_default)

    def _switch_to(self, provider: EmbedProvider) -> None:
        """Make `provider` the active provider for future requests."""
        log.warning(
            "embed_provider auto-detect switched: configured=%s -> active=%s. "
            "Set %sEMBED_PROVIDER=%s to silence this.",
            self._configured_provider.name,
            provider.name,
            self._env_prefix,
            provider.name,
        )
        self._provider = provider
        self._url = self._candidate_url(provider)
        self._detected = True

    def _should_probe(self, r: Any) -> bool:
        """True when a 401 should trigger an auto-detect pass."""
        return r.status_code == 401 and self._autodetect and not self._detected

    def _parse_or_raise(self, r: Any) -> list[list[float]]:
        """Turn the active provider's response into vectors or an error.

        Raises:
            EmbedAuthError: the endpoint answered 401.
            EmbedHTTPError: any other non-success status.
        """
        if r.status_code == 401:
            raise EmbedAuthError(r.text)
        if not r.is_success:
            raise EmbedHTTPError(r.status_code, r.text)
        return self._provider.response_parser(r.json())

    # ------------------------------------------------------------------
    # Sync API
    # ------------------------------------------------------------------

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed a list of texts. Empty list returns empty list."""
        if not texts:
            return []
        return self._post_with_autodetect(texts, async_mode=False)

    def embed_one(self, text: str) -> list[float]:
        """Embed a single text and return its vector."""
        return self.embed_batch([text])[0]

    # ------------------------------------------------------------------
    # Async API
    # ------------------------------------------------------------------

    async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
        """Async counterpart of `embed_batch`."""
        if not texts:
            return []
        return await self._post_with_autodetect_async(texts)

    async def embed_one_async(self, text: str) -> list[float]:
        """Async counterpart of `embed_one`."""
        out = await self.embed_batch_async([text])
        return out[0]

    # ------------------------------------------------------------------
    # Request paths
    # ------------------------------------------------------------------

    def _post_with_autodetect(self, texts: list[str], *, async_mode: bool) -> list[list[float]]:
        """POST to the active provider; on a first 401, probe the others.

        Transport failures surface as `EmbedHTTPError` with status 0.
        """
        del async_mode  # kept for symmetry; sync path is its own method
        body = self._provider.body_builder(texts, self._model)
        headers = self._headers(self._provider)
        try:
            r = httpx.post(self._url, json=body, headers=headers, timeout=self._timeout)
        except httpx.HTTPError as exc:
            raise EmbedHTTPError(0, str(exc)) from exc

        if self._should_probe(r):
            return self._autodetect_and_retry(texts, last_body=r.text)
        return self._parse_or_raise(r)

    async def _post_with_autodetect_async(self, texts: list[str]) -> list[list[float]]:
        """Async counterpart of `_post_with_autodetect`."""
        body = self._provider.body_builder(texts, self._model)
        headers = self._headers(self._provider)
        try:
            async with httpx.AsyncClient(timeout=self._timeout) as client:
                r = await client.post(self._url, json=body, headers=headers)
        except httpx.HTTPError as exc:
            raise EmbedHTTPError(0, str(exc)) from exc

        if self._should_probe(r):
            return await self._autodetect_and_retry_async(texts, last_body=r.text)
        return self._parse_or_raise(r)

    # ------------------------------------------------------------------
    # Auto-detect
    # ------------------------------------------------------------------

    def _candidates(self) -> list[EmbedProvider]:
        """Built-in profiles other than the currently active one."""
        return [p for p in PROVIDERS.values() if p.name != self._provider.name]

    def _adopt_if_usable(self, candidate: EmbedProvider, r: Any) -> list[list[float]] | None:
        """Adopt `candidate` only if its response succeeded AND parses.

        Parsing happens before the switch: a 2xx answer whose body does not
        match the candidate's response shape must not become the cached
        provider, and must not abort the probe of the remaining candidates.

        Returns:
            The parsed vectors when the candidate was adopted, else None.
        """
        if not r.is_success:
            return None
        try:
            vectors = candidate.response_parser(r.json())
        except (ValueError, KeyError, TypeError):
            # Non-JSON body or wrong shape: treat like any other failed probe.
            return None
        self._switch_to(candidate)
        return vectors

    def _autodetect_failed(self, last_body: str) -> EmbedAuthError:
        """Error raised when no candidate produced a usable response."""
        return EmbedAuthError(
            f"all providers returned auth or transport errors. "
            f"last 401 body: {last_body[:200]}"
        )

    def _autodetect_and_retry(self, texts: list[str], *, last_body: str) -> list[list[float]]:
        """Try each other built-in profile once; cache and use the first that works.

        Raises:
            EmbedAuthError: no candidate returned a usable response.
        """
        for candidate in self._candidates():
            url = self._candidate_url(candidate)
            body = candidate.body_builder(texts, self._model)
            headers = self._headers(candidate)
            try:
                r = httpx.post(url, json=body, headers=headers, timeout=self._timeout)
            except httpx.HTTPError:
                continue
            vectors = self._adopt_if_usable(candidate, r)
            if vectors is not None:
                return vectors
        raise self._autodetect_failed(last_body)

    async def _autodetect_and_retry_async(self, texts: list[str], *, last_body: str) -> list[list[float]]:
        """Async counterpart of `_autodetect_and_retry`."""
        for candidate in self._candidates():
            url = self._candidate_url(candidate)
            body = candidate.body_builder(texts, self._model)
            headers = self._headers(candidate)
            try:
                async with httpx.AsyncClient(timeout=self._timeout) as client:
                    r = await client.post(url, json=body, headers=headers)
            except httpx.HTTPError:
                continue
            vectors = self._adopt_if_usable(candidate, r)
            if vectors is not None:
                return vectors
        raise self._autodetect_failed(last_body)

    # ------------------------------------------------------------------
    # Introspection (handy for /health and tests)
    # ------------------------------------------------------------------

    @property
    def active_provider(self) -> str:
        """Name of the provider profile currently in use."""
        return self._provider.name

    @property
    def active_url(self) -> str:
        """URL that requests are currently sent to."""
        return self._url
|
|
@@ -15,8 +15,10 @@ RUN pip install --no-cache-dir \
|
|
|
15
15
|
"sentence-transformers" \
|
|
16
16
|
"torch" --extra-index-url https://download.pytorch.org/whl/cpu
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
COPY
|
|
18
|
+
# Shared embed_provider module (build context is engine/services).
|
|
19
|
+
COPY _shared /app/_shared
|
|
20
|
+
COPY l2/l2-hybridrag-proxy.py /app/server.py
|
|
21
|
+
COPY l2/init_databases.py /app/init_databases.py
|
|
20
22
|
|
|
21
23
|
# Pre-create empty L0 BM25 SQLite + empty workspace
|
|
22
24
|
RUN mkdir -p /data/workspace /data/.pentatonic/memory /data/.cache && \
|
|
@@ -17,6 +17,7 @@ import json
|
|
|
17
17
|
import logging
|
|
18
18
|
import os
|
|
19
19
|
import sqlite3
|
|
20
|
+
import sys
|
|
20
21
|
import time
|
|
21
22
|
from datetime import datetime
|
|
22
23
|
from pathlib import Path
|
|
@@ -30,6 +31,10 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
|
|
|
30
31
|
from pydantic import BaseModel
|
|
31
32
|
import uvicorn
|
|
32
33
|
|
|
34
|
+
# Shared embed client lives at engine/services/_shared/.
|
|
35
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
36
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
37
|
+
|
|
33
38
|
|
|
34
39
|
def _serialize_neo4j_value(v: Any) -> Any:
|
|
35
40
|
"""Convert neo4j-specific types to JSON-serialisable equivalents.
|
|
@@ -93,10 +98,27 @@ QMD_DB_PATH = _resolve_qmd_db()
|
|
|
93
98
|
OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
|
|
94
99
|
EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
|
|
95
100
|
|
|
96
|
-
# NV-Embed-v2 service (primary, 4096-dim)
|
|
97
|
-
|
|
101
|
+
# NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
|
|
102
|
+
# managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
|
|
103
|
+
# selects auth scheme (Bearer vs X-API-Key) and request shape.
|
|
98
104
|
NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
|
|
99
105
|
|
|
106
|
+
_embed: EmbedClient | None = None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _embed_client() -> EmbedClient:
|
|
110
|
+
"""Lazily build the shared EmbedClient for L2."""
|
|
111
|
+
global _embed
|
|
112
|
+
if _embed is None:
|
|
113
|
+
_embed = EmbedClient.from_env(
|
|
114
|
+
prefix="PME_",
|
|
115
|
+
url_var="PME_NV_EMBED_URL",
|
|
116
|
+
key_var="PME_EMBED_API_KEY",
|
|
117
|
+
model_var="PME_NV_EMBED_MODEL",
|
|
118
|
+
default_url="http://localhost:8041/v1/embeddings",
|
|
119
|
+
)
|
|
120
|
+
return _embed
|
|
121
|
+
|
|
100
122
|
# Sequential processing weights - OPTIMIZED FOR QUALITY
|
|
101
123
|
GRAPH_PRIORITY_BOOST = 0.5 # Extra score for graph-derived results (↑ for better entity/relationship context)
|
|
102
124
|
VECTOR_BASE_WEIGHT = 0.5 # Base weight for vector results (↓ balanced for accuracy over speed)
|
|
@@ -389,12 +411,11 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
|
|
|
389
411
|
|
|
390
412
|
def get_embedding(text: str) -> List[float]:
|
|
391
413
|
"""Get embedding — tries NV-Embed-v2 (4096-dim) first, falls back to Ollama."""
|
|
392
|
-
# Try NV-Embed-v2 service first
|
|
414
|
+
# Try NV-Embed-v2 service first via the shared EmbedClient (handles
|
|
415
|
+
# provider selection, auth scheme, path, and 401 auto-detect).
|
|
393
416
|
if NV_EMBED_ENABLED:
|
|
394
417
|
try:
|
|
395
|
-
|
|
396
|
-
r.raise_for_status()
|
|
397
|
-
return r.json()["data"][0]["embedding"]
|
|
418
|
+
return _embed_client().embed_one(text)
|
|
398
419
|
except Exception as e:
|
|
399
420
|
log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
|
|
400
421
|
|
|
@@ -1073,17 +1094,23 @@ async def list_models() -> dict:
|
|
|
1073
1094
|
@app.post("/v1/embeddings")
|
|
1074
1095
|
async def create_embeddings(request: EmbeddingRequest) -> dict:
|
|
1075
1096
|
"""Pass-through to NV-Embed-v2 (4096-dim). Batch-native — forwards the full
|
|
1076
|
-
input list in a single HTTP call instead of looping one-at-a-time.
|
|
1097
|
+
input list in a single HTTP call instead of looping one-at-a-time.
|
|
1098
|
+
|
|
1099
|
+
Returns OpenAI-shaped response regardless of upstream provider, so
|
|
1100
|
+
callers (including L4 search and external clients) get a consistent
|
|
1101
|
+
contract from this proxy."""
|
|
1077
1102
|
try:
|
|
1078
|
-
import httpx
|
|
1079
1103
|
inputs = [request.input] if isinstance(request.input, str) else request.input
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1104
|
+
embeddings = await _embed_client().embed_batch_async(inputs)
|
|
1105
|
+
return {
|
|
1106
|
+
"object": "list",
|
|
1107
|
+
"model": request.model or "nv-embed-v2",
|
|
1108
|
+
"data": [
|
|
1109
|
+
{"object": "embedding", "embedding": e, "index": i}
|
|
1110
|
+
for i, e in enumerate(embeddings)
|
|
1111
|
+
],
|
|
1112
|
+
"usage": {"prompt_tokens": 0, "total_tokens": 0},
|
|
1113
|
+
}
|
|
1087
1114
|
except Exception as e:
|
|
1088
1115
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1089
1116
|
|
|
@@ -1319,17 +1346,11 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
|
|
|
1319
1346
|
|
|
1320
1347
|
|
|
1321
1348
|
def _embed_batch_local(texts: List[str]) -> List[List[float]]:
|
|
1322
|
-
"""Batch embed via
|
|
1349
|
+
"""Batch embed via the shared EmbedClient. Returns vectors in input order."""
|
|
1323
1350
|
if not texts:
|
|
1324
1351
|
return []
|
|
1325
1352
|
try:
|
|
1326
|
-
|
|
1327
|
-
json={"input": texts, "model": "nv-embed-v2"},
|
|
1328
|
-
timeout=120)
|
|
1329
|
-
r.raise_for_status()
|
|
1330
|
-
data = r.json().get("data", [])
|
|
1331
|
-
# NV-Embed returns [{embedding: [...]}, ...]
|
|
1332
|
-
return [d["embedding"] for d in data]
|
|
1353
|
+
return _embed_client().embed_batch(texts)
|
|
1333
1354
|
except Exception as e:
|
|
1334
1355
|
log.warning(f"NV-Embed batch failed: {e}; trying singletons")
|
|
1335
1356
|
return [get_embedding(t) for t in texts]
|
|
@@ -4,7 +4,11 @@ WORKDIR /app
|
|
|
4
4
|
|
|
5
5
|
RUN pip install --no-cache-dir fastapi 'uvicorn[standard]' httpx pydantic
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
# Build context is engine/services so the shared embed_provider module is
|
|
8
|
+
# COPYable. server.py adds engine/services to sys.path at startup, then
|
|
9
|
+
# imports from `_shared.embed_provider`.
|
|
10
|
+
COPY _shared /app/_shared
|
|
11
|
+
COPY l4/server.py /app/server.py
|
|
8
12
|
|
|
9
13
|
RUN mkdir -p /data
|
|
10
14
|
ENV L4_DB_PATH=/data/vec.db
|
|
@@ -23,27 +23,25 @@ import hashlib
|
|
|
23
23
|
import os
|
|
24
24
|
import sqlite3
|
|
25
25
|
import struct
|
|
26
|
+
import sys
|
|
26
27
|
import time
|
|
27
28
|
from pathlib import Path
|
|
28
29
|
from typing import Any
|
|
29
30
|
|
|
30
|
-
import httpx
|
|
31
31
|
from fastapi import FastAPI, HTTPException
|
|
32
32
|
from pydantic import BaseModel
|
|
33
33
|
|
|
34
|
+
# Shared embedding client lives at engine/services/_shared/. Add the parent of
|
|
35
|
+
# the service dir to sys.path so `from _shared.embed_provider import ...` works
|
|
36
|
+
# regardless of how the service is launched (uvicorn, python server.py, etc.).
|
|
37
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
38
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
39
|
+
|
|
34
40
|
# ----------------------------------------------------------------------
|
|
35
41
|
# Config
|
|
36
42
|
# ----------------------------------------------------------------------
|
|
37
43
|
|
|
38
44
|
DB_PATH = os.environ.get("L4_DB_PATH", "/data/vec.db")
|
|
39
|
-
NV_EMBED_URL = os.environ.get("L4_NV_EMBED_URL", "http://nv-embed:8041/v1/embeddings")
|
|
40
|
-
# Embedding model name sent in /v1/embeddings request body. Defaults to
|
|
41
|
-
# the production NV-Embed-v2 name; override via env when pointing at a
|
|
42
|
-
# different OpenAI-compat endpoint (e.g. Ollama with nomic-embed-text).
|
|
43
|
-
EMBED_MODEL_NAME = os.environ.get("L4_EMBED_MODEL", "nv-embed-v2")
|
|
44
|
-
# Optional Authorization: Bearer <key> for the embedding endpoint.
|
|
45
|
-
# Set when calling a hosted gateway (e.g. pentatonic-ai-gateway). Empty = no auth.
|
|
46
|
-
EMBED_API_KEY = os.environ.get("L4_EMBED_API_KEY", "")
|
|
47
45
|
EMBED_DIM = int(os.environ.get("L4_EMBED_DIM", "4096"))
|
|
48
46
|
|
|
49
47
|
|
|
@@ -96,59 +94,23 @@ def _get_db() -> sqlite3.Connection:
|
|
|
96
94
|
# Embedding client
|
|
97
95
|
# ----------------------------------------------------------------------
|
|
98
96
|
|
|
99
|
-
|
|
97
|
+
_embed: EmbedClient | None = None
|
|
100
98
|
|
|
101
99
|
|
|
102
|
-
def
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
100
|
+
def _embed_client() -> EmbedClient:
|
|
101
|
+
"""Lazily build the embed client so env vars are read at first use."""
|
|
102
|
+
global _embed
|
|
103
|
+
if _embed is None:
|
|
104
|
+
_embed = EmbedClient.from_env(
|
|
105
|
+
prefix="L4_",
|
|
106
|
+
default_url="http://nv-embed:8041/v1/embeddings",
|
|
107
|
+
)
|
|
108
|
+
return _embed
|
|
107
109
|
|
|
108
110
|
|
|
109
111
|
async def _embed_batch(texts: list[str]) -> list[list[float]]:
|
|
110
|
-
"""Embed a batch of texts.
|
|
111
|
-
|
|
112
|
-
Tries OpenAI-compatible shape first (POST <url>, Bearer auth,
|
|
113
|
-
response data[i].embedding). On failure, falls back to the
|
|
114
|
-
Pentatonic-AI gateway's native shape (POST .../v1/embed, X-API-Key
|
|
115
|
-
auth, response embeddings[i]). When the gateway eventually adds an
|
|
116
|
-
OpenAI-compat /v1/embeddings alias, the primary path will succeed
|
|
117
|
-
and the fallback will never fire — no code change needed.
|
|
118
|
-
"""
|
|
119
|
-
if not texts:
|
|
120
|
-
return []
|
|
121
|
-
payload = {"input": texts, "model": EMBED_MODEL_NAME}
|
|
122
|
-
# Primary: OpenAI-compat
|
|
123
|
-
try:
|
|
124
|
-
resp = await _client().post(
|
|
125
|
-
NV_EMBED_URL,
|
|
126
|
-
headers=_openai_headers(),
|
|
127
|
-
json=payload,
|
|
128
|
-
timeout=120.0,
|
|
129
|
-
)
|
|
130
|
-
resp.raise_for_status()
|
|
131
|
-
return [d["embedding"] for d in resp.json()["data"]]
|
|
132
|
-
except Exception:
|
|
133
|
-
pass
|
|
134
|
-
# Fallback: lambda-gateway native shape
|
|
135
|
-
fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
|
|
136
|
-
resp = await _client().post(
|
|
137
|
-
fallback_url,
|
|
138
|
-
headers=_lambda_headers(),
|
|
139
|
-
json=payload,
|
|
140
|
-
timeout=120.0,
|
|
141
|
-
)
|
|
142
|
-
resp.raise_for_status()
|
|
143
|
-
return resp.json()["embeddings"]
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def _openai_headers() -> dict:
|
|
147
|
-
return {"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def _lambda_headers() -> dict:
|
|
151
|
-
return {"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {}
|
|
112
|
+
"""Embed a batch of texts via the shared EmbedClient."""
|
|
113
|
+
return await _embed_client().embed_batch_async(texts)
|
|
152
114
|
|
|
153
115
|
|
|
154
116
|
# ----------------------------------------------------------------------
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
FROM python:3.12-slim
|
|
2
2
|
WORKDIR /app
|
|
3
3
|
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic
|
|
4
|
-
|
|
4
|
+
# Shared embed_provider module (build context is engine/services).
|
|
5
|
+
COPY _shared /app/_shared
|
|
6
|
+
COPY l5/l5-comms-layer.py /app/server.py
|
|
5
7
|
RUN mkdir -p /data
|
|
6
8
|
ENV L5_DB_PATH=/data/comms.db
|
|
7
9
|
EXPOSE 8034
|
|
@@ -23,6 +23,7 @@ import os
|
|
|
23
23
|
import glob
|
|
24
24
|
import hashlib
|
|
25
25
|
import json
|
|
26
|
+
import sys
|
|
26
27
|
import time
|
|
27
28
|
from datetime import datetime
|
|
28
29
|
from pathlib import Path
|
|
@@ -30,6 +31,10 @@ from pathlib import Path
|
|
|
30
31
|
import httpx
|
|
31
32
|
from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
|
|
32
33
|
|
|
34
|
+
# Shared embed client lives at engine/services/_shared/.
|
|
35
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
36
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
37
|
+
|
|
33
38
|
# --- Config ---
|
|
34
39
|
DB_PATH = os.environ.get(
|
|
35
40
|
"L5_DB_PATH",
|
|
@@ -43,43 +48,30 @@ PEOPLE_DIR = WORKSPACE / "memory" / "people"
|
|
|
43
48
|
CONTACTS_DIR = WORKSPACE / "memory" / "contacts"
|
|
44
49
|
MEMORY_DIR = WORKSPACE / "memory"
|
|
45
50
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
_embed: EmbedClient | None = None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _embed_client() -> EmbedClient:
|
|
55
|
+
"""Lazily build the shared EmbedClient for L5."""
|
|
56
|
+
global _embed
|
|
57
|
+
if _embed is None:
|
|
58
|
+
_embed = EmbedClient.from_env(
|
|
59
|
+
prefix="L5_",
|
|
60
|
+
default_url="http://localhost:8041/v1/embeddings",
|
|
61
|
+
)
|
|
62
|
+
return _embed
|
|
63
|
+
|
|
53
64
|
|
|
54
65
|
def _embed_post(texts):
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
66
|
+
"""Embed a batch of texts via the shared EmbedClient.
|
|
67
|
+
|
|
68
|
+
Provider profile (auth scheme + URL path + body/response shape) is
|
|
69
|
+
chosen by L5_EMBED_PROVIDER env var (openai | pentatonic-gateway |
|
|
70
|
+
cohere | custom). Auto-detects on 401 unless L5_EMBED_AUTODETECT=false.
|
|
59
71
|
|
|
60
72
|
Returns: list[list[float]] (one embedding per input text).
|
|
61
73
|
"""
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
r = httpx.post(
|
|
65
|
-
NV_EMBED_URL,
|
|
66
|
-
headers={"Authorization": f"Bearer {EMBED_API_KEY}"} if EMBED_API_KEY else {},
|
|
67
|
-
json=payload,
|
|
68
|
-
timeout=120,
|
|
69
|
-
)
|
|
70
|
-
r.raise_for_status()
|
|
71
|
-
return [d["embedding"] for d in r.json()["data"]]
|
|
72
|
-
except Exception:
|
|
73
|
-
pass
|
|
74
|
-
fallback_url = NV_EMBED_URL.replace("/v1/embeddings", "/v1/embed").replace("/embeddings", "/embed")
|
|
75
|
-
r = httpx.post(
|
|
76
|
-
fallback_url,
|
|
77
|
-
headers={"X-API-Key": EMBED_API_KEY} if EMBED_API_KEY else {},
|
|
78
|
-
json=payload,
|
|
79
|
-
timeout=120,
|
|
80
|
-
)
|
|
81
|
-
r.raise_for_status()
|
|
82
|
-
return r.json()["embeddings"]
|
|
74
|
+
return _embed_client().embed_batch(texts)
|
|
83
75
|
|
|
84
76
|
# Ollama fallback path. URL/model can be overridden so the L5 container can
|
|
85
77
|
# reach an Ollama instance running on the docker host (host.docker.internal)
|
|
@@ -3,7 +3,9 @@ WORKDIR /app
|
|
|
3
3
|
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
|
4
4
|
RUN pip install --no-cache-dir fastapi "uvicorn[standard]" httpx "pymilvus[milvus_lite]" "setuptools<70" pydantic spacy
|
|
5
5
|
RUN python -m spacy download en_core_web_sm
|
|
6
|
-
|
|
6
|
+
# Shared embed_provider module (build context is engine/services).
|
|
7
|
+
COPY _shared /app/_shared
|
|
8
|
+
COPY l6/l6-document-store.py /app/server.py
|
|
7
9
|
RUN mkdir -p /data
|
|
8
10
|
ENV L6_DATA_DIR=/data
|
|
9
11
|
EXPOSE 8037
|
|
@@ -20,6 +20,7 @@ import logging
|
|
|
20
20
|
import os
|
|
21
21
|
import re
|
|
22
22
|
import sqlite3
|
|
23
|
+
import sys
|
|
23
24
|
import time
|
|
24
25
|
from datetime import datetime, timezone
|
|
25
26
|
from pathlib import Path
|
|
@@ -29,6 +30,10 @@ import httpx
|
|
|
29
30
|
from pymilvus import MilvusClient, DataType, CollectionSchema, FieldSchema
|
|
30
31
|
from pymilvus.milvus_client.index import IndexParams
|
|
31
32
|
|
|
33
|
+
# Shared embed client lives at engine/services/_shared/.
|
|
34
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
35
|
+
from _shared.embed_provider import EmbedClient # noqa: E402
|
|
36
|
+
|
|
32
37
|
# ---------------------------------------------------------------------------
|
|
33
38
|
# Config
|
|
34
39
|
# ---------------------------------------------------------------------------
|
|
@@ -37,39 +42,29 @@ DATA_DIR = Path(os.environ.get("L6_DATA_DIR", str(Path.home() / "l6-document-sto
|
|
|
37
42
|
MILVUS_DB = str(DATA_DIR / "documents.db")
|
|
38
43
|
FTS_DB = str(DATA_DIR / "documents_fts.db")
|
|
39
44
|
OLLAMA_URL = os.environ.get("L6_OLLAMA_URL", "http://localhost:11434")
|
|
40
|
-
EMBED_MODEL = os.environ.get("L6_EMBED_MODEL", "nomic-embed-text")
|
|
41
|
-
NV_EMBED_URL = os.environ.get("L6_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
|
|
42
45
|
NV_EMBED_ENABLED = os.environ.get("L6_NV_EMBED_ENABLED", "true").lower() == "true"
|
|
43
46
|
EMBED_DIM = int(os.environ.get("L6_EMBED_DIM", "4096"))
|
|
44
|
-
# Optional Authorization: Bearer <key> for the embedding endpoint.
|
|
45
|
-
EMBED_API_KEY = os.environ.get("L6_EMBED_API_KEY", "")
|
|
46
47
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
timeout=120,
|
|
48
|
+
_embed: EmbedClient | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _embed_client() -> EmbedClient:
|
|
52
|
+
"""Lazily build the shared EmbedClient for L6."""
|
|
53
|
+
global _embed
|
|
54
|
+
if _embed is None:
|
|
55
|
+
_embed = EmbedClient.from_env(
|
|
56
|
+
prefix="L6_",
|
|
57
|
+
default_url="http://localhost:8041/v1/embeddings",
|
|
58
|
+
default_model="nomic-embed-text",
|
|
59
59
|
)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
json=payload,
|
|
69
|
-
timeout=120,
|
|
70
|
-
)
|
|
71
|
-
r.raise_for_status()
|
|
72
|
-
return r.json()["embeddings"]
|
|
60
|
+
return _embed
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _embed_post(texts):
|
|
64
|
+
"""Embed a batch of texts via the shared EmbedClient. Provider profile
|
|
65
|
+
chosen by L6_EMBED_PROVIDER env (openai | pentatonic-gateway | cohere
|
|
66
|
+
| custom). See engine/services/_shared/embed_provider.py for details."""
|
|
67
|
+
return _embed_client().embed_batch(texts)
|
|
73
68
|
|
|
74
69
|
COLLECTION_NAME = "documents"
|
|
75
70
|
RRF_K = 60
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Unit tests for engine/services/_shared/embed_provider.py.
|
|
2
|
+
|
|
3
|
+
Run with:
|
|
4
|
+
cd packages/memory-engine
|
|
5
|
+
python -m pytest tests/test_embed_provider.py -v
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
# Make the engine/services tree importable for tests without packaging it.
|
|
14
|
+
ROOT = Path(__file__).parent.parent / "engine" / "services"
|
|
15
|
+
sys.path.insert(0, str(ROOT))
|
|
16
|
+
|
|
17
|
+
import json # noqa: E402
|
|
18
|
+
|
|
19
|
+
import httpx # noqa: E402
|
|
20
|
+
import pytest # noqa: E402
|
|
21
|
+
|
|
22
|
+
from _shared.embed_provider import ( # noqa: E402
|
|
23
|
+
PROVIDERS,
|
|
24
|
+
EmbedAuthError,
|
|
25
|
+
EmbedClient,
|
|
26
|
+
EmbedHTTPError,
|
|
27
|
+
EmbedProvider,
|
|
28
|
+
resolve_provider,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ----------------------------------------------------------------------
|
|
33
|
+
# Helpers — stub httpx so we can assert the request shape.
|
|
34
|
+
# ----------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
class _FakeResponse:
|
|
37
|
+
def __init__(self, status_code: int, payload: dict | str = ""):
|
|
38
|
+
self.status_code = status_code
|
|
39
|
+
if isinstance(payload, dict):
|
|
40
|
+
self._json = payload
|
|
41
|
+
self.text = json.dumps(payload)
|
|
42
|
+
else:
|
|
43
|
+
self._json = None
|
|
44
|
+
self.text = payload
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def is_success(self) -> bool:
|
|
48
|
+
return 200 <= self.status_code < 300
|
|
49
|
+
|
|
50
|
+
def json(self) -> dict:
|
|
51
|
+
if self._json is None:
|
|
52
|
+
raise ValueError("not json")
|
|
53
|
+
return self._json
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class _Recorder:
|
|
57
|
+
"""Records every httpx.post call and returns canned responses keyed by URL."""
|
|
58
|
+
|
|
59
|
+
def __init__(self):
|
|
60
|
+
self.calls: list[dict] = []
|
|
61
|
+
self.responses: dict[str, _FakeResponse] = {}
|
|
62
|
+
|
|
63
|
+
def respond(self, url: str, response: _FakeResponse) -> None:
|
|
64
|
+
self.responses[url] = response
|
|
65
|
+
|
|
66
|
+
def __call__(self, url, *, json, headers, timeout):
|
|
67
|
+
self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
|
|
68
|
+
if url in self.responses:
|
|
69
|
+
return self.responses[url]
|
|
70
|
+
# default: 401 to flush out unmatched URLs
|
|
71
|
+
return _FakeResponse(401, "no stub for this url")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@pytest.fixture
|
|
75
|
+
def recorder(monkeypatch):
|
|
76
|
+
rec = _Recorder()
|
|
77
|
+
monkeypatch.setattr(httpx, "post", rec)
|
|
78
|
+
return rec
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ----------------------------------------------------------------------
|
|
82
|
+
# Provider resolution
|
|
83
|
+
# ----------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
def test_resolve_built_in_providers():
|
|
86
|
+
for name in ("openai", "pentatonic-gateway", "cohere"):
|
|
87
|
+
p = resolve_provider(name)
|
|
88
|
+
assert p.name == name
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_resolve_unknown_provider_raises():
|
|
92
|
+
with pytest.raises(ValueError):
|
|
93
|
+
resolve_provider("not-a-provider")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_resolve_custom_provider_from_env(monkeypatch):
|
|
97
|
+
monkeypatch.setenv("L4_EMBED_AUTH_HEADER", "X-Custom-Auth")
|
|
98
|
+
monkeypatch.setenv("L4_EMBED_AUTH_FORMAT", "Token {key}")
|
|
99
|
+
monkeypatch.setenv("L4_EMBED_PATH_DEFAULT", "/embed")
|
|
100
|
+
monkeypatch.setenv("L4_EMBED_BODY_SHAPE", "cohere")
|
|
101
|
+
monkeypatch.setenv("L4_EMBED_RESPONSE_SHAPE", "cohere")
|
|
102
|
+
p = resolve_provider("custom", env_prefix="L4_")
|
|
103
|
+
assert p.auth_header == "X-Custom-Auth"
|
|
104
|
+
assert p.auth_format == "Token {key}"
|
|
105
|
+
assert p.path_default == "/embed"
|
|
106
|
+
# body shape produces Cohere-style "texts" field
|
|
107
|
+
body = p.body_builder(["hi"], "model-x")
|
|
108
|
+
assert body == {"texts": ["hi"], "model": "model-x", "input_type": "search_document"}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ----------------------------------------------------------------------
|
|
112
|
+
# Request shape
|
|
113
|
+
# ----------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
def test_openai_provider_request_shape(recorder):
|
|
116
|
+
recorder.respond(
|
|
117
|
+
"https://gw/v1/embeddings",
|
|
118
|
+
_FakeResponse(200, {"data": [{"embedding": [0.1, 0.2]}]}),
|
|
119
|
+
)
|
|
120
|
+
client = EmbedClient(
|
|
121
|
+
url="https://gw/v1/embeddings",
|
|
122
|
+
api_key="k",
|
|
123
|
+
model="m",
|
|
124
|
+
provider=PROVIDERS["openai"],
|
|
125
|
+
)
|
|
126
|
+
out = client.embed_batch(["hello"])
|
|
127
|
+
assert out == [[0.1, 0.2]]
|
|
128
|
+
call = recorder.calls[0]
|
|
129
|
+
assert call["url"] == "https://gw/v1/embeddings"
|
|
130
|
+
assert call["json"] == {"input": ["hello"], "model": "m"}
|
|
131
|
+
assert call["headers"] == {"Authorization": "Bearer k"}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def test_pentatonic_provider_request_shape(recorder):
|
|
135
|
+
recorder.respond(
|
|
136
|
+
"https://lambda-gateway.pentatonic.com/v1/embed",
|
|
137
|
+
_FakeResponse(200, {"data": [{"embedding": [1.0, 2.0]}]}),
|
|
138
|
+
)
|
|
139
|
+
client = EmbedClient(
|
|
140
|
+
url="https://lambda-gateway.pentatonic.com/v1/embed",
|
|
141
|
+
api_key="secret",
|
|
142
|
+
model="nv-embed-v2",
|
|
143
|
+
provider=PROVIDERS["pentatonic-gateway"],
|
|
144
|
+
)
|
|
145
|
+
out = client.embed_batch(["t1"])
|
|
146
|
+
assert out == [[1.0, 2.0]]
|
|
147
|
+
call = recorder.calls[0]
|
|
148
|
+
assert call["url"] == "https://lambda-gateway.pentatonic.com/v1/embed"
|
|
149
|
+
assert call["json"] == {"input": ["t1"], "model": "nv-embed-v2"}
|
|
150
|
+
assert call["headers"] == {"X-API-Key": "secret"}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_pentatonic_response_parser_handles_both_shapes(recorder):
|
|
154
|
+
"""Pentatonic Gateway has historically returned both {"data":[...]} and
|
|
155
|
+
{"embeddings":[...]} on different endpoints. Parser accepts either."""
|
|
156
|
+
p = PROVIDERS["pentatonic-gateway"]
|
|
157
|
+
assert p.response_parser({"data": [{"embedding": [1.0]}]}) == [[1.0]]
|
|
158
|
+
assert p.response_parser({"embeddings": [[1.0]]}) == [[1.0]]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def test_cohere_provider_request_shape(recorder):
|
|
162
|
+
recorder.respond(
|
|
163
|
+
"https://api.cohere.ai/v1/embed",
|
|
164
|
+
_FakeResponse(200, {"embeddings": [[3.0, 4.0]]}),
|
|
165
|
+
)
|
|
166
|
+
client = EmbedClient(
|
|
167
|
+
url="https://api.cohere.ai/v1/embed",
|
|
168
|
+
api_key="cohere-key",
|
|
169
|
+
model="embed-english-v3.0",
|
|
170
|
+
provider=PROVIDERS["cohere"],
|
|
171
|
+
)
|
|
172
|
+
out = client.embed_batch(["hi"])
|
|
173
|
+
assert out == [[3.0, 4.0]]
|
|
174
|
+
call = recorder.calls[0]
|
|
175
|
+
assert call["json"] == {
|
|
176
|
+
"texts": ["hi"],
|
|
177
|
+
"model": "embed-english-v3.0",
|
|
178
|
+
"input_type": "search_document",
|
|
179
|
+
}
|
|
180
|
+
assert call["headers"] == {"Authorization": "Bearer cohere-key"}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ----------------------------------------------------------------------
|
|
184
|
+
# Auto-detect
|
|
185
|
+
# ----------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
def test_autodetect_on_401_falls_back_to_pentatonic(recorder):
|
|
188
|
+
"""Operator configured openai but the URL+key actually belong to
|
|
189
|
+
Pentatonic Gateway. First call 401s, auto-detect probes pentatonic
|
|
190
|
+
and succeeds."""
|
|
191
|
+
recorder.respond(
|
|
192
|
+
"https://lambda-gateway.pentatonic.com/v1/embeddings",
|
|
193
|
+
_FakeResponse(401, '{"error":"Invalid or missing API key"}'),
|
|
194
|
+
)
|
|
195
|
+
recorder.respond(
|
|
196
|
+
"https://lambda-gateway.pentatonic.com/v1/embed",
|
|
197
|
+
_FakeResponse(200, {"data": [{"embedding": [9.0]}]}),
|
|
198
|
+
)
|
|
199
|
+
client = EmbedClient(
|
|
200
|
+
url="https://lambda-gateway.pentatonic.com/v1/embeddings",
|
|
201
|
+
api_key="k",
|
|
202
|
+
model="nv-embed-v2",
|
|
203
|
+
provider=PROVIDERS["openai"],
|
|
204
|
+
)
|
|
205
|
+
out = client.embed_batch(["x"])
|
|
206
|
+
assert out == [[9.0]]
|
|
207
|
+
assert client.active_provider == "pentatonic-gateway"
|
|
208
|
+
# First call uses configured (openai) shape, second uses pentatonic
|
|
209
|
+
assert recorder.calls[0]["headers"] == {"Authorization": "Bearer k"}
|
|
210
|
+
assert recorder.calls[1]["headers"] == {"X-API-Key": "k"}
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def test_autodetect_caches_after_first_success(recorder):
|
|
214
|
+
"""Once auto-detect picks a winner, subsequent calls go straight to it
|
|
215
|
+
without retrying the original 401."""
|
|
216
|
+
recorder.respond(
|
|
217
|
+
"https://gw/v1/embeddings",
|
|
218
|
+
_FakeResponse(401, "wrong scheme"),
|
|
219
|
+
)
|
|
220
|
+
recorder.respond(
|
|
221
|
+
"https://gw/v1/embed",
|
|
222
|
+
_FakeResponse(200, {"data": [{"embedding": [1.0]}]}),
|
|
223
|
+
)
|
|
224
|
+
client = EmbedClient(
|
|
225
|
+
url="https://gw/v1/embeddings",
|
|
226
|
+
api_key="k",
|
|
227
|
+
model="m",
|
|
228
|
+
provider=PROVIDERS["openai"],
|
|
229
|
+
)
|
|
230
|
+
client.embed_batch(["a"]) # triggers detect
|
|
231
|
+
n_after_first = len(recorder.calls)
|
|
232
|
+
client.embed_batch(["b"]) # should go straight to /v1/embed
|
|
233
|
+
assert len(recorder.calls) == n_after_first + 1
|
|
234
|
+
assert recorder.calls[-1]["url"] == "https://gw/v1/embed"
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def test_autodetect_disabled_raises(recorder):
|
|
238
|
+
recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "no auth"))
|
|
239
|
+
client = EmbedClient(
|
|
240
|
+
url="https://gw/v1/embeddings",
|
|
241
|
+
api_key="k",
|
|
242
|
+
model="m",
|
|
243
|
+
provider=PROVIDERS["openai"],
|
|
244
|
+
autodetect=False,
|
|
245
|
+
)
|
|
246
|
+
with pytest.raises(EmbedAuthError):
|
|
247
|
+
client.embed_batch(["x"])
|
|
248
|
+
# Only one call: no probing happened.
|
|
249
|
+
assert len(recorder.calls) == 1
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def test_autodetect_all_fail_raises(recorder):
|
|
253
|
+
"""Every candidate also 401s — raise EmbedAuthError."""
|
|
254
|
+
recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "x"))
|
|
255
|
+
recorder.respond("https://gw/v1/embed", _FakeResponse(401, "x"))
|
|
256
|
+
client = EmbedClient(
|
|
257
|
+
url="https://gw/v1/embeddings",
|
|
258
|
+
api_key="k",
|
|
259
|
+
model="m",
|
|
260
|
+
provider=PROVIDERS["openai"],
|
|
261
|
+
)
|
|
262
|
+
with pytest.raises(EmbedAuthError):
|
|
263
|
+
client.embed_batch(["x"])
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# ----------------------------------------------------------------------
|
|
267
|
+
# Error handling
|
|
268
|
+
# ----------------------------------------------------------------------
|
|
269
|
+
|
|
270
|
+
def test_non_401_http_error_does_not_trigger_autodetect(recorder):
|
|
271
|
+
recorder.respond(
|
|
272
|
+
"https://gw/v1/embeddings",
|
|
273
|
+
_FakeResponse(503, "upstream down"),
|
|
274
|
+
)
|
|
275
|
+
client = EmbedClient(
|
|
276
|
+
url="https://gw/v1/embeddings",
|
|
277
|
+
api_key="k",
|
|
278
|
+
model="m",
|
|
279
|
+
provider=PROVIDERS["openai"],
|
|
280
|
+
)
|
|
281
|
+
with pytest.raises(EmbedHTTPError) as exc:
|
|
282
|
+
client.embed_batch(["x"])
|
|
283
|
+
assert exc.value.status == 503
|
|
284
|
+
assert len(recorder.calls) == 1
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def test_empty_input_returns_empty(recorder):
|
|
288
|
+
client = EmbedClient(
|
|
289
|
+
url="https://gw/v1/embeddings",
|
|
290
|
+
api_key="k",
|
|
291
|
+
model="m",
|
|
292
|
+
provider=PROVIDERS["openai"],
|
|
293
|
+
)
|
|
294
|
+
assert client.embed_batch([]) == []
|
|
295
|
+
assert recorder.calls == []
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
# ----------------------------------------------------------------------
|
|
299
|
+
# from_env construction
|
|
300
|
+
# ----------------------------------------------------------------------
|
|
301
|
+
|
|
302
|
+
def test_from_env_reads_layer_prefix(monkeypatch, recorder):
|
|
303
|
+
monkeypatch.setenv("L4_NV_EMBED_URL", "https://lambda-gateway.pentatonic.com/v1/embed")
|
|
304
|
+
monkeypatch.setenv("L4_EMBED_API_KEY", "real-key")
|
|
305
|
+
monkeypatch.setenv("L4_EMBED_MODEL", "nv-embed-v2")
|
|
306
|
+
monkeypatch.setenv("L4_EMBED_PROVIDER", "pentatonic-gateway")
|
|
307
|
+
recorder.respond(
|
|
308
|
+
"https://lambda-gateway.pentatonic.com/v1/embed",
|
|
309
|
+
_FakeResponse(200, {"data": [{"embedding": [42.0]}]}),
|
|
310
|
+
)
|
|
311
|
+
client = EmbedClient.from_env(prefix="L4_")
|
|
312
|
+
out = client.embed_batch(["t"])
|
|
313
|
+
assert out == [[42.0]]
|
|
314
|
+
assert client.active_provider == "pentatonic-gateway"
|
|
315
|
+
assert recorder.calls[0]["headers"] == {"X-API-Key": "real-key"}
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def test_from_env_default_provider_is_openai(monkeypatch):
|
|
319
|
+
monkeypatch.setenv("L5_NV_EMBED_URL", "https://gw/v1/embeddings")
|
|
320
|
+
monkeypatch.setenv("L5_EMBED_API_KEY", "k")
|
|
321
|
+
client = EmbedClient.from_env(prefix="L5_")
|
|
322
|
+
assert client.active_provider == "openai"
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def test_from_env_autodetect_opt_out(monkeypatch, recorder):
|
|
326
|
+
monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
|
|
327
|
+
monkeypatch.setenv("L4_EMBED_API_KEY", "k")
|
|
328
|
+
monkeypatch.setenv("L4_EMBED_AUTODETECT", "false")
|
|
329
|
+
recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "x"))
|
|
330
|
+
client = EmbedClient.from_env(prefix="L4_")
|
|
331
|
+
with pytest.raises(EmbedAuthError):
|
|
332
|
+
client.embed_batch(["x"])
|
|
333
|
+
assert len(recorder.calls) == 1
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# ----------------------------------------------------------------------
|
|
337
|
+
# URL handling
|
|
338
|
+
# ----------------------------------------------------------------------
|
|
339
|
+
|
|
340
|
+
def test_url_without_path_gets_provider_default(recorder):
|
|
341
|
+
"""If operator provides only a base URL, the provider's path_default
|
|
342
|
+
is appended."""
|
|
343
|
+
recorder.respond(
|
|
344
|
+
"https://lambda-gateway.pentatonic.com/v1/embed",
|
|
345
|
+
_FakeResponse(200, {"data": [{"embedding": [0.0]}]}),
|
|
346
|
+
)
|
|
347
|
+
client = EmbedClient(
|
|
348
|
+
url="https://lambda-gateway.pentatonic.com",
|
|
349
|
+
api_key="k",
|
|
350
|
+
model="m",
|
|
351
|
+
provider=PROVIDERS["pentatonic-gateway"],
|
|
352
|
+
)
|
|
353
|
+
client.embed_batch(["x"])
|
|
354
|
+
assert recorder.calls[0]["url"] == "https://lambda-gateway.pentatonic.com/v1/embed"
|