@pentatonic-ai/ai-agent-sdk 0.7.12 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/__tests__/chat-turn.test.js +1 -1
- package/packages/memory/openclaw-plugin/index.js +1 -1
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/hosted.test.js +2 -2
- package/packages/memory/src/hosted.js +1 -1
- package/packages/memory-engine/docker-compose.yml +18 -8
- package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +44 -23
- package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
- package/packages/memory-engine/engine/services/l4/server.py +19 -57
- package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
- package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
- package/packages/memory-engine/tests/test_embed_provider.py +354 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.7.12",
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -98,7 +98,7 @@ describe("openclaw-memory-plugin — hosted CHAT_TURN via afterTurn", () => {
|
|
|
98
98
|
});
|
|
99
99
|
|
|
100
100
|
const storeMemory = calls.filter(
|
|
101
|
-
(c) => c.body?.variables?.moduleId === "
|
|
101
|
+
(c) => c.body?.variables?.moduleId === "pentatonic-memory"
|
|
102
102
|
);
|
|
103
103
|
const chatTurn = calls.filter(
|
|
104
104
|
(c) => c.body?.variables?.moduleId === "conversation-analytics"
|
|
@@ -227,7 +227,7 @@ async function hostedStore(config, content, metadata = {}) {
|
|
|
227
227
|
createModuleEvent(moduleId: $moduleId, input: $input) { success eventId }
|
|
228
228
|
}`,
|
|
229
229
|
variables: {
|
|
230
|
-
moduleId: "
|
|
230
|
+
moduleId: "pentatonic-memory",
|
|
231
231
|
input: {
|
|
232
232
|
eventType: "STORE_MEMORY",
|
|
233
233
|
data: {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "pentatonic-memory",
|
|
3
3
|
"name": "Pentatonic Memory",
|
|
4
4
|
"description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.8.4",
|
|
6
6
|
"kind": "context-engine",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/openclaw-memory-plugin",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.4",
|
|
4
4
|
"description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -220,7 +220,7 @@ describe("hostedEmitChatTurn", () => {
|
|
|
220
220
|
// =============================================================================
|
|
221
221
|
|
|
222
222
|
describe("hostedStoreMemory", () => {
|
|
223
|
-
it("emits STORE_MEMORY against
|
|
223
|
+
it("emits STORE_MEMORY against pentatonic-memory", async () => {
|
|
224
224
|
stubFetch(() =>
|
|
225
225
|
new Response(
|
|
226
226
|
JSON.stringify({
|
|
@@ -236,7 +236,7 @@ describe("hostedStoreMemory", () => {
|
|
|
236
236
|
{ source: "my-app" }
|
|
237
237
|
);
|
|
238
238
|
expect(out.ok).toBe(true);
|
|
239
|
-
expect(lastCall.body.variables.moduleId).toBe("
|
|
239
|
+
expect(lastCall.body.variables.moduleId).toBe("pentatonic-memory");
|
|
240
240
|
expect(lastCall.body.variables.input.eventType).toBe("STORE_MEMORY");
|
|
241
241
|
expect(lastCall.body.variables.input.data.attributes.content).toBe(
|
|
242
242
|
"User owns a Subaru"
|
|
@@ -324,7 +324,7 @@ export async function hostedStoreMemory(
|
|
|
324
324
|
body: JSON.stringify({
|
|
325
325
|
query: CREATE_MODULE_EVENT_MUTATION,
|
|
326
326
|
variables: {
|
|
327
|
-
moduleId: "
|
|
327
|
+
moduleId: "pentatonic-memory",
|
|
328
328
|
input: { eventType: "STORE_MEMORY", data },
|
|
329
329
|
},
|
|
330
330
|
}),
|
|
@@ -88,8 +88,8 @@ services:
|
|
|
88
88
|
l4:
|
|
89
89
|
<<: *engine-base
|
|
90
90
|
build:
|
|
91
|
-
context: ./engine/services
|
|
92
|
-
dockerfile: Dockerfile
|
|
91
|
+
context: ./engine/services
|
|
92
|
+
dockerfile: l4/Dockerfile
|
|
93
93
|
container_name: pme-l4
|
|
94
94
|
# Default 18042 to avoid port collisions on 8042.
|
|
95
95
|
# Override via PME_L4_PORT for bench setups that intentionally replace it.
|
|
@@ -98,6 +98,8 @@ services:
|
|
|
98
98
|
L4_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
99
99
|
L4_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
100
100
|
L4_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
101
|
+
L4_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
102
|
+
L4_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
101
103
|
L4_EMBED_DIM: ${EMBED_DIM:-4096}
|
|
102
104
|
L4_DB_PATH: /data/vec.db
|
|
103
105
|
extra_hosts:
|
|
@@ -116,8 +118,8 @@ services:
|
|
|
116
118
|
l5:
|
|
117
119
|
<<: *engine-base
|
|
118
120
|
build:
|
|
119
|
-
context: ./engine/services
|
|
120
|
-
dockerfile: Dockerfile
|
|
121
|
+
context: ./engine/services
|
|
122
|
+
dockerfile: l5/Dockerfile
|
|
121
123
|
container_name: pme-l5
|
|
122
124
|
# Default 18034 to avoid port collisions on 8034.
|
|
123
125
|
# Override via PME_L5_PORT for bench setups that intentionally replace it.
|
|
@@ -126,6 +128,8 @@ services:
|
|
|
126
128
|
L5_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
127
129
|
L5_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
128
130
|
L5_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
131
|
+
L5_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
132
|
+
L5_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
129
133
|
L5_EMBED_DIM: ${EMBED_DIM:-4096}
|
|
130
134
|
L5_OLLAMA_DIM: ${OLLAMA_DIM:-768}
|
|
131
135
|
L5_OLLAMA_EMBED_URL: ${L5_OLLAMA_EMBED_URL:-http://host.docker.internal:11434/api/embed}
|
|
@@ -143,8 +147,8 @@ services:
|
|
|
143
147
|
l6:
|
|
144
148
|
<<: *engine-base
|
|
145
149
|
build:
|
|
146
|
-
context: ./engine/services
|
|
147
|
-
dockerfile: Dockerfile
|
|
150
|
+
context: ./engine/services
|
|
151
|
+
dockerfile: l6/Dockerfile
|
|
148
152
|
container_name: pme-l6
|
|
149
153
|
# Default 18037 to avoid colliding with Spark Core L6 doc-store on 8037.
|
|
150
154
|
# Override via PME_L6_PORT for bench setups that intentionally replace it.
|
|
@@ -153,6 +157,8 @@ services:
|
|
|
153
157
|
L6_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
154
158
|
L6_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
155
159
|
L6_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
160
|
+
L6_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
161
|
+
L6_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
156
162
|
L6_EMBED_DIM: ${EMBED_DIM:-4096}
|
|
157
163
|
L6_DATA_DIR: /data
|
|
158
164
|
extra_hosts:
|
|
@@ -166,12 +172,16 @@ services:
|
|
|
166
172
|
l2:
|
|
167
173
|
<<: *engine-base
|
|
168
174
|
build:
|
|
169
|
-
context: ./engine/services
|
|
170
|
-
dockerfile: Dockerfile
|
|
175
|
+
context: ./engine/services
|
|
176
|
+
dockerfile: l2/Dockerfile
|
|
171
177
|
container_name: pme-l2
|
|
172
178
|
ports: ["127.0.0.1:${PME_L2_PORT:-8131}:8031"]
|
|
173
179
|
environment:
|
|
174
180
|
PME_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
|
|
181
|
+
PME_EMBED_API_KEY: ${EMBED_API_KEY:-}
|
|
182
|
+
PME_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
|
|
183
|
+
PME_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
|
|
184
|
+
PME_NV_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
|
|
175
185
|
PME_NEO4J_URI: bolt://l3:7687
|
|
176
186
|
PME_NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
|
|
177
187
|
NEO4J_PASSWORD: ${NEO4J_PASSWORD:-local-dev-pw}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Shared utilities used across the memory-engine layer services."""
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
"""Embedding provider abstraction for memory-engine layer services.
|
|
2
|
+
|
|
3
|
+
Different embedding gateways disagree on four things:
|
|
4
|
+
1. Auth scheme (Authorization: Bearer ... vs X-API-Key: ...)
|
|
5
|
+
2. URL path (/v1/embeddings vs /v1/embed vs vendor-specific)
|
|
6
|
+
3. Request shape (OpenAI {"input": [...]} vs Cohere {"texts": [...], "input_type": ...})
|
|
7
|
+
4. Response shape ({"data": [{"embedding": ...}]} vs {"embeddings": [...]})
|
|
8
|
+
|
|
9
|
+
`EmbedProvider` captures all four for one gateway. Built-in profiles cover
|
|
10
|
+
OpenAI-compatible, Pentatonic AI Gateway, and Cohere. A "custom" profile lets
|
|
11
|
+
you override auth + path via env without code changes.
|
|
12
|
+
|
|
13
|
+
Auto-detect: when the configured provider returns 401, the client probes the
|
|
14
|
+
other built-in profiles once and caches the winner for the rest of the
|
|
15
|
+
process. Disabled with `EMBED_AUTODETECT=false` per layer.
|
|
16
|
+
|
|
17
|
+
Usage from a layer service:
|
|
18
|
+
|
|
19
|
+
from _shared.embed_provider import EmbedClient
|
|
20
|
+
|
|
21
|
+
embed = EmbedClient.from_env(prefix="L4_")
|
|
22
|
+
vectors = embed.embed_batch(["hello", "world"])
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import logging
|
|
28
|
+
import os
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from typing import Any, Callable
|
|
31
|
+
from urllib.parse import urlparse, urlunparse
|
|
32
|
+
|
|
33
|
+
import httpx
|
|
34
|
+
|
|
35
|
+
log = logging.getLogger("embed_provider")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ----------------------------------------------------------------------
|
|
39
|
+
# Body builders + response parsers
|
|
40
|
+
# ----------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
def _openai_body(texts: list[str], model: str) -> dict[str, Any]:
|
|
43
|
+
return {"input": texts, "model": model}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _openai_response(payload: dict[str, Any]) -> list[list[float]]:
|
|
47
|
+
return [d["embedding"] for d in payload["data"]]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _pentatonic_response(payload: dict[str, Any]) -> list[list[float]]:
|
|
51
|
+
"""Pentatonic AI Gateway returns {"embeddings": [...]} on /v1/embed
|
|
52
|
+
and OpenAI-shaped {"data": [{"embedding": ...}]} on /v1/embeddings.
|
|
53
|
+
Accept either."""
|
|
54
|
+
if "data" in payload:
|
|
55
|
+
return [d["embedding"] for d in payload["data"]]
|
|
56
|
+
return payload["embeddings"]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _cohere_body(texts: list[str], model: str) -> dict[str, Any]:
|
|
60
|
+
return {"texts": texts, "model": model, "input_type": "search_document"}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _cohere_response(payload: dict[str, Any]) -> list[list[float]]:
|
|
64
|
+
return payload["embeddings"]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
_BODY_BUILDERS: dict[str, Callable[[list[str], str], dict[str, Any]]] = {
|
|
68
|
+
"openai": _openai_body,
|
|
69
|
+
"cohere": _cohere_body,
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
_RESPONSE_PARSERS: dict[str, Callable[[dict[str, Any]], list[list[float]]]] = {
|
|
73
|
+
"openai": _openai_response,
|
|
74
|
+
"pentatonic": _pentatonic_response,
|
|
75
|
+
"cohere": _cohere_response,
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ----------------------------------------------------------------------
|
|
80
|
+
# Provider profiles
|
|
81
|
+
# ----------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
@dataclass(frozen=True)
|
|
84
|
+
class EmbedProvider:
|
|
85
|
+
name: str
|
|
86
|
+
auth_header: str
|
|
87
|
+
auth_format: str # f-string-ish; "{key}" placeholder substituted at request time
|
|
88
|
+
path_default: str
|
|
89
|
+
body_builder: Callable[[list[str], str], dict[str, Any]]
|
|
90
|
+
response_parser: Callable[[dict[str, Any]], list[list[float]]]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
PROVIDERS: dict[str, EmbedProvider] = {
|
|
94
|
+
"openai": EmbedProvider(
|
|
95
|
+
name="openai",
|
|
96
|
+
auth_header="Authorization",
|
|
97
|
+
auth_format="Bearer {key}",
|
|
98
|
+
path_default="/v1/embeddings",
|
|
99
|
+
body_builder=_openai_body,
|
|
100
|
+
response_parser=_openai_response,
|
|
101
|
+
),
|
|
102
|
+
"pentatonic-gateway": EmbedProvider(
|
|
103
|
+
name="pentatonic-gateway",
|
|
104
|
+
auth_header="X-API-Key",
|
|
105
|
+
auth_format="{key}",
|
|
106
|
+
path_default="/v1/embed",
|
|
107
|
+
body_builder=_openai_body,
|
|
108
|
+
response_parser=_pentatonic_response,
|
|
109
|
+
),
|
|
110
|
+
"cohere": EmbedProvider(
|
|
111
|
+
name="cohere",
|
|
112
|
+
auth_header="Authorization",
|
|
113
|
+
auth_format="Bearer {key}",
|
|
114
|
+
path_default="/v1/embed",
|
|
115
|
+
body_builder=_cohere_body,
|
|
116
|
+
response_parser=_cohere_response,
|
|
117
|
+
),
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _build_custom_provider(env_prefix: str) -> EmbedProvider:
|
|
122
|
+
"""Build a custom provider from env vars.
|
|
123
|
+
|
|
124
|
+
Env vars (env_prefix is e.g. 'L4_'):
|
|
125
|
+
{prefix}EMBED_AUTH_HEADER default Authorization
|
|
126
|
+
{prefix}EMBED_AUTH_FORMAT default Bearer {key}
|
|
127
|
+
{prefix}EMBED_PATH_DEFAULT default /v1/embeddings
|
|
128
|
+
{prefix}EMBED_BODY_SHAPE "openai" | "cohere" default openai
|
|
129
|
+
{prefix}EMBED_RESPONSE_SHAPE "openai" | "pentatonic" | "cohere" default openai
|
|
130
|
+
"""
|
|
131
|
+
body_shape = os.environ.get(f"{env_prefix}EMBED_BODY_SHAPE", "openai")
|
|
132
|
+
response_shape = os.environ.get(f"{env_prefix}EMBED_RESPONSE_SHAPE", "openai")
|
|
133
|
+
return EmbedProvider(
|
|
134
|
+
name="custom",
|
|
135
|
+
auth_header=os.environ.get(f"{env_prefix}EMBED_AUTH_HEADER", "Authorization"),
|
|
136
|
+
auth_format=os.environ.get(f"{env_prefix}EMBED_AUTH_FORMAT", "Bearer {key}"),
|
|
137
|
+
path_default=os.environ.get(f"{env_prefix}EMBED_PATH_DEFAULT", "/v1/embeddings"),
|
|
138
|
+
body_builder=_BODY_BUILDERS.get(body_shape, _openai_body),
|
|
139
|
+
response_parser=_RESPONSE_PARSERS.get(response_shape, _openai_response),
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def resolve_provider(name: str, env_prefix: str = "") -> EmbedProvider:
|
|
144
|
+
"""Look up a built-in provider, or build a custom one from env."""
|
|
145
|
+
if name in PROVIDERS:
|
|
146
|
+
return PROVIDERS[name]
|
|
147
|
+
if name == "custom":
|
|
148
|
+
return _build_custom_provider(env_prefix)
|
|
149
|
+
raise ValueError(
|
|
150
|
+
f"Unknown EMBED_PROVIDER {name!r}. "
|
|
151
|
+
f"Built-ins: {sorted(PROVIDERS)} or 'custom'."
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ----------------------------------------------------------------------
|
|
156
|
+
# URL helpers
|
|
157
|
+
# ----------------------------------------------------------------------
|
|
158
|
+
|
|
159
|
+
def _swap_path(url: str, new_path: str) -> str:
|
|
160
|
+
"""Replace the path component of `url` with `new_path`. Empty path on the
|
|
161
|
+
input URL is treated as 'no path provided' and gets `new_path` appended."""
|
|
162
|
+
p = urlparse(url)
|
|
163
|
+
return urlunparse((p.scheme, p.netloc, new_path, "", p.query, p.fragment))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _resolved_url(url: str, provider: EmbedProvider) -> str:
|
|
167
|
+
"""If `url` has no path, append the provider's default path; otherwise
|
|
168
|
+
leave as-is (operator chose a specific path)."""
|
|
169
|
+
p = urlparse(url)
|
|
170
|
+
if not p.path or p.path == "/":
|
|
171
|
+
return _swap_path(url, provider.path_default)
|
|
172
|
+
return url
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ----------------------------------------------------------------------
|
|
176
|
+
# Client
|
|
177
|
+
# ----------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
class EmbedAuthError(RuntimeError):
|
|
180
|
+
"""Raised on an unrecoverable 401: auto-detect is disabled or has already switched providers, or no candidate succeeded."""
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class EmbedHTTPError(RuntimeError):
|
|
184
|
+
"""Raised on non-401 HTTP errors from the embedding endpoint."""
|
|
185
|
+
|
|
186
|
+
def __init__(self, status: int, body: str):
|
|
187
|
+
super().__init__(f"HTTP {status}: {body[:200]}")
|
|
188
|
+
self.status = status
|
|
189
|
+
self.body = body
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class EmbedClient:
|
|
193
|
+
"""Sync + async embedding client with provider auto-detection.
|
|
194
|
+
|
|
195
|
+
Construct via `EmbedClient.from_env(prefix="L4_")` so each layer service
|
|
196
|
+
reads its own env-var namespace; or call the constructor directly for
|
|
197
|
+
tests.
|
|
198
|
+
|
|
199
|
+
The client is stateful: after a successful auto-detect, the winning
|
|
200
|
+
provider is cached for the rest of the process lifetime. Set
|
|
201
|
+
`autodetect=False` (or env `{prefix}EMBED_AUTODETECT=false`) to disable.
|
|
202
|
+
"""
|
|
203
|
+
|
|
204
|
+
def __init__(
|
|
205
|
+
self,
|
|
206
|
+
*,
|
|
207
|
+
url: str,
|
|
208
|
+
api_key: str,
|
|
209
|
+
model: str,
|
|
210
|
+
provider: EmbedProvider,
|
|
211
|
+
autodetect: bool = True,
|
|
212
|
+
timeout: float = 120.0,
|
|
213
|
+
env_prefix: str = "",
|
|
214
|
+
) -> None:
|
|
215
|
+
self._configured_provider = provider
|
|
216
|
+
self._provider = provider
|
|
217
|
+
self._configured_url = url
|
|
218
|
+
self._url = _resolved_url(url, provider)
|
|
219
|
+
self._api_key = api_key
|
|
220
|
+
self._model = model
|
|
221
|
+
self._timeout = timeout
|
|
222
|
+
self._autodetect = autodetect
|
|
223
|
+
self._env_prefix = env_prefix
|
|
224
|
+
self._detected = False
|
|
225
|
+
|
|
226
|
+
# ------------------------------------------------------------------
|
|
227
|
+
# Construction
|
|
228
|
+
# ------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
@classmethod
|
|
231
|
+
def from_env(
|
|
232
|
+
cls,
|
|
233
|
+
prefix: str,
|
|
234
|
+
*,
|
|
235
|
+
url_var: str | None = None,
|
|
236
|
+
key_var: str | None = None,
|
|
237
|
+
model_var: str | None = None,
|
|
238
|
+
default_url: str = "",
|
|
239
|
+
default_model: str = "nv-embed-v2",
|
|
240
|
+
) -> "EmbedClient":
|
|
241
|
+
"""Build an EmbedClient from layer-prefixed env vars.
|
|
242
|
+
|
|
243
|
+
Layer services use `EmbedClient.from_env(prefix="L4_")` and the
|
|
244
|
+
client reads:
|
|
245
|
+
{prefix}NV_EMBED_URL (override with `url_var=...`)
|
|
246
|
+
{prefix}EMBED_API_KEY
|
|
247
|
+
{prefix}EMBED_MODEL
|
|
248
|
+
{prefix}EMBED_PROVIDER default 'openai'
|
|
249
|
+
{prefix}EMBED_AUTODETECT default 'true'
|
|
250
|
+
{prefix}EMBED_TIMEOUT default '120'
|
|
251
|
+
"""
|
|
252
|
+
url_var = url_var or f"{prefix}NV_EMBED_URL"
|
|
253
|
+
key_var = key_var or f"{prefix}EMBED_API_KEY"
|
|
254
|
+
model_var = model_var or f"{prefix}EMBED_MODEL"
|
|
255
|
+
|
|
256
|
+
url = os.environ.get(url_var, default_url)
|
|
257
|
+
api_key = os.environ.get(key_var, "")
|
|
258
|
+
model = os.environ.get(model_var, default_model)
|
|
259
|
+
provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
|
|
260
|
+
autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
|
|
261
|
+
timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
|
|
262
|
+
|
|
263
|
+
provider = resolve_provider(provider_name, env_prefix=prefix)
|
|
264
|
+
return cls(
|
|
265
|
+
url=url,
|
|
266
|
+
api_key=api_key,
|
|
267
|
+
model=model,
|
|
268
|
+
provider=provider,
|
|
269
|
+
autodetect=autodetect,
|
|
270
|
+
timeout=timeout,
|
|
271
|
+
env_prefix=prefix,
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
# ------------------------------------------------------------------
|
|
275
|
+
# Internals
|
|
276
|
+
# ------------------------------------------------------------------
|
|
277
|
+
|
|
278
|
+
def _headers(self, provider: EmbedProvider) -> dict[str, str]:
|
|
279
|
+
if not self._api_key:
|
|
280
|
+
return {}
|
|
281
|
+
return {provider.auth_header: provider.auth_format.format(key=self._api_key)}
|
|
282
|
+
|
|
283
|
+
def _candidate_url(self, provider: EmbedProvider) -> str:
|
|
284
|
+
"""URL to try for this provider. The configured provider keeps the
|
|
285
|
+
operator's chosen URL; auto-detect candidates swap in their own
|
|
286
|
+
path_default since the operator's path was wrong for them."""
|
|
287
|
+
if provider.name == self._configured_provider.name:
|
|
288
|
+
return self._url
|
|
289
|
+
return _swap_path(self._configured_url, provider.path_default)
|
|
290
|
+
|
|
291
|
+
def _switch_to(self, provider: EmbedProvider) -> None:
|
|
292
|
+
"""Make `provider` the active provider for future requests."""
|
|
293
|
+
log.warning(
|
|
294
|
+
"embed_provider auto-detect switched: configured=%s -> active=%s. "
|
|
295
|
+
"Set %sEMBED_PROVIDER=%s to silence this.",
|
|
296
|
+
self._configured_provider.name,
|
|
297
|
+
provider.name,
|
|
298
|
+
self._env_prefix,
|
|
299
|
+
provider.name,
|
|
300
|
+
)
|
|
301
|
+
self._provider = provider
|
|
302
|
+
self._url = self._candidate_url(provider)
|
|
303
|
+
self._detected = True
|
|
304
|
+
|
|
305
|
+
# ------------------------------------------------------------------
|
|
306
|
+
# Sync API
|
|
307
|
+
# ------------------------------------------------------------------
|
|
308
|
+
|
|
309
|
+
def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
|
310
|
+
"""Embed a list of texts. Empty list returns empty list."""
|
|
311
|
+
if not texts:
|
|
312
|
+
return []
|
|
313
|
+
return self._post_with_autodetect(texts, async_mode=False)
|
|
314
|
+
|
|
315
|
+
def embed_one(self, text: str) -> list[float]:
|
|
316
|
+
return self.embed_batch([text])[0]
|
|
317
|
+
|
|
318
|
+
# ------------------------------------------------------------------
|
|
319
|
+
# Async API
|
|
320
|
+
# ------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
|
|
323
|
+
if not texts:
|
|
324
|
+
return []
|
|
325
|
+
return await self._post_with_autodetect_async(texts)
|
|
326
|
+
|
|
327
|
+
async def embed_one_async(self, text: str) -> list[float]:
|
|
328
|
+
out = await self.embed_batch_async([text])
|
|
329
|
+
return out[0]
|
|
330
|
+
|
|
331
|
+
# ------------------------------------------------------------------
|
|
332
|
+
# Request paths
|
|
333
|
+
# ------------------------------------------------------------------
|
|
334
|
+
|
|
335
|
+
def _post_with_autodetect(self, texts: list[str], *, async_mode: bool) -> list[list[float]]:
|
|
336
|
+
del async_mode # kept for symmetry; sync path is its own method
|
|
337
|
+
body = self._provider.body_builder(texts, self._model)
|
|
338
|
+
headers = self._headers(self._provider)
|
|
339
|
+
try:
|
|
340
|
+
r = httpx.post(self._url, json=body, headers=headers, timeout=self._timeout)
|
|
341
|
+
except httpx.HTTPError as exc:
|
|
342
|
+
raise EmbedHTTPError(0, str(exc)) from exc
|
|
343
|
+
|
|
344
|
+
if r.status_code == 401 and self._autodetect and not self._detected:
|
|
345
|
+
return self._autodetect_and_retry(texts, last_body=r.text)
|
|
346
|
+
|
|
347
|
+
if r.status_code == 401:
|
|
348
|
+
raise EmbedAuthError(r.text)
|
|
349
|
+
if not r.is_success:
|
|
350
|
+
raise EmbedHTTPError(r.status_code, r.text)
|
|
351
|
+
return self._provider.response_parser(r.json())
|
|
352
|
+
|
|
353
|
+
async def _post_with_autodetect_async(self, texts: list[str]) -> list[list[float]]:
|
|
354
|
+
body = self._provider.body_builder(texts, self._model)
|
|
355
|
+
headers = self._headers(self._provider)
|
|
356
|
+
try:
|
|
357
|
+
async with httpx.AsyncClient(timeout=self._timeout) as client:
|
|
358
|
+
r = await client.post(self._url, json=body, headers=headers)
|
|
359
|
+
except httpx.HTTPError as exc:
|
|
360
|
+
raise EmbedHTTPError(0, str(exc)) from exc
|
|
361
|
+
|
|
362
|
+
if r.status_code == 401 and self._autodetect and not self._detected:
|
|
363
|
+
return await self._autodetect_and_retry_async(texts, last_body=r.text)
|
|
364
|
+
|
|
365
|
+
if r.status_code == 401:
|
|
366
|
+
raise EmbedAuthError(r.text)
|
|
367
|
+
if not r.is_success:
|
|
368
|
+
raise EmbedHTTPError(r.status_code, r.text)
|
|
369
|
+
return self._provider.response_parser(r.json())
|
|
370
|
+
|
|
371
|
+
# ------------------------------------------------------------------
|
|
372
|
+
# Auto-detect
|
|
373
|
+
# ------------------------------------------------------------------
|
|
374
|
+
|
|
375
|
+
def _candidates(self) -> list[EmbedProvider]:
|
|
376
|
+
return [p for p in PROVIDERS.values() if p.name != self._provider.name]
|
|
377
|
+
|
|
378
|
+
def _autodetect_and_retry(self, texts: list[str], *, last_body: str) -> list[list[float]]:
|
|
379
|
+
for candidate in self._candidates():
|
|
380
|
+
url = self._candidate_url(candidate)
|
|
381
|
+
body = candidate.body_builder(texts, self._model)
|
|
382
|
+
headers = (
|
|
383
|
+
{candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
|
|
384
|
+
if self._api_key
|
|
385
|
+
else {}
|
|
386
|
+
)
|
|
387
|
+
try:
|
|
388
|
+
r = httpx.post(url, json=body, headers=headers, timeout=self._timeout)
|
|
389
|
+
except httpx.HTTPError:
|
|
390
|
+
continue
|
|
391
|
+
if r.is_success:
|
|
392
|
+
self._switch_to(candidate)
|
|
393
|
+
return candidate.response_parser(r.json())
|
|
394
|
+
raise EmbedAuthError(
|
|
395
|
+
f"all providers returned auth or transport errors. "
|
|
396
|
+
f"last 401 body: {last_body[:200]}"
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
async def _autodetect_and_retry_async(self, texts: list[str], *, last_body: str) -> list[list[float]]:
|
|
400
|
+
for candidate in self._candidates():
|
|
401
|
+
url = self._candidate_url(candidate)
|
|
402
|
+
body = candidate.body_builder(texts, self._model)
|
|
403
|
+
headers = (
|
|
404
|
+
{candidate.auth_header: candidate.auth_format.format(key=self._api_key)}
|
|
405
|
+
if self._api_key
|
|
406
|
+
else {}
|
|
407
|
+
)
|
|
408
|
+
try:
|
|
409
|
+
async with httpx.AsyncClient(timeout=self._timeout) as client:
|
|
410
|
+
r = await client.post(url, json=body, headers=headers)
|
|
411
|
+
except httpx.HTTPError:
|
|
412
|
+
continue
|
|
413
|
+
if r.is_success:
|
|
414
|
+
self._switch_to(candidate)
|
|
415
|
+
return candidate.response_parser(r.json())
|
|
416
|
+
raise EmbedAuthError(
|
|
417
|
+
f"all providers returned auth or transport errors. "
|
|
418
|
+
f"last 401 body: {last_body[:200]}"
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# ------------------------------------------------------------------
|
|
422
|
+
# Introspection (handy for /health and tests)
|
|
423
|
+
# ------------------------------------------------------------------
|
|
424
|
+
|
|
425
|
+
@property
|
|
426
|
+
def active_provider(self) -> str:
|
|
427
|
+
return self._provider.name
|
|
428
|
+
|
|
429
|
+
@property
|
|
430
|
+
def active_url(self) -> str:
|
|
431
|
+
return self._url
|
|
@@ -15,8 +15,10 @@ RUN pip install --no-cache-dir \
|
|
|
15
15
|
"sentence-transformers" \
|
|
16
16
|
"torch" --extra-index-url https://download.pytorch.org/whl/cpu
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
COPY
|
|
18
|
+
# Shared embed_provider module (build context is engine/services).
|
|
19
|
+
COPY _shared /app/_shared
|
|
20
|
+
COPY l2/l2-hybridrag-proxy.py /app/server.py
|
|
21
|
+
COPY l2/init_databases.py /app/init_databases.py
|
|
20
22
|
|
|
21
23
|
# Pre-create empty L0 BM25 SQLite + empty workspace
|
|
22
24
|
RUN mkdir -p /data/workspace /data/.pentatonic/memory /data/.cache && \
|