@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/package.json +1 -1
  2. package/packages/memory/openclaw-plugin/index.js +7 -0
  3. package/packages/memory/openclaw-plugin/openclaw.plugin.json +9 -1
  4. package/packages/memory/openclaw-plugin/package.json +1 -1
  5. package/packages/memory/src/__tests__/engine.test.js +142 -0
  6. package/packages/memory/src/engine.js +65 -0
  7. package/packages/memory-engine/compat/server.py +90 -5
  8. package/packages/memory-engine/docker-compose.yml +18 -8
  9. package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
  10. package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
  11. package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
  12. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +640 -81
  13. package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
  14. package/packages/memory-engine/engine/services/l4/server.py +19 -57
  15. package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
  16. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
  17. package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
  18. package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
  19. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +128 -0
  20. package/packages/memory-engine/tests/e2e_arena.sh +28 -4
  21. package/packages/memory-engine/tests/test_aggregate.py +333 -0
  22. package/packages/memory-engine/tests/test_arena_safety.py +232 -0
  23. package/packages/memory-engine/tests/test_channel_stat_reader.py +437 -0
  24. package/packages/memory-engine/tests/test_channel_stat_rollups.py +308 -0
  25. package/packages/memory-engine/tests/test_embed_provider.py +354 -0
  26. package/packages/memory-engine/tests/test_l3_arena_isolation.py +412 -0
@@ -0,0 +1,308 @@
1
+ """Integration tests for ChannelStat rollup writes + reader fast-path.
2
+
3
+ These exercise the actual Cypher running against a live Neo4j: the
4
+ writer block in /index-internal-batch maintains (:ChannelStat) nodes
5
+ on every store with contact_email metadata, and /aggregate-internal
6
+ reads from those nodes when present (falling back to the edge walk
7
+ when stats haven't been populated yet).
8
+
9
+ Gated on NEO4J_TEST_URI + NEO4J_TEST_PASSWORD; skip cleanly when
10
+ those env vars are absent so unit-only test runs stay fast.
11
+
12
+ Run:
13
+
14
+ cd packages/memory-engine
15
+ NEO4J_TEST_URI=bolt://localhost:17687 \\
16
+ NEO4J_TEST_PASSWORD=testpassword \\
17
+ .venv/bin/python -m pytest tests/test_channel_stat_rollups.py -v
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ import uuid
23
+
24
+ import pytest
25
+
26
+
27
+ _NEO4J_URI = os.environ.get("NEO4J_TEST_URI")
28
+ _NEO4J_USER = os.environ.get("NEO4J_TEST_USER", "neo4j")
29
+ _NEO4J_PASSWORD = os.environ.get("NEO4J_TEST_PASSWORD")
30
+
31
+ _skip_no_neo4j = pytest.mark.skipif(
32
+ not (_NEO4J_URI and _NEO4J_PASSWORD),
33
+ reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
34
+ )
35
+
36
+
37
@pytest.fixture
def neo4j_driver():
    """Yield a live driver plus two fresh arena names; purge both afterwards.

    Two arenas are provisioned so isolation tests can run side by side
    without trampling each other's data.
    """
    from neo4j import GraphDatabase

    drv = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    arena_names = [
        f"stat_a_{uuid.uuid4().hex[:8]}",
        f"stat_b_{uuid.uuid4().hex[:8]}",
    ]
    yield drv, arena_names
    # ChannelStat lives alongside Person/Chunk in the arena; a single
    # per-arena DETACH DELETE therefore removes everything we created.
    with drv.session() as cleanup:
        for name in arena_names:
            cleanup.run(
                "MATCH (n) WHERE n.arena = $arena DETACH DELETE n",
                arena=name,
            )
    drv.close()
55
+
56
+
57
def _store_chunk_with_email(
    session,
    arena: str,
    cid: str,
    email: str,
    channel: str,
    direction: str,
    occurred_at: str,
) -> None:
    """Mirror exactly the Cypher /index-internal-batch runs for the
    email-keyed Person path, including the ChannelStat rollup.

    The real call sites this is testing are inside that endpoint; this
    helper lets the integration test exercise the same Cypher without
    spinning up the FastAPI app.

    Args:
        session: Open neo4j session the statements run on.
        arena: Tenant/arena scoping value stamped on every node.
        cid: Chunk id the COMMUNICATED edge points at.
        email: Person key (Person is MERGEd on arena + email).
        channel: Channel bucket for the rollup, e.g. "email" / "slack".
        direction: "inbound" or "outbound" — drives the per-direction counters.
        occurred_at: ISO-8601 timestamp string (compared lexically in Cypher).
    """
    now_iso = "2026-05-10T00:00:00Z"
    # Ensure the chunk exists (matches what /index-internal-batch
    # does immediately before writing the Person edge).
    session.run(
        """
        MERGE (c:Chunk {id: $cid})
        SET c.text = 't', c.path = 'p', c.arena = $arena,
            c.created_at = $now
        """,
        cid=cid, arena=arena, now=now_iso,
    )
    # Single statement: upsert the Person, upsert the COMMUNICATED edge,
    # then — only when the edge is brand new (r._counted = false) — bump
    # the ChannelStat rollup inside a FOREACH guard so replays of the same
    # (chunk, email) pair never double-count.
    session.run(
        """
        MERGE (p:Entity:Person {arena: $arena, email: $email})
        ON CREATE SET p.created_at = $now,
                      p.first_seen = $occurred_at,
                      p.last_seen = $occurred_at
        ON MATCH SET p.last_seen = CASE
            WHEN $occurred_at > coalesce(p.last_seen, '')
            THEN $occurred_at
            ELSE p.last_seen END
        WITH p
        MATCH (c:Chunk {arena: $arena, id: $cid})
        MERGE (p)-[r:COMMUNICATED]->(c)
        ON CREATE SET r.channel = $channel,
                      r.direction = $direction,
                      r.occurred_at = $occurred_at,
                      r.weight = 1.0,
                      r._counted = false
        WITH p, r
        FOREACH (_ IN CASE WHEN r._counted = false THEN [1] ELSE [] END |
            MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: $channel})
            ON CREATE SET s.count = 0,
                          s.inbound = 0,
                          s.outbound = 0,
                          s.first_seen = $occurred_at,
                          s.last_seen = $occurred_at,
                          s.created_at = $now
            SET s.count = s.count + 1,
                s.inbound = s.inbound + (CASE WHEN $direction = 'inbound' THEN 1 ELSE 0 END),
                s.outbound = s.outbound + (CASE WHEN $direction = 'outbound' THEN 1 ELSE 0 END),
                s.first_seen = CASE
                    WHEN $occurred_at < coalesce(s.first_seen, $occurred_at)
                    THEN $occurred_at
                    ELSE s.first_seen END,
                s.last_seen = CASE
                    WHEN $occurred_at > coalesce(s.last_seen, '')
                    THEN $occurred_at
                    ELSE s.last_seen END,
                s.updated_at = $now
            MERGE (p)-[:HAS_STAT]->(s)
            SET r._counted = true
        )
        """,
        arena=arena, email=email, cid=cid,
        channel=channel, direction=direction,
        occurred_at=occurred_at, now=now_iso,
    )
129
+
130
+
131
+ def _read_stats(session, arena: str, email: str) -> list[dict]:
132
+ return [
133
+ dict(rec)
134
+ for rec in session.run(
135
+ "MATCH (s:ChannelStat {arena: $arena, person_email: $email})\n"
136
+ "RETURN s.channel AS channel, s.count AS count,\n"
137
+ " s.inbound AS inbound, s.outbound AS outbound,\n"
138
+ " s.first_seen AS first_seen, s.last_seen AS last_seen\n"
139
+ "ORDER BY s.channel\n",
140
+ arena=arena, email=email,
141
+ )
142
+ ]
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Writer behaviour.
147
+ # ---------------------------------------------------------------------------
148
+
149
+
150
@_skip_no_neo4j
def test_first_store_creates_channelstat_with_count_one(neo4j_driver) -> None:
    """A single inbound email store materialises exactly one rollup node."""
    driver, (arena, _) = neo4j_driver
    when = "2026-05-09T10:00:00Z"
    with driver.session() as session:
        _store_chunk_with_email(
            session,
            arena,
            cid="c1",
            email="alex@x.io",
            channel="email",
            direction="inbound",
            occurred_at=when,
        )
        rows = _read_stats(session, arena, "alex@x.io")
        assert len(rows) == 1
        only = rows[0]
        assert only["channel"] == "email"
        assert only["count"] == 1
        assert only["inbound"] == 1
        assert only["outbound"] == 0
        assert only["first_seen"] == when
        assert only["last_seen"] == when
167
+
168
+
169
@_skip_no_neo4j
def test_repeated_stores_accumulate_counts_and_widen_time_bounds(neo4j_driver) -> None:
    """Three stores on one channel roll up into a single node whose counters
    accumulate and whose first/last_seen stretch to the extremes."""
    driver, (arena, _) = neo4j_driver
    events = [
        ("c1", "inbound", "2026-05-08T09:00:00Z"),
        ("c2", "outbound", "2026-05-09T10:00:00Z"),
        # Earliest timestamp arrives last — it must still drive first_seen.
        ("c3", "inbound", "2026-05-07T08:00:00Z"),
    ]
    with driver.session() as session:
        for chunk_id, direction, ts in events:
            _store_chunk_with_email(
                session, arena, cid=chunk_id, email="alex@x.io",
                channel="email", direction=direction, occurred_at=ts,
            )
        rows = _read_stats(session, arena, "alex@x.io")
        assert len(rows) == 1
        assert rows[0]["count"] == 3
        assert rows[0]["inbound"] == 2
        assert rows[0]["outbound"] == 1
        assert rows[0]["first_seen"] == "2026-05-07T08:00:00Z"
        assert rows[0]["last_seen"] == "2026-05-09T10:00:00Z"
195
+
196
+
197
@_skip_no_neo4j
def test_per_channel_buckets_kept_separate(neo4j_driver) -> None:
    """Stores on different channels land in distinct ChannelStat buckets."""
    driver, (arena, _) = neo4j_driver
    with driver.session() as session:
        for chunk_id, chan, direction, ts in (
            ("c1", "email", "inbound", "2026-05-09T10:00:00Z"),
            ("c2", "slack", "outbound", "2026-05-09T11:00:00Z"),
            ("c3", "slack", "inbound", "2026-05-09T12:00:00Z"),
        ):
            _store_chunk_with_email(
                session, arena, chunk_id, "alex@x.io", chan, direction, ts,
            )
        rows = sorted(
            _read_stats(session, arena, "alex@x.io"),
            key=lambda s: s["channel"],
        )
        assert [s["channel"] for s in rows] == ["email", "slack"]
        by_channel = {s["channel"]: s for s in rows}
        assert by_channel["email"]["count"] == 1
        assert by_channel["slack"]["count"] == 2
        assert by_channel["slack"]["inbound"] == 1
        assert by_channel["slack"]["outbound"] == 1
221
+
222
+
223
@_skip_no_neo4j
def test_replay_of_same_chunk_does_not_double_count(neo4j_driver) -> None:
    """Idempotency under retry: storing the same (chunk_id, email) three
    times must leave count == 1, guarded by the r._counted flag that is
    set when the COMMUNICATED edge is first created."""
    driver, (arena, _) = neo4j_driver
    with driver.session() as session:
        for _attempt in range(3):
            _store_chunk_with_email(
                session, arena, "c1", "alex@x.io",
                "email", "inbound", "2026-05-09T10:00:00Z",
            )
        rows = _read_stats(session, arena, "alex@x.io")
        assert len(rows) == 1
        # Three identical stores → still only a single increment.
        assert rows[0]["count"] == 1
        assert rows[0]["inbound"] == 1
240
+
241
+
242
@_skip_no_neo4j
def test_arena_isolation_on_channelstat(neo4j_driver) -> None:
    """Arena A's stats never reflect arena B's writes, even for the same
    email address — the multi-tenancy invariant that #28 landed for
    Person/Chunk extends to ChannelStat."""
    driver, (arena_a, arena_b) = neo4j_driver
    addr = "shared@example.com"
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena_a, "c1", addr,
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        _store_chunk_with_email(
            session, arena_b, "c2", addr,
            "email", "inbound", "2026-05-09T11:00:00Z",
        )
        _store_chunk_with_email(
            session, arena_b, "c3", addr,
            "slack", "outbound", "2026-05-09T12:00:00Z",
        )
        stats_a = _read_stats(session, arena_a, addr)
        stats_b = _read_stats(session, arena_b, addr)
        # Arena A only ever saw the single email message.
        assert len(stats_a) == 1
        assert stats_a[0]["channel"] == "email"
        assert stats_a[0]["count"] == 1
        # Arena B saw both of its channels.
        assert len(stats_b) == 2
269
+
270
+
271
@_skip_no_neo4j
def test_communicated_edge_has_counted_flag_after_write(neo4j_driver) -> None:
    """After the first store the idempotency flag must read true so that
    re-runs of the writer (same MERGE pattern) skip the increment block."""
    driver, (arena, _) = neo4j_driver
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena, "c1", "alex@x.io",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        row = session.run(
            "MATCH (:Person {arena: $arena, email: $email})-[r:COMMUNICATED]->(:Chunk {arena: $arena, id: $cid})\n"
            "RETURN r._counted AS counted",
            arena=arena, email="alex@x.io", cid="c1",
        ).single()
        assert row["counted"] is True
288
+
289
+
290
@_skip_no_neo4j
def test_has_stat_edge_links_person_to_channelstat(neo4j_driver) -> None:
    """The Person→ChannelStat traversal must exist: ops queries rely on it
    ('show me all stat rollups for this person') and tenant offboarding
    cascades deletes through it."""
    driver, (arena, _) = neo4j_driver
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena, "c1", "alex@x.io",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        result = session.run(
            "MATCH (p:Person {arena: $arena, email: $email})-[:HAS_STAT]->(s:ChannelStat {arena: $arena})\n"
            "RETURN s.channel AS channel, s.count AS count\n",
            arena=arena, email="alex@x.io",
        )
        rows = list(result)
        assert len(rows) == 1
        assert rows[0]["channel"] == "email"
        assert rows[0]["count"] == 1
@@ -0,0 +1,354 @@
1
+ """Unit tests for engine/services/_shared/embed_provider.py.
2
+
3
+ Run with:
4
+ cd packages/memory-engine
5
+ python -m pytest tests/test_embed_provider.py -v
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ # Make the engine/services tree importable for tests without packaging it.
14
+ ROOT = Path(__file__).parent.parent / "engine" / "services"
15
+ sys.path.insert(0, str(ROOT))
16
+
17
+ import json # noqa: E402
18
+
19
+ import httpx # noqa: E402
20
+ import pytest # noqa: E402
21
+
22
+ from _shared.embed_provider import ( # noqa: E402
23
+ PROVIDERS,
24
+ EmbedAuthError,
25
+ EmbedClient,
26
+ EmbedHTTPError,
27
+ EmbedProvider,
28
+ resolve_provider,
29
+ )
30
+
31
+
32
+ # ----------------------------------------------------------------------
33
+ # Helpers — stub httpx so we can assert the request shape.
34
+ # ----------------------------------------------------------------------
35
+
36
+ class _FakeResponse:
37
+ def __init__(self, status_code: int, payload: dict | str = ""):
38
+ self.status_code = status_code
39
+ if isinstance(payload, dict):
40
+ self._json = payload
41
+ self.text = json.dumps(payload)
42
+ else:
43
+ self._json = None
44
+ self.text = payload
45
+
46
+ @property
47
+ def is_success(self) -> bool:
48
+ return 200 <= self.status_code < 300
49
+
50
+ def json(self) -> dict:
51
+ if self._json is None:
52
+ raise ValueError("not json")
53
+ return self._json
54
+
55
+
56
+ class _Recorder:
57
+ """Records every httpx.post call and returns canned responses keyed by URL."""
58
+
59
+ def __init__(self):
60
+ self.calls: list[dict] = []
61
+ self.responses: dict[str, _FakeResponse] = {}
62
+
63
+ def respond(self, url: str, response: _FakeResponse) -> None:
64
+ self.responses[url] = response
65
+
66
+ def __call__(self, url, *, json, headers, timeout):
67
+ self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
68
+ if url in self.responses:
69
+ return self.responses[url]
70
+ # default: 401 to flush out unmatched URLs
71
+ return _FakeResponse(401, "no stub for this url")
72
+
73
+
74
@pytest.fixture
def recorder(monkeypatch):
    """Patch httpx.post with a _Recorder so tests can inspect outbound calls."""
    stub = _Recorder()
    monkeypatch.setattr(httpx, "post", stub)
    return stub
79
+
80
+
81
+ # ----------------------------------------------------------------------
82
+ # Provider resolution
83
+ # ----------------------------------------------------------------------
84
+
85
def test_resolve_built_in_providers():
    """Every bundled provider resolves under its registry name."""
    for provider_name in ("openai", "pentatonic-gateway", "cohere"):
        resolved = resolve_provider(provider_name)
        assert resolved.name == provider_name
89
+
90
+
91
def test_resolve_unknown_provider_raises():
    """Unknown provider names are rejected loudly, not silently defaulted."""
    with pytest.raises(ValueError):
        resolve_provider("not-a-provider")
94
+
95
+
96
def test_resolve_custom_provider_from_env(monkeypatch):
    """A fully env-driven 'custom' provider picks up auth, path and shapes."""
    env = {
        "L4_EMBED_AUTH_HEADER": "X-Custom-Auth",
        "L4_EMBED_AUTH_FORMAT": "Token {key}",
        "L4_EMBED_PATH_DEFAULT": "/embed",
        "L4_EMBED_BODY_SHAPE": "cohere",
        "L4_EMBED_RESPONSE_SHAPE": "cohere",
    }
    for key, value in env.items():
        monkeypatch.setenv(key, value)
    custom = resolve_provider("custom", env_prefix="L4_")
    assert custom.auth_header == "X-Custom-Auth"
    assert custom.auth_format == "Token {key}"
    assert custom.path_default == "/embed"
    # The cohere body shape emits a Cohere-style "texts" field.
    payload = custom.body_builder(["hi"], "model-x")
    assert payload == {"texts": ["hi"], "model": "model-x", "input_type": "search_document"}
109
+
110
+
111
+ # ----------------------------------------------------------------------
112
+ # Request shape
113
+ # ----------------------------------------------------------------------
114
+
115
def test_openai_provider_request_shape(recorder):
    """OpenAI-style providers post {"input", "model"} with a Bearer header."""
    url = "https://gw/v1/embeddings"
    recorder.respond(url, _FakeResponse(200, {"data": [{"embedding": [0.1, 0.2]}]}))
    client = EmbedClient(
        url=url,
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )
    vectors = client.embed_batch(["hello"])
    assert vectors == [[0.1, 0.2]]
    sent = recorder.calls[0]
    assert sent["url"] == url
    assert sent["json"] == {"input": ["hello"], "model": "m"}
    assert sent["headers"] == {"Authorization": "Bearer k"}
132
+
133
+
134
def test_pentatonic_provider_request_shape(recorder):
    """Pentatonic Gateway uses the same body shape but an X-API-Key header."""
    url = "https://lambda-gateway.pentatonic.com/v1/embed"
    recorder.respond(url, _FakeResponse(200, {"data": [{"embedding": [1.0, 2.0]}]}))
    client = EmbedClient(
        url=url,
        api_key="secret",
        model="nv-embed-v2",
        provider=PROVIDERS["pentatonic-gateway"],
    )
    vectors = client.embed_batch(["t1"])
    assert vectors == [[1.0, 2.0]]
    sent = recorder.calls[0]
    assert sent["url"] == url
    assert sent["json"] == {"input": ["t1"], "model": "nv-embed-v2"}
    assert sent["headers"] == {"X-API-Key": "secret"}
151
+
152
+
153
def test_pentatonic_response_parser_handles_both_shapes():
    """Pentatonic Gateway has historically returned both {"data":[...]} and
    {"embeddings":[...]} on different endpoints. Parser accepts either.

    This is a pure-function test — no HTTP happens — so the ``recorder``
    fixture the original requested (which monkeypatched httpx.post) was
    unused and has been dropped.
    """
    p = PROVIDERS["pentatonic-gateway"]
    assert p.response_parser({"data": [{"embedding": [1.0]}]}) == [[1.0]]
    assert p.response_parser({"embeddings": [[1.0]]}) == [[1.0]]
159
+
160
+
161
def test_cohere_provider_request_shape(recorder):
    """Cohere's API wants a "texts" list plus an input_type discriminator."""
    url = "https://api.cohere.ai/v1/embed"
    recorder.respond(url, _FakeResponse(200, {"embeddings": [[3.0, 4.0]]}))
    client = EmbedClient(
        url=url,
        api_key="cohere-key",
        model="embed-english-v3.0",
        provider=PROVIDERS["cohere"],
    )
    vectors = client.embed_batch(["hi"])
    assert vectors == [[3.0, 4.0]]
    sent = recorder.calls[0]
    expected_body = {
        "texts": ["hi"],
        "model": "embed-english-v3.0",
        "input_type": "search_document",
    }
    assert sent["json"] == expected_body
    assert sent["headers"] == {"Authorization": "Bearer cohere-key"}
181
+
182
+
183
+ # ----------------------------------------------------------------------
184
+ # Auto-detect
185
+ # ----------------------------------------------------------------------
186
+
187
def test_autodetect_on_401_falls_back_to_pentatonic(recorder):
    """Operator configured openai but the URL+key actually belong to
    Pentatonic Gateway: the first call 401s, auto-detect then probes the
    pentatonic shape and succeeds."""
    wrong_url = "https://lambda-gateway.pentatonic.com/v1/embeddings"
    right_url = "https://lambda-gateway.pentatonic.com/v1/embed"
    recorder.respond(wrong_url, _FakeResponse(401, '{"error":"Invalid or missing API key"}'))
    recorder.respond(right_url, _FakeResponse(200, {"data": [{"embedding": [9.0]}]}))
    client = EmbedClient(
        url=wrong_url,
        api_key="k",
        model="nv-embed-v2",
        provider=PROVIDERS["openai"],
    )
    assert client.embed_batch(["x"]) == [[9.0]]
    assert client.active_provider == "pentatonic-gateway"
    # First request carried the configured (openai) header; the probe
    # switched to the pentatonic one.
    first, second = recorder.calls[0], recorder.calls[1]
    assert first["headers"] == {"Authorization": "Bearer k"}
    assert second["headers"] == {"X-API-Key": "k"}
211
+
212
+
213
def test_autodetect_caches_after_first_success(recorder):
    """Once auto-detect picks a winner, later batches go straight to it
    without re-trying the 401ing original endpoint."""
    recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "wrong scheme"))
    recorder.respond("https://gw/v1/embed", _FakeResponse(200, {"data": [{"embedding": [1.0]}]}))
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )
    client.embed_batch(["a"])  # first batch triggers detection
    calls_after_detect = len(recorder.calls)
    client.embed_batch(["b"])  # must hit /v1/embed directly
    assert len(recorder.calls) == calls_after_detect + 1
    assert recorder.calls[-1]["url"] == "https://gw/v1/embed"
235
+
236
+
237
def test_autodetect_disabled_raises(recorder):
    """With autodetect off, a 401 surfaces immediately and nothing is probed."""
    recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "no auth"))
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
        autodetect=False,
    )
    with pytest.raises(EmbedAuthError):
        client.embed_batch(["x"])
    # Exactly one request: no probing happened.
    assert len(recorder.calls) == 1
250
+
251
+
252
def test_autodetect_all_fail_raises(recorder):
    """When every candidate endpoint also 401s, EmbedAuthError propagates."""
    for endpoint in ("https://gw/v1/embeddings", "https://gw/v1/embed"):
        recorder.respond(endpoint, _FakeResponse(401, "x"))
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )
    with pytest.raises(EmbedAuthError):
        client.embed_batch(["x"])
264
+
265
+
266
+ # ----------------------------------------------------------------------
267
+ # Error handling
268
+ # ----------------------------------------------------------------------
269
+
270
def test_non_401_http_error_does_not_trigger_autodetect(recorder):
    """A 503 is an upstream fault, not an auth mismatch — no probing occurs."""
    recorder.respond("https://gw/v1/embeddings", _FakeResponse(503, "upstream down"))
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )
    with pytest.raises(EmbedHTTPError) as excinfo:
        client.embed_batch(["x"])
    assert excinfo.value.status == 503
    assert len(recorder.calls) == 1
285
+
286
+
287
def test_empty_input_returns_empty(recorder):
    """An empty batch short-circuits: no HTTP call is made at all."""
    client = EmbedClient(
        url="https://gw/v1/embeddings",
        api_key="k",
        model="m",
        provider=PROVIDERS["openai"],
    )
    assert client.embed_batch([]) == []
    assert recorder.calls == []
296
+
297
+
298
+ # ----------------------------------------------------------------------
299
+ # from_env construction
300
+ # ----------------------------------------------------------------------
301
+
302
def test_from_env_reads_layer_prefix(monkeypatch, recorder):
    """from_env honours the per-layer env prefix for URL, key, model, provider."""
    layer_env = {
        "L4_NV_EMBED_URL": "https://lambda-gateway.pentatonic.com/v1/embed",
        "L4_EMBED_API_KEY": "real-key",
        "L4_EMBED_MODEL": "nv-embed-v2",
        "L4_EMBED_PROVIDER": "pentatonic-gateway",
    }
    for name, value in layer_env.items():
        monkeypatch.setenv(name, value)
    recorder.respond(
        "https://lambda-gateway.pentatonic.com/v1/embed",
        _FakeResponse(200, {"data": [{"embedding": [42.0]}]}),
    )
    client = EmbedClient.from_env(prefix="L4_")
    assert client.embed_batch(["t"]) == [[42.0]]
    assert client.active_provider == "pentatonic-gateway"
    assert recorder.calls[0]["headers"] == {"X-API-Key": "real-key"}
316
+
317
+
318
def test_from_env_default_provider_is_openai(monkeypatch):
    """Absent an explicit provider, from_env assumes the openai wire shape."""
    monkeypatch.setenv("L5_NV_EMBED_URL", "https://gw/v1/embeddings")
    monkeypatch.setenv("L5_EMBED_API_KEY", "k")
    assert EmbedClient.from_env(prefix="L5_").active_provider == "openai"
323
+
324
+
325
def test_from_env_autodetect_opt_out(monkeypatch, recorder):
    """EMBED_AUTODETECT=false via env disables probing like autodetect=False."""
    monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
    monkeypatch.setenv("L4_EMBED_API_KEY", "k")
    monkeypatch.setenv("L4_EMBED_AUTODETECT", "false")
    recorder.respond("https://gw/v1/embeddings", _FakeResponse(401, "x"))
    client = EmbedClient.from_env(prefix="L4_")
    with pytest.raises(EmbedAuthError):
        client.embed_batch(["x"])
    # A single request and no follow-up probe.
    assert len(recorder.calls) == 1
334
+
335
+
336
+ # ----------------------------------------------------------------------
337
+ # URL handling
338
+ # ----------------------------------------------------------------------
339
+
340
def test_url_without_path_gets_provider_default(recorder):
    """A bare base URL gains the provider's path_default before posting."""
    full_url = "https://lambda-gateway.pentatonic.com/v1/embed"
    recorder.respond(full_url, _FakeResponse(200, {"data": [{"embedding": [0.0]}]}))
    client = EmbedClient(
        url="https://lambda-gateway.pentatonic.com",
        api_key="k",
        model="m",
        provider=PROVIDERS["pentatonic-gateway"],
    )
    client.embed_batch(["x"])
    assert recorder.calls[0]["url"] == full_url