@pentatonic-ai/ai-agent-sdk 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory/src/__tests__/engine.test.js +124 -0
- package/packages/memory/src/engine.js +69 -0
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +40 -3
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +17 -13
- package/packages/memory-engine/tests/test_embed_provider.py +138 -0
- package/packages/memory-engine/tests/test_people_list_reader.py +53 -0
package/dist/index.cjs
CHANGED
|
@@ -906,7 +906,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
906
906
|
}
|
|
907
907
|
|
|
908
908
|
// src/telemetry.js
|
|
909
|
-
var VERSION = "0.9.0";
|
|
909
|
+
var VERSION = "0.9.2";
|
|
910
910
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
911
911
|
function machineId() {
|
|
912
912
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -875,7 +875,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
875
875
|
}
|
|
876
876
|
|
|
877
877
|
// src/telemetry.js
|
|
878
|
-
var VERSION = "0.9.0";
|
|
878
|
+
var VERSION = "0.9.2";
|
|
879
879
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
880
880
|
function machineId() {
|
|
881
881
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "0.9.2",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -2,6 +2,7 @@ import { describe, it, expect, beforeEach, jest } from "@jest/globals";
|
|
|
2
2
|
import {
|
|
3
3
|
fetchEngine,
|
|
4
4
|
engineStore,
|
|
5
|
+
engineStoreBatch,
|
|
5
6
|
engineSearch,
|
|
6
7
|
engineAggregate,
|
|
7
8
|
enginePeopleList,
|
|
@@ -170,6 +171,129 @@ describe("engine HTTP client", () => {
|
|
|
170
171
|
});
|
|
171
172
|
});
|
|
172
173
|
|
|
174
|
+
describe("engineStoreBatch", () => {
|
|
175
|
+
it("posts to /store-batch with arena fixed per-record", async () => {
|
|
176
|
+
mockOk({ status: "ok", inserted: 2, ids: ["a1", "a2"] });
|
|
177
|
+
await engineStoreBatch("https://e", {
|
|
178
|
+
clientId: "acme",
|
|
179
|
+
records: [
|
|
180
|
+
{ content: "first", metadata: { kind: "note" } },
|
|
181
|
+
{ content: "second", metadata: { kind: "doc" } },
|
|
182
|
+
],
|
|
183
|
+
layerType: "episodic",
|
|
184
|
+
actorUserId: "u-1",
|
|
185
|
+
});
|
|
186
|
+
const body = JSON.parse(calls[0].init.body);
|
|
187
|
+
expect(calls[0].url).toBe("https://e/store-batch");
|
|
188
|
+
expect(body).toEqual({
|
|
189
|
+
records: [
|
|
190
|
+
{
|
|
191
|
+
content: "first",
|
|
192
|
+
metadata: {
|
|
193
|
+
kind: "note",
|
|
194
|
+
arena: "acme",
|
|
195
|
+
layer_type: "episodic",
|
|
196
|
+
actor_user_id: "u-1",
|
|
197
|
+
},
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
content: "second",
|
|
201
|
+
metadata: {
|
|
202
|
+
kind: "doc",
|
|
203
|
+
arena: "acme",
|
|
204
|
+
layer_type: "episodic",
|
|
205
|
+
actor_user_id: "u-1",
|
|
206
|
+
},
|
|
207
|
+
},
|
|
208
|
+
],
|
|
209
|
+
});
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
it("user-scoped arena applies to every record when userId provided", async () => {
|
|
213
|
+
mockOk({ status: "ok", inserted: 3, ids: ["a", "b", "c"] });
|
|
214
|
+
await engineStoreBatch("https://e", {
|
|
215
|
+
clientId: "acme",
|
|
216
|
+
userId: "user-42",
|
|
217
|
+
records: [
|
|
218
|
+
{ content: "a" },
|
|
219
|
+
{ content: "b" },
|
|
220
|
+
{ content: "c" },
|
|
221
|
+
],
|
|
222
|
+
});
|
|
223
|
+
const body = JSON.parse(calls[0].init.body);
|
|
224
|
+
for (const r of body.records) {
|
|
225
|
+
expect(r.metadata.arena).toBe("acme:user-42");
|
|
226
|
+
}
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
it("passes explicit per-record id through unchanged", async () => {
|
|
230
|
+
mockOk({ status: "ok", inserted: 1, ids: ["custom-id"] });
|
|
231
|
+
await engineStoreBatch("https://e", {
|
|
232
|
+
clientId: "acme",
|
|
233
|
+
records: [{ id: "custom-id", content: "x" }],
|
|
234
|
+
});
|
|
235
|
+
const body = JSON.parse(calls[0].init.body);
|
|
236
|
+
expect(body.records[0].id).toBe("custom-id");
|
|
237
|
+
expect(body.records[0].content).toBe("x");
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
it("omits id when not provided so engine hashes the content", async () => {
|
|
241
|
+
mockOk({ status: "ok", inserted: 1, ids: ["server-hashed"] });
|
|
242
|
+
await engineStoreBatch("https://e", {
|
|
243
|
+
clientId: "acme",
|
|
244
|
+
records: [{ content: "x" }],
|
|
245
|
+
});
|
|
246
|
+
const body = JSON.parse(calls[0].init.body);
|
|
247
|
+
expect(body.records[0]).not.toHaveProperty("id");
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
it("does NOT let caller override arena via per-record metadata", async () => {
|
|
251
|
+
mockOk({ status: "ok", inserted: 1, ids: ["x"] });
|
|
252
|
+
await engineStoreBatch("https://e", {
|
|
253
|
+
clientId: "acme",
|
|
254
|
+
records: [
|
|
255
|
+
{ content: "x", metadata: { arena: "tenant-b" } },
|
|
256
|
+
],
|
|
257
|
+
});
|
|
258
|
+
const body = JSON.parse(calls[0].init.body);
|
|
259
|
+
expect(body.records[0].metadata.arena).toBe("acme");
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
it("returns early without HTTP call when records is empty", async () => {
|
|
263
|
+
mockOk({ status: "ok", inserted: 0, ids: [] });
|
|
264
|
+
const out = await engineStoreBatch("https://e", {
|
|
265
|
+
clientId: "acme",
|
|
266
|
+
records: [],
|
|
267
|
+
});
|
|
268
|
+
expect(out).toEqual({ status: "ok", inserted: 0, ids: [] });
|
|
269
|
+
expect(calls.length).toBe(0);
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
it("rejects missing clientId", async () => {
|
|
273
|
+
await expect(
|
|
274
|
+
engineStoreBatch("https://e", { records: [{ content: "x" }] })
|
|
275
|
+
).rejects.toThrow(/clientId/);
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
it("rejects non-array records", async () => {
|
|
279
|
+
await expect(
|
|
280
|
+
engineStoreBatch("https://e", { clientId: "a" })
|
|
281
|
+
).rejects.toThrow(/records/);
|
|
282
|
+
await expect(
|
|
283
|
+
engineStoreBatch("https://e", { clientId: "a", records: "oops" })
|
|
284
|
+
).rejects.toThrow(/records/);
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
it("rejects non-string content with the offending index", async () => {
|
|
288
|
+
await expect(
|
|
289
|
+
engineStoreBatch("https://e", {
|
|
290
|
+
clientId: "a",
|
|
291
|
+
records: [{ content: "ok" }, { content: 123 }],
|
|
292
|
+
})
|
|
293
|
+
).rejects.toThrow(/records\[1\]\.content/);
|
|
294
|
+
});
|
|
295
|
+
});
|
|
296
|
+
|
|
173
297
|
describe("engineSearch", () => {
|
|
174
298
|
it("tenant-only arenas list when no userId", async () => {
|
|
175
299
|
mockOk({ results: [] });
|
|
@@ -206,6 +206,75 @@ export async function engineStore(engineUrl, opts) {
|
|
|
206
206
|
return fetchEngine(engineUrl, "/store", body, { headers });
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
+
/**
|
|
210
|
+
* Batch-store many records in one engine call.
|
|
211
|
+
*
|
|
212
|
+
* One HTTP round-trip → engine issues one batched embed call covering
|
|
213
|
+
* every record (vs N round-trips + N single-text embeds for `engineStore`).
|
|
214
|
+
* Per `/store-batch` semantics, the response includes per-record ids
|
|
215
|
+
* plus the engine block with per-layer write counts.
|
|
216
|
+
*
|
|
217
|
+
* Arena composition matches `engineStore`: tenant-wide
|
|
218
|
+
* (`arena = clientId`) by default, user-scoped (`arena = clientId:userId`)
|
|
219
|
+
* when `userId` is supplied, overridable via `scope`. The shared arena
|
|
220
|
+
* is applied to **every** record; per-record `metadata` is preserved
|
|
221
|
+
* but cannot override the SDK-fixed arena (last-spread-wins ordering).
|
|
222
|
+
*
|
|
223
|
+
* Each record may carry an explicit `id` (stable dedup key) or omit
|
|
224
|
+
* it and let the engine hash the content. Per-record `metadata` is
|
|
225
|
+
* merged with the shared layerType / actorUserId before posting.
|
|
226
|
+
*
|
|
227
|
+
* @param {string} engineUrl
|
|
228
|
+
* @param {object} opts
|
|
229
|
+
* @param {string} opts.clientId
|
|
230
|
+
* @param {string} [opts.userId]
|
|
231
|
+
* @param {"tenant"|"user"} [opts.scope]
|
|
232
|
+
* @param {Array<{content: string, metadata?: object, id?: string}>} opts.records
|
|
233
|
+
* @param {string} [opts.layerType]
|
|
234
|
+
* @param {string} [opts.actorUserId]
|
|
235
|
+
* @param {Record<string,string>} [opts.headers]
|
|
236
|
+
* @returns {Promise<{status: string, inserted: number, ids: string[], engine?: object, duration_ms?: number}>}
|
|
237
|
+
*/
|
|
238
|
+
export async function engineStoreBatch(engineUrl, opts) {
|
|
239
|
+
const {
|
|
240
|
+
clientId,
|
|
241
|
+
userId,
|
|
242
|
+
scope,
|
|
243
|
+
records,
|
|
244
|
+
layerType,
|
|
245
|
+
actorUserId,
|
|
246
|
+
headers,
|
|
247
|
+
} = opts || {};
|
|
248
|
+
if (!clientId) throw new Error("engineStoreBatch: clientId required");
|
|
249
|
+
if (!Array.isArray(records)) {
|
|
250
|
+
throw new Error("engineStoreBatch: records[] required");
|
|
251
|
+
}
|
|
252
|
+
// Short-circuit empty input — no network round-trip, no error.
|
|
253
|
+
// Matches engineStore's tolerance for trivial inputs at upstream callers.
|
|
254
|
+
if (records.length === 0) {
|
|
255
|
+
return { status: "ok", inserted: 0, ids: [] };
|
|
256
|
+
}
|
|
257
|
+
const arena = composeArena(clientId, userId, scope);
|
|
258
|
+
const body = {
|
|
259
|
+
records: records.map((r, i) => {
|
|
260
|
+
if (typeof r?.content !== "string") {
|
|
261
|
+
throw new Error(`engineStoreBatch: records[${i}].content must be a string`);
|
|
262
|
+
}
|
|
263
|
+
return {
|
|
264
|
+
...(r.id ? { id: r.id } : {}),
|
|
265
|
+
content: r.content,
|
|
266
|
+
metadata: {
|
|
267
|
+
...(r.metadata || {}),
|
|
268
|
+
arena,
|
|
269
|
+
...(layerType ? { layer_type: layerType } : {}),
|
|
270
|
+
...(actorUserId !== undefined ? { actor_user_id: actorUserId } : {}),
|
|
271
|
+
},
|
|
272
|
+
};
|
|
273
|
+
}),
|
|
274
|
+
};
|
|
275
|
+
return fetchEngine(engineUrl, "/store-batch", body, { headers });
|
|
276
|
+
}
|
|
277
|
+
|
|
209
278
|
/**
|
|
210
279
|
* Search the engine, scoped to a tenant.
|
|
211
280
|
*
|
|
@@ -211,6 +211,7 @@ class EmbedClient:
|
|
|
211
211
|
autodetect: bool = True,
|
|
212
212
|
timeout: float = 120.0,
|
|
213
213
|
env_prefix: str = "",
|
|
214
|
+
max_batch: int = 5,
|
|
214
215
|
) -> None:
|
|
215
216
|
self._configured_provider = provider
|
|
216
217
|
self._provider = provider
|
|
@@ -222,6 +223,12 @@ class EmbedClient:
|
|
|
222
223
|
self._autodetect = autodetect
|
|
223
224
|
self._env_prefix = env_prefix
|
|
224
225
|
self._detected = False
|
|
226
|
+
# 0 = unlimited (no chunking). Positive = max texts per upstream call;
|
|
227
|
+
# larger inputs are split into multiple calls (concurrent in async path)
|
|
228
|
+
# and the results concatenated. Defaults to 5 because that's the per-call
|
|
229
|
+
# cap observed on Pentatonic AI Gateway — above which it 502s and the
|
|
230
|
+
# caller silently loses vector writes (see test_chunking_* tests).
|
|
231
|
+
self._max_batch = max(0, max_batch)
|
|
225
232
|
|
|
226
233
|
# ------------------------------------------------------------------
|
|
227
234
|
# Construction
|
|
@@ -248,6 +255,7 @@ class EmbedClient:
|
|
|
248
255
|
{prefix}EMBED_PROVIDER default 'openai'
|
|
249
256
|
{prefix}EMBED_AUTODETECT default 'true'
|
|
250
257
|
{prefix}EMBED_TIMEOUT default '120'
|
|
258
|
+
{prefix}EMBED_MAX_BATCH default '5' (gateway-safe; '0' disables chunking)
|
|
251
259
|
"""
|
|
252
260
|
url_var = url_var or f"{prefix}NV_EMBED_URL"
|
|
253
261
|
key_var = key_var or f"{prefix}EMBED_API_KEY"
|
|
@@ -259,6 +267,7 @@ class EmbedClient:
|
|
|
259
267
|
provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
|
|
260
268
|
autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
|
|
261
269
|
timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
|
|
270
|
+
max_batch = int(os.environ.get(f"{prefix}EMBED_MAX_BATCH", "5"))
|
|
262
271
|
|
|
263
272
|
provider = resolve_provider(provider_name, env_prefix=prefix)
|
|
264
273
|
return cls(
|
|
@@ -269,6 +278,7 @@ class EmbedClient:
|
|
|
269
278
|
autodetect=autodetect,
|
|
270
279
|
timeout=timeout,
|
|
271
280
|
env_prefix=prefix,
|
|
281
|
+
max_batch=max_batch,
|
|
272
282
|
)
|
|
273
283
|
|
|
274
284
|
# ------------------------------------------------------------------
|
|
@@ -307,10 +317,21 @@ class EmbedClient:
|
|
|
307
317
|
# ------------------------------------------------------------------
|
|
308
318
|
|
|
309
319
|
def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
|
310
|
-
"""Embed a list of texts. Empty list returns empty list.
|
|
320
|
+
"""Embed a list of texts. Empty list returns empty list.
|
|
321
|
+
|
|
322
|
+
Splits into chunks of `max_batch` (default 5) and posts each
|
|
323
|
+
sequentially when the input exceeds the limit. Results are
|
|
324
|
+
concatenated in input order. `max_batch=0` disables chunking.
|
|
325
|
+
"""
|
|
311
326
|
if not texts:
|
|
312
327
|
return []
|
|
313
|
-
|
|
328
|
+
if self._max_batch == 0 or len(texts) <= self._max_batch:
|
|
329
|
+
return self._post_with_autodetect(texts, async_mode=False)
|
|
330
|
+
out: list[list[float]] = []
|
|
331
|
+
for start in range(0, len(texts), self._max_batch):
|
|
332
|
+
chunk = texts[start:start + self._max_batch]
|
|
333
|
+
out.extend(self._post_with_autodetect(chunk, async_mode=False))
|
|
334
|
+
return out
|
|
314
335
|
|
|
315
336
|
def embed_one(self, text: str) -> list[float]:
|
|
316
337
|
return self.embed_batch([text])[0]
|
|
@@ -320,9 +341,25 @@ class EmbedClient:
|
|
|
320
341
|
# ------------------------------------------------------------------
|
|
321
342
|
|
|
322
343
|
async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
|
|
344
|
+
"""Async embed. Chunks are fired concurrently via asyncio.gather
|
|
345
|
+
when the input exceeds `max_batch`; raises the first error if any
|
|
346
|
+
chunk fails (matching the un-chunked semantics)."""
|
|
323
347
|
if not texts:
|
|
324
348
|
return []
|
|
325
|
-
|
|
349
|
+
if self._max_batch == 0 or len(texts) <= self._max_batch:
|
|
350
|
+
return await self._post_with_autodetect_async(texts)
|
|
351
|
+
import asyncio
|
|
352
|
+
chunks = [
|
|
353
|
+
texts[start:start + self._max_batch]
|
|
354
|
+
for start in range(0, len(texts), self._max_batch)
|
|
355
|
+
]
|
|
356
|
+
results = await asyncio.gather(
|
|
357
|
+
*(self._post_with_autodetect_async(chunk) for chunk in chunks)
|
|
358
|
+
)
|
|
359
|
+
out: list[list[float]] = []
|
|
360
|
+
for r in results:
|
|
361
|
+
out.extend(r)
|
|
362
|
+
return out
|
|
326
363
|
|
|
327
364
|
async def embed_one_async(self, text: str) -> list[float]:
|
|
328
365
|
out = await self.embed_batch_async([text])
|
|
@@ -2362,22 +2362,20 @@ async def people_list_internal(
|
|
|
2362
2362
|
email_filter_clause = " AND s.person_email IN $emails"
|
|
2363
2363
|
params["emails"] = emails_filter
|
|
2364
2364
|
|
|
2365
|
-
search_clause = ""
|
|
2366
2365
|
if search_pattern:
|
|
2367
|
-
#
|
|
2368
|
-
#
|
|
2369
|
-
#
|
|
2370
|
-
#
|
|
2371
|
-
#
|
|
2372
|
-
#
|
|
2373
|
-
|
|
2366
|
+
# Defer the whole search filter until after the Person
|
|
2367
|
+
# join — person_name only exists after the OPTIONAL
|
|
2368
|
+
# MATCH below. Pre-filtering ChannelStats on email
|
|
2369
|
+
# alone (the earlier two-step approach) silently dropped
|
|
2370
|
+
# name-only matches, e.g. email=ag@x.io / name="Alex
|
|
2371
|
+
# Tong" with search="alex" — the early WHERE failed and
|
|
2372
|
+
# the post-join filter never saw the row.
|
|
2374
2373
|
params["search"] = search_pattern
|
|
2375
2374
|
|
|
2376
2375
|
cypher = (
|
|
2377
2376
|
"MATCH (s:ChannelStat)\n"
|
|
2378
2377
|
"WHERE s.arena IN $arenas"
|
|
2379
2378
|
+ email_filter_clause
|
|
2380
|
-
+ search_clause
|
|
2381
2379
|
+ "\n"
|
|
2382
2380
|
"WITH s.person_email AS person_email,\n"
|
|
2383
2381
|
" collect({channel: s.channel, count: s.count,\n"
|
|
@@ -2393,11 +2391,17 @@ async def people_list_internal(
|
|
|
2393
2391
|
"WITH person_email,\n"
|
|
2394
2392
|
" channels,\n"
|
|
2395
2393
|
" head(collect(DISTINCT p.name)) AS person_name\n"
|
|
2396
|
-
# Apply the
|
|
2397
|
-
#
|
|
2394
|
+
# Apply the search filter now that we have both the
|
|
2395
|
+
# joined name and the email available. `coalesce(name,
|
|
2396
|
+
# '')` keeps people without a Person node in the result
|
|
2397
|
+
# set when their email matches — they fall through the
|
|
2398
|
+
# name probe cleanly instead of bypassing the filter
|
|
2399
|
+
# via a `person_name IS NULL` short-circuit, which was
|
|
2400
|
+
# the prior bug (anyone without a Person node passed
|
|
2401
|
+
# search regardless of term).
|
|
2398
2402
|
+ (
|
|
2399
|
-
"WHERE (
|
|
2400
|
-
" OR toLower(person_email) CONTAINS $search
|
|
2403
|
+
"WHERE toLower(coalesce(person_name, '')) CONTAINS $search\n"
|
|
2404
|
+
" OR toLower(person_email) CONTAINS $search\n"
|
|
2401
2405
|
if search_pattern
|
|
2402
2406
|
else ""
|
|
2403
2407
|
)
|
|
@@ -352,3 +352,141 @@ def test_url_without_path_gets_provider_default(recorder):
|
|
|
352
352
|
)
|
|
353
353
|
client.embed_batch(["x"])
|
|
354
354
|
assert recorder.calls[0]["url"] == "https://lambda-gateway.pentatonic.com/v1/embed"
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# ----------------------------------------------------------------------
|
|
358
|
+
# Chunking — work around the Pentatonic AI Gateway's per-call cap of 5
|
|
359
|
+
# texts. Above the cap the gateway 502s; without chunking the layer's
|
|
360
|
+
# /index-batch handler raises, the compat shim swallows it, and vector
|
|
361
|
+
# writes silently drop. Chunking splits the request into chunks of
|
|
362
|
+
# `max_batch` so each call stays within the gateway's limit.
|
|
363
|
+
# ----------------------------------------------------------------------
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
class _PentatonicEchoStub:
|
|
367
|
+
"""httpx.post replacement that returns one embedding per input text,
|
|
368
|
+
matching real gateway behaviour. Each response embedding encodes the
|
|
369
|
+
input index so tests can assert order preservation across chunks."""
|
|
370
|
+
|
|
371
|
+
def __init__(self):
|
|
372
|
+
self.calls: list[dict] = []
|
|
373
|
+
self._offset = 0 # running input-index counter across calls
|
|
374
|
+
|
|
375
|
+
def __call__(self, url, *, json, headers, timeout):
|
|
376
|
+
self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
|
|
377
|
+
n = len(json.get("input") or [])
|
|
378
|
+
embs = [[float(self._offset + i)] for i in range(n)]
|
|
379
|
+
self._offset += n
|
|
380
|
+
return _FakeResponse(200, {"embeddings": embs})
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def test_chunking_below_max_batch_makes_one_call(monkeypatch):
|
|
384
|
+
"""N <= max_batch sends one request, no chunking overhead."""
|
|
385
|
+
stub = _PentatonicEchoStub()
|
|
386
|
+
monkeypatch.setattr(httpx, "post", stub)
|
|
387
|
+
client = EmbedClient(
|
|
388
|
+
url="https://lambda-gateway.pentatonic.com/v1/embed",
|
|
389
|
+
api_key="k", model="m",
|
|
390
|
+
provider=PROVIDERS["pentatonic-gateway"],
|
|
391
|
+
max_batch=5,
|
|
392
|
+
)
|
|
393
|
+
out = client.embed_batch([f"t{i}" for i in range(5)])
|
|
394
|
+
assert len(out) == 5
|
|
395
|
+
assert len(stub.calls) == 1
|
|
396
|
+
assert len(stub.calls[0]["json"]["input"]) == 5
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def test_chunking_above_max_batch_splits_into_calls(monkeypatch):
|
|
400
|
+
"""N > max_batch is split into len(N)/max_batch posts; results are
|
|
401
|
+
concatenated in input order so the caller can't tell."""
|
|
402
|
+
stub = _PentatonicEchoStub()
|
|
403
|
+
monkeypatch.setattr(httpx, "post", stub)
|
|
404
|
+
client = EmbedClient(
|
|
405
|
+
url="https://lambda-gateway.pentatonic.com/v1/embed",
|
|
406
|
+
api_key="k", model="m",
|
|
407
|
+
provider=PROVIDERS["pentatonic-gateway"],
|
|
408
|
+
max_batch=5,
|
|
409
|
+
)
|
|
410
|
+
out = client.embed_batch([f"t{i}" for i in range(12)])
|
|
411
|
+
# 12 texts → chunks of [5, 5, 2] → 3 calls
|
|
412
|
+
assert len(stub.calls) == 3
|
|
413
|
+
assert [len(c["json"]["input"]) for c in stub.calls] == [5, 5, 2]
|
|
414
|
+
# Stub returns one vector per input. Each vector encodes its
|
|
415
|
+
# cross-chunk input index → assert order preserved.
|
|
416
|
+
assert len(out) == 12
|
|
417
|
+
assert out == [[float(i)] for i in range(12)]
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def test_chunking_disabled_with_max_batch_zero(monkeypatch):
|
|
421
|
+
"""max_batch=0 means no chunking — old behaviour (one big call)."""
|
|
422
|
+
stub = _PentatonicEchoStub()
|
|
423
|
+
monkeypatch.setattr(httpx, "post", stub)
|
|
424
|
+
client = EmbedClient(
|
|
425
|
+
url="https://lambda-gateway.pentatonic.com/v1/embed",
|
|
426
|
+
api_key="k", model="m",
|
|
427
|
+
provider=PROVIDERS["pentatonic-gateway"],
|
|
428
|
+
max_batch=0,
|
|
429
|
+
)
|
|
430
|
+
client.embed_batch([f"t{i}" for i in range(20)])
|
|
431
|
+
assert len(stub.calls) == 1
|
|
432
|
+
assert len(stub.calls[0]["json"]["input"]) == 20
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def test_chunking_propagates_first_error(recorder):
|
|
436
|
+
"""If a chunk fails (e.g., gateway 502), the whole call raises with
|
|
437
|
+
the first error — matching the un-chunked semantics. We don't return
|
|
438
|
+
a partial vector list because the caller's downstream `for r, emb, txt
|
|
439
|
+
in zip(...)` loop would silently drop the failed records."""
|
|
440
|
+
# Pentatonic gateway 502 on every call (simulates the real bug)
|
|
441
|
+
recorder.respond(
|
|
442
|
+
"https://lambda-gateway.pentatonic.com/v1/embed",
|
|
443
|
+
_FakeResponse(502, "<html>...bad gateway...</html>"),
|
|
444
|
+
)
|
|
445
|
+
client = EmbedClient(
|
|
446
|
+
url="https://lambda-gateway.pentatonic.com/v1/embed",
|
|
447
|
+
api_key="k", model="m",
|
|
448
|
+
provider=PROVIDERS["pentatonic-gateway"],
|
|
449
|
+
max_batch=5,
|
|
450
|
+
)
|
|
451
|
+
with pytest.raises(EmbedHTTPError) as exc:
|
|
452
|
+
client.embed_batch([f"t{i}" for i in range(8)])
|
|
453
|
+
assert exc.value.status == 502
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class _OpenAIEchoStub:
|
|
457
|
+
"""OpenAI-shaped stub: returns one embedding per input as
|
|
458
|
+
{data: [{embedding: [...]}]}."""
|
|
459
|
+
|
|
460
|
+
def __init__(self):
|
|
461
|
+
self.calls: list[dict] = []
|
|
462
|
+
|
|
463
|
+
def __call__(self, url, *, json, headers, timeout):
|
|
464
|
+
self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
|
|
465
|
+
n = len(json.get("input") or [])
|
|
466
|
+
return _FakeResponse(200, {"data": [{"embedding": [0.0]} for _ in range(n)]})
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def test_from_env_reads_max_batch(monkeypatch):
|
|
470
|
+
"""{prefix}EMBED_MAX_BATCH overrides the default of 5."""
|
|
471
|
+
monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
|
|
472
|
+
monkeypatch.setenv("L4_EMBED_API_KEY", "k")
|
|
473
|
+
monkeypatch.setenv("L4_EMBED_MAX_BATCH", "3")
|
|
474
|
+
stub = _OpenAIEchoStub()
|
|
475
|
+
monkeypatch.setattr(httpx, "post", stub)
|
|
476
|
+
client = EmbedClient.from_env(prefix="L4_")
|
|
477
|
+
client.embed_batch([f"t{i}" for i in range(7)])
|
|
478
|
+
# 7 with chunk=3 → [3, 3, 1] → 3 calls
|
|
479
|
+
assert len(stub.calls) == 3
|
|
480
|
+
assert [len(c["json"]["input"]) for c in stub.calls] == [3, 3, 1]
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def test_from_env_default_max_batch_is_five(monkeypatch):
|
|
484
|
+
"""Default max_batch=5 matches the observed Pentatonic Gateway cap."""
|
|
485
|
+
monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
|
|
486
|
+
monkeypatch.setenv("L4_EMBED_API_KEY", "k")
|
|
487
|
+
stub = _OpenAIEchoStub()
|
|
488
|
+
monkeypatch.setattr(httpx, "post", stub)
|
|
489
|
+
client = EmbedClient.from_env(prefix="L4_")
|
|
490
|
+
client.embed_batch([f"t{i}" for i in range(10)])
|
|
491
|
+
# 10 with default chunk=5 → [5, 5] → 2 calls
|
|
492
|
+
assert len(stub.calls) == 2
|
|
@@ -301,6 +301,59 @@ def test_search_substring_matches_email_or_name(
|
|
|
301
301
|
}
|
|
302
302
|
|
|
303
303
|
|
|
304
|
+
@_skip_no_neo4j
|
|
305
|
+
def test_search_matches_name_when_email_does_not(
|
|
306
|
+
neo4j_driver, proxy_module
|
|
307
|
+
) -> None:
|
|
308
|
+
"""Regression: an early-WHERE on ``ChannelStat`` filtered rows by
|
|
309
|
+
email-only before the Person join, so a person whose NAME matched
|
|
310
|
+
the search term but whose EMAIL didn't was silently dropped. Fixed
|
|
311
|
+
by deferring the whole search filter until after the OPTIONAL MATCH
|
|
312
|
+
on Person. Sentinel case: email ``ag@x.io`` / name ``Alex Tong`` /
|
|
313
|
+
search ``alex`` — must match on name even though email has no
|
|
314
|
+
substring overlap."""
|
|
315
|
+
driver, (arena, _, _) = neo4j_driver
|
|
316
|
+
with driver.session() as session:
|
|
317
|
+
_ensure_indexes(session)
|
|
318
|
+
_write_stat(session, arena, "ag@x.io", "email", name="Alex Tong")
|
|
319
|
+
_write_stat(session, arena, "other@x.io", "email", name="Bea Chen")
|
|
320
|
+
|
|
321
|
+
out = _call_people_list(proxy_module, arenas=[arena], search="alex")
|
|
322
|
+
assert {i.person_email for i in out.items} == {"ag@x.io"}
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
@_skip_no_neo4j
|
|
326
|
+
def test_search_does_not_bypass_filter_when_person_node_missing(
|
|
327
|
+
neo4j_driver, proxy_module
|
|
328
|
+
) -> None:
|
|
329
|
+
"""Regression: the previous WHERE clause had a ``person_name IS
|
|
330
|
+
NULL`` short-circuit that bypassed the search filter for anyone
|
|
331
|
+
without a Person node — they matched any search term. Fixed by
|
|
332
|
+
using ``coalesce(person_name, '')`` so the name probe just fails
|
|
333
|
+
cleanly when no Person record exists, falling through to the email
|
|
334
|
+
probe."""
|
|
335
|
+
driver, (arena, _, _) = neo4j_driver
|
|
336
|
+
with driver.session() as session:
|
|
337
|
+
_ensure_indexes(session)
|
|
338
|
+
# Insert a ChannelStat WITHOUT a Person node — simulates a
|
|
339
|
+
# contact who's been emailed but never had a Person record
|
|
340
|
+
# materialised. Use a raw write so _write_stat's MERGE doesn't
|
|
341
|
+
# auto-create a Person.
|
|
342
|
+
session.run(
|
|
343
|
+
"MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: 'email'}) "
|
|
344
|
+
"SET s.count = 1, s.inbound = 1, s.outbound = 0, "
|
|
345
|
+
" s.last_seen = '2026-05-10T00:00:00Z', "
|
|
346
|
+
" s.first_seen = '2026-05-10T00:00:00Z'",
|
|
347
|
+
arena=arena, email="orphan@x.io",
|
|
348
|
+
)
|
|
349
|
+
_write_stat(session, arena, "alex@x.io", "email", name="Alex Tong")
|
|
350
|
+
|
|
351
|
+
# Search "alex" must NOT match orphan@x.io — neither name (missing)
|
|
352
|
+
# nor email contains "alex".
|
|
353
|
+
out = _call_people_list(proxy_module, arenas=[arena], search="alex")
|
|
354
|
+
assert {i.person_email for i in out.items} == {"alex@x.io"}
|
|
355
|
+
|
|
356
|
+
|
|
304
357
|
# ---------------------------------------------------------------------------
|
|
305
358
|
# Pagination.
|
|
306
359
|
# ---------------------------------------------------------------------------
|