@pentatonic-ai/ai-agent-sdk 0.9.1 → 0.9.3
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory/src/__tests__/engine.test.js +124 -0
- package/packages/memory/src/engine.js +69 -0
- package/packages/memory-engine/compat/Dockerfile +12 -1
- package/packages/memory-engine/compat/server.py +116 -14
- package/packages/memory-engine/docker-compose.yml +16 -2
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +36 -14
- package/packages/memory-engine/engine/services/l4/server.py +11 -1
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +11 -5
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +11 -5
- package/packages/memory-engine/tests/test_people_list_reader.py +53 -0
package/dist/index.cjs
CHANGED
@@ -906,7 +906,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 
 // src/telemetry.js
-var VERSION = "0.9.1";
+var VERSION = "0.9.3";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js
CHANGED
@@ -875,7 +875,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 
 // src/telemetry.js
-var VERSION = "0.9.1";
+var VERSION = "0.9.3";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.9.1",
+  "version": "0.9.3",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
package/packages/memory/src/__tests__/engine.test.js
CHANGED
@@ -2,6 +2,7 @@ import { describe, it, expect, beforeEach, jest } from "@jest/globals";
 import {
   fetchEngine,
   engineStore,
+  engineStoreBatch,
   engineSearch,
   engineAggregate,
   enginePeopleList,
@@ -170,6 +171,129 @@ describe("engine HTTP client", () => {
     });
   });
 
+  describe("engineStoreBatch", () => {
+    it("posts to /store-batch with arena fixed per-record", async () => {
+      mockOk({ status: "ok", inserted: 2, ids: ["a1", "a2"] });
+      await engineStoreBatch("https://e", {
+        clientId: "acme",
+        records: [
+          { content: "first", metadata: { kind: "note" } },
+          { content: "second", metadata: { kind: "doc" } },
+        ],
+        layerType: "episodic",
+        actorUserId: "u-1",
+      });
+      const body = JSON.parse(calls[0].init.body);
+      expect(calls[0].url).toBe("https://e/store-batch");
+      expect(body).toEqual({
+        records: [
+          {
+            content: "first",
+            metadata: {
+              kind: "note",
+              arena: "acme",
+              layer_type: "episodic",
+              actor_user_id: "u-1",
+            },
+          },
+          {
+            content: "second",
+            metadata: {
+              kind: "doc",
+              arena: "acme",
+              layer_type: "episodic",
+              actor_user_id: "u-1",
+            },
+          },
+        ],
+      });
+    });
+
+    it("user-scoped arena applies to every record when userId provided", async () => {
+      mockOk({ status: "ok", inserted: 3, ids: ["a", "b", "c"] });
+      await engineStoreBatch("https://e", {
+        clientId: "acme",
+        userId: "user-42",
+        records: [
+          { content: "a" },
+          { content: "b" },
+          { content: "c" },
+        ],
+      });
+      const body = JSON.parse(calls[0].init.body);
+      for (const r of body.records) {
+        expect(r.metadata.arena).toBe("acme:user-42");
+      }
+    });
+
+    it("passes explicit per-record id through unchanged", async () => {
+      mockOk({ status: "ok", inserted: 1, ids: ["custom-id"] });
+      await engineStoreBatch("https://e", {
+        clientId: "acme",
+        records: [{ id: "custom-id", content: "x" }],
+      });
+      const body = JSON.parse(calls[0].init.body);
+      expect(body.records[0].id).toBe("custom-id");
+      expect(body.records[0].content).toBe("x");
+    });
+
+    it("omits id when not provided so engine hashes the content", async () => {
+      mockOk({ status: "ok", inserted: 1, ids: ["server-hashed"] });
+      await engineStoreBatch("https://e", {
+        clientId: "acme",
+        records: [{ content: "x" }],
+      });
+      const body = JSON.parse(calls[0].init.body);
+      expect(body.records[0]).not.toHaveProperty("id");
+    });
+
+    it("does NOT let caller override arena via per-record metadata", async () => {
+      mockOk({ status: "ok", inserted: 1, ids: ["x"] });
+      await engineStoreBatch("https://e", {
+        clientId: "acme",
+        records: [
+          { content: "x", metadata: { arena: "tenant-b" } },
+        ],
+      });
+      const body = JSON.parse(calls[0].init.body);
+      expect(body.records[0].metadata.arena).toBe("acme");
+    });
+
+    it("returns early without HTTP call when records is empty", async () => {
+      mockOk({ status: "ok", inserted: 0, ids: [] });
+      const out = await engineStoreBatch("https://e", {
+        clientId: "acme",
+        records: [],
+      });
+      expect(out).toEqual({ status: "ok", inserted: 0, ids: [] });
+      expect(calls.length).toBe(0);
+    });
+
+    it("rejects missing clientId", async () => {
+      await expect(
+        engineStoreBatch("https://e", { records: [{ content: "x" }] })
+      ).rejects.toThrow(/clientId/);
+    });
+
+    it("rejects non-array records", async () => {
+      await expect(
+        engineStoreBatch("https://e", { clientId: "a" })
+      ).rejects.toThrow(/records/);
+      await expect(
+        engineStoreBatch("https://e", { clientId: "a", records: "oops" })
+      ).rejects.toThrow(/records/);
+    });
+
+    it("rejects non-string content with the offending index", async () => {
+      await expect(
+        engineStoreBatch("https://e", {
+          clientId: "a",
+          records: [{ content: "ok" }, { content: 123 }],
+        })
+      ).rejects.toThrow(/records\[1\]\.content/);
+    });
+  });
+
   describe("engineSearch", () => {
     it("tenant-only arenas list when no userId", async () => {
       mockOk({ results: [] });
package/packages/memory/src/engine.js
CHANGED
@@ -206,6 +206,75 @@ export async function engineStore(engineUrl, opts) {
   return fetchEngine(engineUrl, "/store", body, { headers });
 }
 
+/**
+ * Batch-store many records in one engine call.
+ *
+ * One HTTP round-trip → engine issues one batched embed call covering
+ * every record (vs N round-trips + N single-text embeds for `engineStore`).
+ * Per `/store-batch` semantics, the response includes per-record ids
+ * plus the engine block with per-layer write counts.
+ *
+ * Arena composition matches `engineStore`: tenant-wide
+ * (`arena = clientId`) by default, user-scoped (`arena = clientId:userId`)
+ * when `userId` is supplied, overridable via `scope`. The shared arena
+ * is applied to **every** record; per-record `metadata` is preserved
+ * but cannot override the SDK-fixed arena (last-spread-wins ordering).
+ *
+ * Each record may carry an explicit `id` (stable dedup key) or omit
+ * it and let the engine hash the content. Per-record `metadata` is
+ * merged with the shared layerType / actorUserId before posting.
+ *
+ * @param {string} engineUrl
+ * @param {object} opts
+ * @param {string} opts.clientId
+ * @param {string} [opts.userId]
+ * @param {"tenant"|"user"} [opts.scope]
+ * @param {Array<{content: string, metadata?: object, id?: string}>} opts.records
+ * @param {string} [opts.layerType]
+ * @param {string} [opts.actorUserId]
+ * @param {Record<string,string>} [opts.headers]
+ * @returns {Promise<{status: string, inserted: number, ids: string[], engine?: object, duration_ms?: number}>}
+ */
+export async function engineStoreBatch(engineUrl, opts) {
+  const {
+    clientId,
+    userId,
+    scope,
+    records,
+    layerType,
+    actorUserId,
+    headers,
+  } = opts || {};
+  if (!clientId) throw new Error("engineStoreBatch: clientId required");
+  if (!Array.isArray(records)) {
+    throw new Error("engineStoreBatch: records[] required");
+  }
+  // Short-circuit empty input — no network round-trip, no error.
+  // Matches engineStore's tolerance for trivial inputs at upstream callers.
+  if (records.length === 0) {
+    return { status: "ok", inserted: 0, ids: [] };
+  }
+  const arena = composeArena(clientId, userId, scope);
+  const body = {
+    records: records.map((r, i) => {
+      if (typeof r?.content !== "string") {
+        throw new Error(`engineStoreBatch: records[${i}].content must be a string`);
+      }
+      return {
+        ...(r.id ? { id: r.id } : {}),
+        content: r.content,
+        metadata: {
+          ...(r.metadata || {}),
+          arena,
+          ...(layerType ? { layer_type: layerType } : {}),
+          ...(actorUserId !== undefined ? { actor_user_id: actorUserId } : {}),
+        },
+      };
+    }),
+  };
+  return fetchEngine(engineUrl, "/store-batch", body, { headers });
+}
+
 /**
  * Search the engine, scoped to a tenant.
  *
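Taken together, the tests and JSDoc above pin down the exact body `engineStoreBatch` puts on the wire. A minimal sketch of the equivalent raw call, assuming the compat shim is reachable on its default port 8099 and using Python's requests client purely for illustration (the SDK itself goes through fetchEngine):

import requests

# Hypothetical raw equivalent of the SDK call in the first test above.
# The SDK spreads caller metadata first, then its own fields, so the
# arena / layer_type / actor_user_id keys always win the merge.
body = {
    "records": [
        {
            "content": "first",
            "metadata": {
                "kind": "note",            # caller-supplied metadata survives
                "arena": "acme",           # clientId, or "acme:user-42" when user-scoped
                "layer_type": "episodic",
                "actor_user_id": "u-1",
            },
        },
    ],
}
resp = requests.post("http://localhost:8099/store-batch", json=body, timeout=30)
print(resp.json())  # e.g. {"status": "ok", "inserted": 1, "ids": ["..."]}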
package/packages/memory-engine/compat/Dockerfile
CHANGED
@@ -4,7 +4,18 @@ WORKDIR /app
 
 RUN pip install --no-cache-dir fastapi uvicorn[standard] httpx pydantic
 
-
+# Build context is the memory-engine root (see docker-compose.yml). The
+# shim's server.py side-loads engine/services/_shared/embed_provider.py
+# for shared-embed mode on /store-batch (one embed call across all 4
+# layer indexers vs 4 redundant calls).
+COPY compat/server.py /app/server.py
+# server.py's sys.path.insert resolves "../engine/services" relative to
+# its own location (/app/server.py → /engine/services). Mirror that
+# layout so the import works without changing the runtime code.
+COPY engine/services/_shared /engine/services/_shared
+# Make `_shared` an importable package (mirror the layer services'
+# layout where __init__.py exists or python detects PEP 420 namespace).
+RUN touch /engine/services/__init__.py
 
 EXPOSE 8099
 
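The layout mirroring those comments describe can be sanity-checked in isolation; a tiny sketch of the path arithmetic (POSIX paths, as inside the container):

import os

# /app/server.py resolving "../engine/services" relative to itself must
# land on /engine/services, which is exactly where the Dockerfile COPYs
# the _shared package.
resolved = os.path.normpath(
    os.path.join(os.path.dirname("/app/server.py"), "..", "engine", "services")
)
assert resolved == "/engine/services"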
package/packages/memory-engine/compat/server.py
CHANGED
@@ -34,6 +34,7 @@ Environment:
 
 import hashlib
 import os
+import sys
 import time
 from datetime import datetime, timezone
 from typing import Any, Optional
@@ -42,6 +43,17 @@ import httpx
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field
 
+# Reach into the engine/services tree so we can reuse EmbedClient. The
+# tree isn't a real installed package; layer services and the compat
+# shim both side-load it the same way. Keeps the chunking + auto-detect
+# behaviour identical between the shim's pre-embed and the per-layer
+# embeds that previously did the same work N times.
+sys.path.insert(
+    0,
+    os.path.join(os.path.dirname(__file__), "..", "engine", "services"),
+)
+from _shared.embed_provider import EmbedClient  # noqa: E402
+
 # ----------------------------------------------------------------------
 # Config
 # ----------------------------------------------------------------------
@@ -63,6 +75,30 @@ NEO4J_DB = os.environ.get("NEO4J_DB", "neo4j")
 
 PORT = int(os.environ.get("PORT", "8099"))
 
+# Shared-embed mode. When on, /store-batch computes embeddings once at
+# the shim level and forwards them to each layer's /index-batch so the
+# layer skips its own embed call. Cuts gateway RPC count by ~4× (L4 +
+# L5 + L6 + L2-internal all did the same embed work independently).
+# Default ON because all layers in this engine use the same NV-Embed
+# model; disable if you ever wire up per-layer differentiated embedders
+# (e.g. cohere on L5, openai on L4).
+SHARE_EMBEDDINGS = os.environ.get("PME_SHARE_EMBEDDINGS", "true").lower() == "true"
+
+_embed_client: EmbedClient | None = None
+
+
+def _get_embed_client() -> EmbedClient:
+    """Lazy-init the shim's EmbedClient using PME_-prefixed env vars
+    (matches L2's pattern). Cached for the process lifetime so the
+    auto-detect handshake only happens once."""
+    global _embed_client
+    if _embed_client is None:
+        _embed_client = EmbedClient.from_env(
+            prefix="PME_",
+            default_url=NV_EMBED_URL,
+        )
+    return _embed_client
+
 
 # Layer types we surface as the SDK 4-layer projection. Engine stores
 # everything as chunks tagged with arena + layer_type metadata; this
@@ -252,12 +288,23 @@ async def _embed_batch(texts: list[str]) -> list[list[float]]:
     return [d["embedding"] for d in resp.json()["data"]]
 
 
-async def _index_l4(records: list[dict[str, Any]]) -> int:
-
-    payload = {"records": [
+async def _index_l4(
+    records: list[dict[str, Any]],
+    embeddings: list[list[float]] | None = None,
+) -> int:
+    """Index records into the L4 sqlite-vec layer.
+
+    When `embeddings` is supplied (parallel to records), L4's /index-batch
+    skips its own embed call and uses ours — eliminates the redundant
+    embed work that previously cost ~850ms per drain alarm. When None,
+    L4 embeds itself (backwards-compatible path for older callers / tests
+    that don't share embeddings)."""
+    payload: dict[str, Any] = {"records": [
         {"id": r.get("id") or hashlib.sha1(r["content"].encode()).hexdigest()[:32],
          "text": r["content"]} for r in records
     ]}
+    if embeddings is not None:
+        payload["embeddings"] = embeddings
     try:
         resp = await _client().post(f"{L4_VEC_URL}/index-batch", json=payload, timeout=120.0)
         resp.raise_for_status()
@@ -267,13 +314,20 @@ async def _index_l4(records: list[dict[str, Any]]) -> int:
         return 0
 
 
-async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
+async def _index_l5(
+    records: list[dict[str, Any]],
+    arena: str = "general",
+    embeddings: list[list[float]] | None = None,
+) -> int:
     """Index records into the L5 Milvus comms layer (chats collection).
 
     arena is forwarded as a Milvus dynamic field so /search can filter
     by arena natively (vs the shim's defence-in-depth post-filter).
+
+    When `embeddings` is supplied (parallel to records), L5 skips its
+    own embed call — see _index_l4 docstring for the dedup story.
     """
-    payload = {
+    payload: dict[str, Any] = {
         "collection": "chats",
         "records": [
             {
@@ -287,6 +341,8 @@ async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
             for r in records
         ],
     }
+    if embeddings is not None:
+        payload["embeddings"] = embeddings
     try:
         resp = await _client().post(f"{L5_MILVUS_URL}/index-batch", json=payload, timeout=60.0)
         resp.raise_for_status()
@@ -299,9 +355,17 @@ async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
         return 0
 
 
-async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> int:
-
-    payload = {
+async def _index_l6(
+    records: list[dict[str, Any]],
+    arena: str = "general",
+    embeddings: list[list[float]] | None = None,
+) -> int:
+    """Index records into the L6 document store.
+
+    When `embeddings` is supplied (parallel to records), L6 skips its
+    own embed call — see _index_l4 docstring for the dedup story.
+    """
+    payload: dict[str, Any] = {
         "arena": arena,
         "records": [
             {
@@ -314,6 +378,8 @@ async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> int:
             for r in records
         ],
     }
+    if embeddings is not None:
+        payload["embeddings"] = embeddings
     try:
         resp = await _client().post(f"{L6_DOC_URL}/index-batch", json=payload, timeout=120.0)
         resp.raise_for_status()
@@ -323,14 +389,22 @@ async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> int:
         return 0
 
 
-async def _index_l2_internal(records: list[dict[str, Any]], arena: str = "general") -> dict:
+async def _index_l2_internal(
+    records: list[dict[str, Any]],
+    arena: str = "general",
+    embeddings: list[list[float]] | None = None,
+) -> dict:
     """Populate L2's internal stores: L0 BM25 + L4 QMD vec + L3 Neo4j KG.
 
     Without this, L2's RRF fusion runs over empty L0/L4-qmd/L3 layers and
     those zero-result rank lists pollute the score. The L2 proxy exposes
     /index-internal-batch which writes to all three in one round-trip.
+
+    When `embeddings` is supplied (parallel to records), L2's internal
+    embed call (used for L4-QMD population) is skipped — see _index_l4
+    docstring for the dedup story.
     """
-    payload = {
+    payload: dict[str, Any] = {
         "arena": arena,
         "records": [
             {
@@ -341,6 +415,8 @@ async def _index_l2_internal(records: list[dict[str, Any]], arena: str = "general") -> dict:
             for r in records
         ],
    }
+    if embeddings is not None:
+        payload["embeddings"] = embeddings
     try:
         resp = await _client().post(f"{L2_PROXY_URL}/index-internal-batch",
                                     json=payload, timeout=180.0)
@@ -646,11 +722,37 @@ async def store_batch(req: StoreBatchRequest):
 
     t0 = time.perf_counter()
     import asyncio
+
+    # Shared-embed mode: compute embeddings ONCE here, pass them down to
+    # every layer so they skip their own embed call. Previously L4 + L5
+    # + L6 + L2-internal each re-embedded the same texts in parallel,
+    # which fanned 4× the gateway RPCs. The gateway throttles at K≈10
+    # concurrent requests, so 40-way fan-out serialised into ~4 rounds
+    # of ~850ms each = ~3.5s of pure embed time per /store-batch. With
+    # shared embeddings we issue one chunked embed pass (10 sub-calls
+    # for N=50 records) and skip the per-layer redundant work entirely.
+    # Disabled via PME_SHARE_EMBEDDINGS=false for operators wiring up
+    # per-layer differentiated embedders.
+    shared_embeddings: list[list[float]] | None = None
+    embed_ms = 0.0
+    if SHARE_EMBEDDINGS and normalised:
+        texts = [r["content"] for r in normalised]
+        embed_t0 = time.perf_counter()
+        try:
+            shared_embeddings = await _get_embed_client().embed_batch_async(texts)
+        except Exception as exc:
+            # Fall back to per-layer embedding rather than failing the
+            # whole batch. The layers' /index-batch still works when
+            # `embeddings` is absent.
+            print(f"[shim] shared embed failed, falling back to per-layer: {exc}")
+            shared_embeddings = None
+        embed_ms = (time.perf_counter() - embed_t0) * 1000.0
+
     l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
-        _index_l4(normalised),
-        _index_l5(normalised, arena=req.arena or "general"),
-        _index_l6(normalised, arena=req.arena or "general"),
-        _index_l2_internal(normalised, arena=req.arena or "general"),
+        _index_l4(normalised, embeddings=shared_embeddings),
+        _index_l5(normalised, arena=req.arena or "general", embeddings=shared_embeddings),
+        _index_l6(normalised, arena=req.arena or "general", embeddings=shared_embeddings),
+        _index_l2_internal(normalised, arena=req.arena or "general", embeddings=shared_embeddings),
     )
     dur_ms = (time.perf_counter() - t0) * 1000.0
 
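The latency arithmetic in the store_batch comment is easy to make concrete; a back-of-envelope sketch using the figures quoted there (the round-based model is a simplification, and the 850 ms per round is the comment's number, not a measurement):

import math

K = 10             # gateway concurrency limit (K≈10 per the comment)
per_round_ms = 850.0
subcalls = 10      # chunked embed sub-calls for N=50 records

legacy_rpcs = 4 * subcalls   # L4 + L5 + L6 + L2-internal each embed
legacy_ms = math.ceil(legacy_rpcs / K) * per_round_ms    # 4 rounds -> 3400 ms
shared_rpcs = subcalls       # one shim-level pass
shared_ms = math.ceil(shared_rpcs / K) * per_round_ms    # 1 round -> 850 ms

print(legacy_rpcs, legacy_ms, shared_rpcs, shared_ms)    # 40 3400.0 10 850.0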
package/packages/memory-engine/docker-compose.yml
CHANGED
@@ -212,8 +212,11 @@ services:
   compat:
     <<: *engine-base
     build:
-      context
-
+      # Build context is the memory-engine root so the Dockerfile can
+      # COPY both compat/server.py and engine/services/_shared (shared
+      # EmbedClient for /store-batch dedup).
+      context: .
+      dockerfile: compat/Dockerfile
     container_name: pme-compat
     ports:
       - "127.0.0.1:${PME_PORT:-8099}:8099"
@@ -225,6 +228,17 @@ services:
       L5_MILVUS_URL: http://l5:8034
       L6_DOC_URL: http://l6:8037
       NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
+      # PME_ prefix vars feed the shim's EmbedClient for shared-embed
+      # mode on /store-batch (one embed call across all 4 indexers vs
+      # 4 redundant calls). Match the L2 config block so both clients
+      # hit the same gateway with the same model. Set
+      # PME_SHARE_EMBEDDINGS=false to revert to per-layer embedding.
+      PME_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
+      PME_EMBED_API_KEY: ${EMBED_API_KEY:-}
+      PME_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
+      PME_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
+      PME_NV_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
+      PME_SHARE_EMBEDDINGS: ${PME_SHARE_EMBEDDINGS:-true}
       BYPASS_L2_PROXY: ${BYPASS_L2_PROXY:-0}
     extra_hosts:
       - "host.docker.internal:host-gateway"
package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py
CHANGED
@@ -1496,6 +1496,12 @@ async def _embed_batch_local(texts: List[str]) -> List[List[float]]:
 class IndexInternalBatchRequest(BaseModel):
     records: List[Dict[str, Any]]  # [{"id": str, "content": str, "metadata": dict}, ...]
     arena: Optional[str] = "general"
+    # When supplied (parallel to `records`), skip the L4-QMD embed call
+    # and use these vectors directly. Compat shim populates this when
+    # shared-embed mode is on so we don't duplicate embed work across
+    # layers. Length must match records — defensive bail-out below if
+    # it doesn't.
+    embeddings: Optional[List[List[float]]] = None
 
 
 @app.post("/index-internal-batch")
@@ -1575,7 +1581,19 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
     # ---- L4 QMD vec (qmd.sqlite) ----------------------------------------
     l4_inserted = 0
     try:
-        embeddings = await _embed_batch_local([n["content"] for n in norm])
+        # Shared-embed shortcut: if the compat shim handed us pre-computed
+        # vectors that line up with our normalised records, use them and
+        # skip our own embed RPC. Fall back to per-layer embedding when
+        # the vectors are absent or the lengths don't match (defensive).
+        shared_embs = req.embeddings
+        if (
+            shared_embs is not None
+            and len(shared_embs) == len(records)
+            and len(records) == len(norm)
+        ):
+            embeddings = shared_embs
+        else:
+            embeddings = await _embed_batch_local([n["content"] for n in norm])
         if len(embeddings) != len(norm):
             log.warning(f"L4 embed count mismatch: {len(embeddings)} != {len(norm)}")
         qmd_db = Path(QMD_DB_PATH)
@@ -2362,22 +2380,20 @@ async def people_list_internal(
         email_filter_clause = " AND s.person_email IN $emails"
         params["emails"] = emails_filter
 
-    search_clause = ""
     if search_pattern:
-        #
-        #
-        #
-        #
-        #
-        #
-
+        # Defer the whole search filter until after the Person
+        # join — person_name only exists after the OPTIONAL
+        # MATCH below. Pre-filtering ChannelStats on email
+        # alone (the earlier two-step approach) silently dropped
+        # name-only matches, e.g. email=ag@x.io / name="Alex
+        # Tong" with search="alex" — the early WHERE failed and
+        # the post-join filter never saw the row.
         params["search"] = search_pattern
 
     cypher = (
         "MATCH (s:ChannelStat)\n"
         "WHERE s.arena IN $arenas"
         + email_filter_clause
-        + search_clause
         + "\n"
         "WITH s.person_email AS person_email,\n"
         "     collect({channel: s.channel, count: s.count,\n"
@@ -2393,11 +2409,17 @@ async def people_list_internal(
         "WITH person_email,\n"
         "     channels,\n"
         "     head(collect(DISTINCT p.name)) AS person_name\n"
-        # Apply the
-        #
+        # Apply the search filter now that we have both the
+        # joined name and the email available. `coalesce(name,
+        # '')` keeps people without a Person node in the result
+        # set when their email matches — they fall through the
+        # name probe cleanly instead of bypassing the filter
+        # via a `person_name IS NULL` short-circuit, which was
+        # the prior bug (anyone without a Person node passed
+        # search regardless of term).
         + (
-            "WHERE (
-            " OR toLower(person_email) CONTAINS $search
+            "WHERE toLower(coalesce(person_name, '')) CONTAINS $search\n"
+            " OR toLower(person_email) CONTAINS $search\n"
             if search_pattern
             else ""
         )
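The coalesce semantics the comments (and the two regression tests at the bottom of this diff) rely on can be mirrored outside Neo4j; a minimal Python sketch, where the helper name is ours and `search` is assumed pre-lowercased since the Cypher compares raw $search:

def passes_search(person_name, person_email, search):
    # Mirrors: WHERE toLower(coalesce(person_name, '')) CONTAINS $search
    #             OR toLower(person_email) CONTAINS $search
    name = (person_name or "").lower()   # coalesce(person_name, '')
    return search in name or search in person_email.lower()

assert passes_search("Alex Tong", "ag@x.io", "alex")      # name-only match survives
assert not passes_search(None, "orphan@x.io", "alex")     # missing Person no longer bypasses
assert passes_search(None, "alex@x.io", "alex")           # email probe still works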
package/packages/memory-engine/engine/services/l4/server.py
CHANGED
@@ -124,6 +124,10 @@ class SearchRequest(BaseModel):
 
 class IndexBatchRequest(BaseModel):
     records: list[dict[str, Any]]
+    # When supplied (parallel to `records`), skip the embed call and use
+    # these vectors directly. Compat shim populates this when shared-embed
+    # mode is on so we don't duplicate the embed work across layers.
+    embeddings: list[list[float]] | None = None
 
 
 app = FastAPI(title="L4 sqlite-vec sidecar (OSS)")
@@ -187,7 +191,13 @@ async def index_batch(req: IndexBatchRequest):
         return {"status": "ok", "inserted": 0}
     texts = [(r.get("text") or r.get("content") or "")[:8192] for r in req.records]
     t0 = time.perf_counter()
-    embs = await _embed_batch(texts)
+    # Shared-embed shortcut: caller (compat shim) computed vectors once
+    # and forwards them so we skip the embed RPC. Length must match
+    # records — defensive bail if it doesn't.
+    if req.embeddings is not None and len(req.embeddings) == len(req.records):
+        embs = req.embeddings
+    else:
+        embs = await _embed_batch(texts)
     embed_ms = (time.perf_counter() - t0) * 1000.0
 
     conn = _get_db()
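For reference, the request shape this handler now accepts from the compat shim when shared-embed mode is on (a sketch with toy 3-dimensional vectors; real ones are model-dimensional):

# One vector per record, same order; on a length mismatch the handler
# above ignores them and embeds locally.
payload = {
    "records": [
        {"id": "a1", "text": "first chunk"},
        {"id": "a2", "text": "second chunk"},
    ],
    "embeddings": [
        [0.12, -0.03, 0.77],
        [0.08, 0.41, -0.19],
    ],
}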
package/packages/memory-engine/engine/services/l5/l5-comms-layer.py
CHANGED
@@ -629,13 +629,19 @@ def serve(port=8034):
             client = get_client()
             ensure_collection(client, collection)
 
-            #
+            # Shared-embed shortcut: caller (compat shim) computed vectors
+            # once and forwards them so we skip the embed RPC. Length must
+            # match records — fall back to per-layer embed if it doesn't.
             texts = [(r.get("text") or "")[:8192] for r in records]
+            shared_embs = req.get("embeddings")
             t0 = _time.time()
-            try:
-                embs = _embed_post(texts)
-            except Exception as exc:
-                return {"status": "error", "error": f"embed failed: {exc}"}
+            if isinstance(shared_embs, list) and len(shared_embs) == len(records):
+                embs = shared_embs
+            else:
+                try:
+                    embs = _embed_post(texts)
+                except Exception as exc:
+                    return {"status": "error", "error": f"embed failed: {exc}"}
             embed_ms = (_time.time() - t0) * 1000.0
 
             # Single batched insert. Mirror every field the chats collection
package/packages/memory-engine/engine/services/l6/l6-document-store.py
CHANGED
@@ -990,12 +990,18 @@ def serve(port: int = DEFAULT_PORT):
 
         texts = [(r.get("text") or "")[:16000] for r in records]
 
-        #
+        # Shared-embed shortcut: caller (compat shim) computed vectors
+        # once and forwards them so we skip the embed RPC. Length must
+        # match records — fall back to per-layer embed if it doesn't.
+        shared_embs = req.get("embeddings")
         t0 = _time.time()
-        try:
-            embs = _embed_post(texts)
-        except Exception as exc:
-            raise HTTPException(status_code=500, detail=f"embed failed: {exc}")
+        if isinstance(shared_embs, list) and len(shared_embs) == len(records):
+            embs = shared_embs
+        else:
+            try:
+                embs = _embed_post(texts)
+            except Exception as exc:
+                raise HTTPException(status_code=500, detail=f"embed failed: {exc}")
         embed_ms = (_time.time() - t0) * 1000.0
 
         # Single milvus insert.
package/packages/memory-engine/tests/test_people_list_reader.py
CHANGED
@@ -301,6 +301,59 @@ def test_search_substring_matches_email_or_name(
     }
 
 
+@_skip_no_neo4j
+def test_search_matches_name_when_email_does_not(
+    neo4j_driver, proxy_module
+) -> None:
+    """Regression: an early-WHERE on ``ChannelStat`` filtered rows by
+    email-only before the Person join, so a person whose NAME matched
+    the search term but whose EMAIL didn't was silently dropped. Fixed
+    by deferring the whole search filter until after the OPTIONAL MATCH
+    on Person. Sentinel case: email ``ag@x.io`` / name ``Alex Tong`` /
+    search ``alex`` — must match on name even though email has no
+    substring overlap."""
+    driver, (arena, _, _) = neo4j_driver
+    with driver.session() as session:
+        _ensure_indexes(session)
+        _write_stat(session, arena, "ag@x.io", "email", name="Alex Tong")
+        _write_stat(session, arena, "other@x.io", "email", name="Bea Chen")
+
+    out = _call_people_list(proxy_module, arenas=[arena], search="alex")
+    assert {i.person_email for i in out.items} == {"ag@x.io"}
+
+
+@_skip_no_neo4j
+def test_search_does_not_bypass_filter_when_person_node_missing(
+    neo4j_driver, proxy_module
+) -> None:
+    """Regression: the previous WHERE clause had a ``person_name IS
+    NULL`` short-circuit that bypassed the search filter for anyone
+    without a Person node — they matched any search term. Fixed by
+    using ``coalesce(person_name, '')`` so the name probe just fails
+    cleanly when no Person record exists, falling through to the email
+    probe."""
+    driver, (arena, _, _) = neo4j_driver
+    with driver.session() as session:
+        _ensure_indexes(session)
+        # Insert a ChannelStat WITHOUT a Person node — simulates a
+        # contact who's been emailed but never had a Person record
+        # materialised. Use a raw write so _write_stat's MERGE doesn't
+        # auto-create a Person.
+        session.run(
+            "MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: 'email'}) "
+            "SET s.count = 1, s.inbound = 1, s.outbound = 0, "
+            "    s.last_seen = '2026-05-10T00:00:00Z', "
+            "    s.first_seen = '2026-05-10T00:00:00Z'",
+            arena=arena, email="orphan@x.io",
+        )
+        _write_stat(session, arena, "alex@x.io", "email", name="Alex Tong")
+
+    # Search "alex" must NOT match orphan@x.io — neither name (missing)
+    # nor email contains "alex".
+    out = _call_people_list(proxy_module, arenas=[arena], search="alex")
+    assert {i.person_email for i in out.items} == {"alex@x.io"}
+
+
 # ---------------------------------------------------------------------------
 # Pagination.
 # ---------------------------------------------------------------------------