@pentatonic-ai/ai-agent-sdk 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -906,7 +906,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
  }

  // src/telemetry.js
- var VERSION = "0.9.1";
+ var VERSION = "0.9.3";
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
  function machineId() {
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js CHANGED
@@ -875,7 +875,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
  }

  // src/telemetry.js
- var VERSION = "0.9.1";
+ var VERSION = "0.9.3";
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
  function machineId() {
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@pentatonic-ai/ai-agent-sdk",
- "version": "0.9.1",
+ "version": "0.9.3",
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
  "type": "module",
  "main": "./dist/index.cjs",
@@ -2,6 +2,7 @@ import { describe, it, expect, beforeEach, jest } from "@jest/globals";
  import {
  fetchEngine,
  engineStore,
+ engineStoreBatch,
  engineSearch,
  engineAggregate,
  enginePeopleList,
@@ -170,6 +171,129 @@ describe("engine HTTP client", () => {
  });
  });

+ describe("engineStoreBatch", () => {
+ it("posts to /store-batch with arena fixed per-record", async () => {
+ mockOk({ status: "ok", inserted: 2, ids: ["a1", "a2"] });
+ await engineStoreBatch("https://e", {
+ clientId: "acme",
+ records: [
+ { content: "first", metadata: { kind: "note" } },
+ { content: "second", metadata: { kind: "doc" } },
+ ],
+ layerType: "episodic",
+ actorUserId: "u-1",
+ });
+ const body = JSON.parse(calls[0].init.body);
+ expect(calls[0].url).toBe("https://e/store-batch");
+ expect(body).toEqual({
+ records: [
+ {
+ content: "first",
+ metadata: {
+ kind: "note",
+ arena: "acme",
+ layer_type: "episodic",
+ actor_user_id: "u-1",
+ },
+ },
+ {
+ content: "second",
+ metadata: {
+ kind: "doc",
+ arena: "acme",
+ layer_type: "episodic",
+ actor_user_id: "u-1",
+ },
+ },
+ ],
+ });
+ });
+
+ it("user-scoped arena applies to every record when userId provided", async () => {
+ mockOk({ status: "ok", inserted: 3, ids: ["a", "b", "c"] });
+ await engineStoreBatch("https://e", {
+ clientId: "acme",
+ userId: "user-42",
+ records: [
+ { content: "a" },
+ { content: "b" },
+ { content: "c" },
+ ],
+ });
+ const body = JSON.parse(calls[0].init.body);
+ for (const r of body.records) {
+ expect(r.metadata.arena).toBe("acme:user-42");
+ }
+ });
+
+ it("passes explicit per-record id through unchanged", async () => {
+ mockOk({ status: "ok", inserted: 1, ids: ["custom-id"] });
+ await engineStoreBatch("https://e", {
+ clientId: "acme",
+ records: [{ id: "custom-id", content: "x" }],
+ });
+ const body = JSON.parse(calls[0].init.body);
+ expect(body.records[0].id).toBe("custom-id");
+ expect(body.records[0].content).toBe("x");
+ });
+
+ it("omits id when not provided so engine hashes the content", async () => {
+ mockOk({ status: "ok", inserted: 1, ids: ["server-hashed"] });
+ await engineStoreBatch("https://e", {
+ clientId: "acme",
+ records: [{ content: "x" }],
+ });
+ const body = JSON.parse(calls[0].init.body);
+ expect(body.records[0]).not.toHaveProperty("id");
+ });
+
+ it("does NOT let caller override arena via per-record metadata", async () => {
+ mockOk({ status: "ok", inserted: 1, ids: ["x"] });
+ await engineStoreBatch("https://e", {
+ clientId: "acme",
+ records: [
+ { content: "x", metadata: { arena: "tenant-b" } },
+ ],
+ });
+ const body = JSON.parse(calls[0].init.body);
+ expect(body.records[0].metadata.arena).toBe("acme");
+ });
+
+ it("returns early without HTTP call when records is empty", async () => {
+ mockOk({ status: "ok", inserted: 0, ids: [] });
+ const out = await engineStoreBatch("https://e", {
+ clientId: "acme",
+ records: [],
+ });
+ expect(out).toEqual({ status: "ok", inserted: 0, ids: [] });
+ expect(calls.length).toBe(0);
+ });
+
+ it("rejects missing clientId", async () => {
+ await expect(
+ engineStoreBatch("https://e", { records: [{ content: "x" }] })
+ ).rejects.toThrow(/clientId/);
+ });
+
+ it("rejects non-array records", async () => {
+ await expect(
+ engineStoreBatch("https://e", { clientId: "a" })
+ ).rejects.toThrow(/records/);
+ await expect(
+ engineStoreBatch("https://e", { clientId: "a", records: "oops" })
+ ).rejects.toThrow(/records/);
+ });
+
+ it("rejects non-string content with the offending index", async () => {
+ await expect(
+ engineStoreBatch("https://e", {
+ clientId: "a",
+ records: [{ content: "ok" }, { content: 123 }],
+ })
+ ).rejects.toThrow(/records\[1\]\.content/);
+ });
+ });
+
  describe("engineSearch", () => {
  it("tenant-only arenas list when no userId", async () => {
  mockOk({ results: [] });
@@ -206,6 +206,75 @@ export async function engineStore(engineUrl, opts) {
  return fetchEngine(engineUrl, "/store", body, { headers });
  }

+ /**
+ * Batch-store many records in one engine call.
+ *
+ * One HTTP round-trip → engine issues one batched embed call covering
+ * every record (vs N round-trips + N single-text embeds for `engineStore`).
+ * Per `/store-batch` semantics, the response includes per-record ids
+ * plus the engine block with per-layer write counts.
+ *
+ * Arena composition matches `engineStore`: tenant-wide
+ * (`arena = clientId`) by default, user-scoped (`arena = clientId:userId`)
+ * when `userId` is supplied, overridable via `scope`. The shared arena
+ * is applied to **every** record; per-record `metadata` is preserved
+ * but cannot override the SDK-fixed arena (last-spread-wins ordering).
+ *
+ * Each record may carry an explicit `id` (stable dedup key) or omit
+ * it and let the engine hash the content. Per-record `metadata` is
+ * merged with the shared layerType / actorUserId before posting.
+ *
+ * @param {string} engineUrl
+ * @param {object} opts
+ * @param {string} opts.clientId
+ * @param {string} [opts.userId]
+ * @param {"tenant"|"user"} [opts.scope]
+ * @param {Array<{content: string, metadata?: object, id?: string}>} opts.records
+ * @param {string} [opts.layerType]
+ * @param {string} [opts.actorUserId]
+ * @param {Record<string,string>} [opts.headers]
+ * @returns {Promise<{status: string, inserted: number, ids: string[], engine?: object, duration_ms?: number}>}
+ */
+ export async function engineStoreBatch(engineUrl, opts) {
+ const {
+ clientId,
+ userId,
+ scope,
+ records,
+ layerType,
+ actorUserId,
+ headers,
+ } = opts || {};
+ if (!clientId) throw new Error("engineStoreBatch: clientId required");
+ if (!Array.isArray(records)) {
+ throw new Error("engineStoreBatch: records[] required");
+ }
+ // Short-circuit empty input — no network round-trip, no error.
+ // Matches engineStore's tolerance for trivial inputs at upstream callers.
+ if (records.length === 0) {
+ return { status: "ok", inserted: 0, ids: [] };
+ }
+ const arena = composeArena(clientId, userId, scope);
+ const body = {
+ records: records.map((r, i) => {
+ if (typeof r?.content !== "string") {
+ throw new Error(`engineStoreBatch: records[${i}].content must be a string`);
+ }
+ return {
+ ...(r.id ? { id: r.id } : {}),
+ content: r.content,
+ metadata: {
+ ...(r.metadata || {}),
+ arena,
+ ...(layerType ? { layer_type: layerType } : {}),
+ ...(actorUserId !== undefined ? { actor_user_id: actorUserId } : {}),
+ },
+ };
+ }),
+ };
+ return fetchEngine(engineUrl, "/store-batch", body, { headers });
+ }
+
  /**
  * Search the engine, scoped to a tenant.
  *
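For orientation, here is a minimal sketch of the wire format this new function produces, assuming a compat shim listening locally. The endpoint path and body shape come from the diff above; `ENGINE_URL`, the record values, and `compose_arena` (a Python stand-in for the SDK's `composeArena`, as described in the JSDoc) are invented for the example.

```python
# Hypothetical illustration of the /store-batch request engineStoreBatch composes.
import httpx

ENGINE_URL = "http://localhost:8099"  # assumption: a local compat shim

def compose_arena(client_id: str, user_id: str | None = None, scope: str | None = None) -> str:
    # Stand-in for the SDK's composeArena per the JSDoc: tenant-wide by
    # default, clientId:userId when a userId is supplied (unless scope
    # pins it back to "tenant").
    if user_id and scope != "tenant":
        return f"{client_id}:{user_id}"
    return client_id

arena = compose_arena("acme", "user-42")  # -> "acme:user-42"
body = {
    "records": [
        {
            # `id` omitted -> the engine hashes the content for a stable key
            "content": "first",
            # shared arena/layer fields are merged LAST, so a caller-supplied
            # metadata.arena can never override the SDK-fixed arena
            "metadata": {"kind": "note", "arena": arena, "layer_type": "episodic"},
        }
    ]
}
resp = httpx.post(f"{ENGINE_URL}/store-batch", json=body, timeout=30.0)
print(resp.json())  # e.g. {"status": "ok", "inserted": 1, "ids": [...]}
```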
@@ -4,7 +4,18 @@ WORKDIR /app

  RUN pip install --no-cache-dir fastapi uvicorn[standard] httpx pydantic

- COPY server.py /app/server.py
+ # Build context is the memory-engine root (see docker-compose.yml). The
+ # shim's server.py side-loads engine/services/_shared/embed_provider.py
+ # for shared-embed mode on /store-batch (one embed call across all 4
+ # layer indexers vs 4 redundant calls).
+ COPY compat/server.py /app/server.py
+ # server.py's sys.path.insert resolves "../engine/services" relative to
+ # its own location (/app/server.py → /engine/services). Mirror that
+ # layout so the import works without changing the runtime code.
+ COPY engine/services/_shared /engine/services/_shared
+ # Make `_shared` an importable package (mirror the layer services'
+ # layout where __init__.py exists or python detects PEP 420 namespace).
+ RUN touch /engine/services/__init__.py

  EXPOSE 8099

@@ -34,6 +34,7 @@ Environment:

  import hashlib
  import os
+ import sys
  import time
  from datetime import datetime, timezone
  from typing import Any, Optional
@@ -42,6 +43,17 @@ import httpx
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel, Field

+ # Reach into the engine/services tree so we can reuse EmbedClient. The
+ # tree isn't a real installed package; layer services and the compat
+ # shim both side-load it the same way. Keeps the chunking + auto-detect
+ # behaviour identical between the shim's pre-embed and the per-layer
+ # embeds that previously did the same work N times.
+ sys.path.insert(
+ 0,
+ os.path.join(os.path.dirname(__file__), "..", "engine", "services"),
+ )
+ from _shared.embed_provider import EmbedClient # noqa: E402
+
  # ----------------------------------------------------------------------
  # Config
  # ----------------------------------------------------------------------
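The Dockerfile layout above exists to satisfy this relative-path computation. A quick illustrative check of the arithmetic, using the paths named in the diff:

```python
import os

# os.path.dirname("/app/server.py") == "/app", so the side-load target is:
shared = os.path.normpath(
    os.path.join(os.path.dirname("/app/server.py"), "..", "engine", "services")
)
print(shared)  # /engine/services, hence the Dockerfile's
               # COPY engine/services/_shared /engine/services/_shared
```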
@@ -63,6 +75,30 @@ NEO4J_DB = os.environ.get("NEO4J_DB", "neo4j")

  PORT = int(os.environ.get("PORT", "8099"))

+ # Shared-embed mode. When on, /store-batch computes embeddings once at
+ # the shim level and forwards them to each layer's /index-batch so the
+ # layer skips its own embed call. Cuts gateway RPC count by ~4× (L4 +
+ # L5 + L6 + L2-internal all did the same embed work independently).
+ # Default ON because all layers in this engine use the same NV-Embed
+ # model; disable if you ever wire up per-layer differentiated embedders
+ # (e.g. cohere on L5, openai on L4).
+ SHARE_EMBEDDINGS = os.environ.get("PME_SHARE_EMBEDDINGS", "true").lower() == "true"
+
+ _embed_client: EmbedClient | None = None
+
+
+ def _get_embed_client() -> EmbedClient:
+ """Lazy-init the shim's EmbedClient using PME_-prefixed env vars
+ (matches L2's pattern). Cached for the process lifetime so the
+ auto-detect handshake only happens once."""
+ global _embed_client
+ if _embed_client is None:
+ _embed_client = EmbedClient.from_env(
+ prefix="PME_",
+ default_url=NV_EMBED_URL,
+ )
+ return _embed_client
+

  # Layer types we surface as the SDK 4-layer projection. Engine stores
  # everything as chunks tagged with arena + layer_type metadata; this
@@ -252,12 +288,23 @@ async def _embed_batch(texts: list[str]) -> list[list[float]]:
  return [d["embedding"] for d in resp.json()["data"]]


- async def _index_l4(records: list[dict[str, Any]]) -> int:
- """Index records into the L4 sqlite-vec layer."""
- payload = {"records": [
+ async def _index_l4(
+ records: list[dict[str, Any]],
+ embeddings: list[list[float]] | None = None,
+ ) -> int:
+ """Index records into the L4 sqlite-vec layer.
+
+ When `embeddings` is supplied (parallel to records), L4's /index-batch
+ skips its own embed call and uses ours — eliminates the redundant
+ embed work that previously cost ~850ms per drain alarm. When None,
+ L4 embeds itself (backwards-compatible path for older callers / tests
+ that don't share embeddings)."""
+ payload: dict[str, Any] = {"records": [
  {"id": r.get("id") or hashlib.sha1(r["content"].encode()).hexdigest()[:32],
  "text": r["content"]} for r in records
  ]}
+ if embeddings is not None:
+ payload["embeddings"] = embeddings
  try:
  resp = await _client().post(f"{L4_VEC_URL}/index-batch", json=payload, timeout=120.0)
  resp.raise_for_status()
@@ -267,13 +314,20 @@ async def _index_l4(records: list[dict[str, Any]]) -> int:
  return 0


- async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> int:
+ async def _index_l5(
+ records: list[dict[str, Any]],
+ arena: str = "general",
+ embeddings: list[list[float]] | None = None,
+ ) -> int:
  """Index records into the L5 Milvus comms layer (chats collection).

  arena is forwarded as a Milvus dynamic field so /search can filter
  by arena natively (vs the shim's defence-in-depth post-filter).
+
+ When `embeddings` is supplied (parallel to records), L5 skips its
+ own embed call — see _index_l4 docstring for the dedup story.
  """
- payload = {
+ payload: dict[str, Any] = {
  "collection": "chats",
  "records": [
  {
@@ -287,6 +341,8 @@ async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> in
  for r in records
  ],
  }
+ if embeddings is not None:
+ payload["embeddings"] = embeddings
  try:
  resp = await _client().post(f"{L5_MILVUS_URL}/index-batch", json=payload, timeout=60.0)
  resp.raise_for_status()
@@ -299,9 +355,17 @@ async def _index_l5(records: list[dict[str, Any]], arena: str = "general") -> in
  return 0


- async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> int:
- """Index records into the L6 document store."""
- payload = {
+ async def _index_l6(
+ records: list[dict[str, Any]],
+ arena: str = "general",
+ embeddings: list[list[float]] | None = None,
+ ) -> int:
+ """Index records into the L6 document store.
+
+ When `embeddings` is supplied (parallel to records), L6 skips its
+ own embed call — see _index_l4 docstring for the dedup story.
+ """
+ payload: dict[str, Any] = {
  "arena": arena,
  "records": [
  {
@@ -314,6 +378,8 @@ async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> in
  for r in records
  ],
  }
+ if embeddings is not None:
+ payload["embeddings"] = embeddings
  try:
  resp = await _client().post(f"{L6_DOC_URL}/index-batch", json=payload, timeout=120.0)
  resp.raise_for_status()
@@ -323,14 +389,22 @@ async def _index_l6(records: list[dict[str, Any]], arena: str = "general") -> in
  return 0


- async def _index_l2_internal(records: list[dict[str, Any]], arena: str = "general") -> dict:
+ async def _index_l2_internal(
+ records: list[dict[str, Any]],
+ arena: str = "general",
+ embeddings: list[list[float]] | None = None,
+ ) -> dict:
  """Populate L2's internal stores: L0 BM25 + L4 QMD vec + L3 Neo4j KG.

  Without this, L2's RRF fusion runs over empty L0/L4-qmd/L3 layers and
  those zero-result rank lists pollute the score. The L2 proxy exposes
  /index-internal-batch which writes to all three in one round-trip.
+
+ When `embeddings` is supplied (parallel to records), L2's internal
+ embed call (used for L4-QMD population) is skipped — see _index_l4
+ docstring for the dedup story.
  """
- payload = {
+ payload: dict[str, Any] = {
  "arena": arena,
  "records": [
  {
@@ -341,6 +415,8 @@ async def _index_l2_internal(records: list[dict[str, Any]], arena: str = "genera
  for r in records
  ],
  }
+ if embeddings is not None:
+ payload["embeddings"] = embeddings
  try:
  resp = await _client().post(f"{L2_PROXY_URL}/index-internal-batch",
  json=payload, timeout=180.0)
@@ -646,11 +722,37 @@ async def store_batch(req: StoreBatchRequest):

  t0 = time.perf_counter()
  import asyncio
+
+ # Shared-embed mode: compute embeddings ONCE here, pass them down to
+ # every layer so they skip their own embed call. Previously L4 + L5
+ # + L6 + L2-internal each re-embedded the same texts in parallel,
+ # which fanned 4× the gateway RPCs. The gateway throttles at K≈10
+ # concurrent requests, so 40-way fan-out serialised into ~4 rounds
+ # of ~850ms each = ~3.5s of pure embed time per /store-batch. With
+ # shared embeddings we issue one chunked embed pass (10 sub-calls
+ # for N=50 records) and skip the per-layer redundant work entirely.
+ # Disabled via PME_SHARE_EMBEDDINGS=false for operators wiring up
+ # per-layer differentiated embedders.
+ shared_embeddings: list[list[float]] | None = None
+ embed_ms = 0.0
+ if SHARE_EMBEDDINGS and normalised:
+ texts = [r["content"] for r in normalised]
+ embed_t0 = time.perf_counter()
+ try:
+ shared_embeddings = await _get_embed_client().embed_batch_async(texts)
+ except Exception as exc:
+ # Fall back to per-layer embedding rather than failing the
+ # whole batch. The layers' /index-batch still works when
+ # `embeddings` is absent.
+ print(f"[shim] shared embed failed, falling back to per-layer: {exc}")
+ shared_embeddings = None
+ embed_ms = (time.perf_counter() - embed_t0) * 1000.0
+
  l4_count, l5_count, l6_count, l2_internal = await asyncio.gather(
- _index_l4(normalised),
- _index_l5(normalised, arena=req.arena or "general"),
- _index_l6(normalised, arena=req.arena or "general"),
- _index_l2_internal(normalised, arena=req.arena or "general"),
+ _index_l4(normalised, embeddings=shared_embeddings),
+ _index_l5(normalised, arena=req.arena or "general", embeddings=shared_embeddings),
+ _index_l6(normalised, arena=req.arena or "general", embeddings=shared_embeddings),
+ _index_l2_internal(normalised, arena=req.arena or "general", embeddings=shared_embeddings),
  )
  dur_ms = (time.perf_counter() - t0) * 1000.0
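A back-of-envelope check of the numbers in that comment (illustrative only; the 4 layers, K≈10 concurrency cap, ~850 ms round time, and 10 embed sub-calls per 50-record pass are all taken from the comment, not measured here):

```python
import math

layers = 4             # L4 + L5 + L6 + L2-internal, each embedding independently
calls_per_layer = 10   # "10 sub-calls for N=50 records", per the comment
K = 10                 # gateway concurrency cap, per the comment
round_ms = 850.0       # per embed round, per the comment

fan_out = layers * calls_per_layer              # 40 concurrent embed RPCs
rounds = math.ceil(fan_out / K)                 # serialised into 4 rounds
print(rounds * round_ms)                        # 3400.0 ms, i.e. ~3.5 s

shared_rounds = math.ceil(calls_per_layer / K)  # shared mode: 1 round
print(shared_rounds * round_ms)                 # 850.0 ms
```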
@@ -212,8 +212,11 @@ services:
  compat:
  <<: *engine-base
  build:
- context: ./compat
- dockerfile: Dockerfile
+ # Build context is the memory-engine root so the Dockerfile can
+ # COPY both compat/server.py and engine/services/_shared (shared
+ # EmbedClient for /store-batch dedup).
+ context: .
+ dockerfile: compat/Dockerfile
  container_name: pme-compat
  ports:
  - "127.0.0.1:${PME_PORT:-8099}:8099"
@@ -225,6 +228,17 @@ services:
  L5_MILVUS_URL: http://l5:8034
  L6_DOC_URL: http://l6:8037
  NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
+ # PME_ prefix vars feed the shim's EmbedClient for shared-embed
+ # mode on /store-batch (one embed call across all 4 indexers vs
+ # 4 redundant calls). Match the L2 config block so both clients
+ # hit the same gateway with the same model. Set
+ # PME_SHARE_EMBEDDINGS=false to revert to per-layer embedding.
+ PME_NV_EMBED_URL: ${NV_EMBED_URL:-http://host.docker.internal:8041/v1/embeddings}
+ PME_EMBED_API_KEY: ${EMBED_API_KEY:-}
+ PME_EMBED_PROVIDER: ${EMBED_PROVIDER:-openai}
+ PME_EMBED_AUTODETECT: ${EMBED_AUTODETECT:-true}
+ PME_NV_EMBED_MODEL: ${EMBED_MODEL_NAME:-nv-embed-v2}
+ PME_SHARE_EMBEDDINGS: ${PME_SHARE_EMBEDDINGS:-true}
  BYPASS_L2_PROXY: ${BYPASS_L2_PROXY:-0}
  extra_hosts:
  - "host.docker.internal:host-gateway"
@@ -1496,6 +1496,12 @@ async def _embed_batch_local(texts: List[str]) -> List[List[float]]:
  class IndexInternalBatchRequest(BaseModel):
  records: List[Dict[str, Any]] # [{"id": str, "content": str, "metadata": dict}, ...]
  arena: Optional[str] = "general"
+ # When supplied (parallel to `records`), skip the L4-QMD embed call
+ # and use these vectors directly. Compat shim populates this when
+ # shared-embed mode is on so we don't duplicate embed work across
+ # layers. Length must match records — defensive bail-out below if
+ # it doesn't.
+ embeddings: Optional[List[List[float]]] = None


  @app.post("/index-internal-batch")
@@ -1575,7 +1581,19 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
  # ---- L4 QMD vec (qmd.sqlite) ----------------------------------------
  l4_inserted = 0
  try:
- embeddings = await _embed_batch_local([n["content"] for n in norm])
+ # Shared-embed shortcut: if the compat shim handed us pre-computed
+ # vectors that line up with our normalised records, use them and
+ # skip our own embed RPC. Fall back to per-layer embedding when
+ # the vectors are absent or the lengths don't match (defensive).
+ shared_embs = req.embeddings
+ if (
+ shared_embs is not None
+ and len(shared_embs) == len(records)
+ and len(records) == len(norm)
+ ):
+ embeddings = shared_embs
+ else:
+ embeddings = await _embed_batch_local([n["content"] for n in norm])
  if len(embeddings) != len(norm):
  log.warning(f"L4 embed count mismatch: {len(embeddings)} != {len(norm)}")
  qmd_db = Path(QMD_DB_PATH)
@@ -2362,22 +2380,20 @@ async def people_list_internal(
  email_filter_clause = " AND s.person_email IN $emails"
  params["emails"] = emails_filter

- search_clause = ""
  if search_pattern:
- # Match against person_email; person_name is resolved
- # via the OPTIONAL MATCH on Person below, so we can't
- # apply it inside the initial WHERE without joining first.
- # Two-step: filter on email here, then re-filter after
- # joining the Person. (Works at our scale; revisit if
- # we ever need search to scale to 10k+ people.)
- search_clause = " AND toLower(s.person_email) CONTAINS $search"
+ # Defer the whole search filter until after the Person
+ # join: person_name only exists after the OPTIONAL
+ # MATCH below. Pre-filtering ChannelStats on email
+ # alone (the earlier two-step approach) silently dropped
+ # name-only matches, e.g. email=ag@x.io / name="Alex
+ # Tong" with search="alex": the early WHERE failed and
+ # the post-join filter never saw the row.
  params["search"] = search_pattern

  cypher = (
  "MATCH (s:ChannelStat)\n"
  "WHERE s.arena IN $arenas"
  + email_filter_clause
- + search_clause
  + "\n"
  "WITH s.person_email AS person_email,\n"
  " collect({channel: s.channel, count: s.count,\n"
@@ -2393,11 +2409,17 @@ async def people_list_internal(
  "WITH person_email,\n"
  " channels,\n"
  " head(collect(DISTINCT p.name)) AS person_name\n"
- # Apply the name-side of the search filter now that we
- # have the joined name.
+ # Apply the search filter now that we have both the
+ # joined name and the email available. `coalesce(name,
+ # '')` keeps people without a Person node in the result
+ # set when their email matches — they fall through the
+ # name probe cleanly instead of bypassing the filter
+ # via a `person_name IS NULL` short-circuit, which was
+ # the prior bug (anyone without a Person node passed
+ # search regardless of term).
  + (
- "WHERE ($search IS NULL OR person_name IS NULL OR toLower(person_name) CONTAINS $search\n"
- " OR toLower(person_email) CONTAINS $search)\n"
+ "WHERE toLower(coalesce(person_name, '')) CONTAINS $search\n"
+ " OR toLower(person_email) CONTAINS $search\n"
  if search_pattern
  else ""
  )
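The semantics of the fixed WHERE clause can be seen in a small Python stand-in (illustrative only; the sample names and emails mirror the regression tests further down):

```python
# Python stand-in for the fixed Cypher filter semantics.
def matches(search: str, person_name: str | None, person_email: str) -> bool:
    s = search.lower()
    # coalesce(person_name, '') in the Cypher: a missing name fails the
    # name probe cleanly instead of short-circuiting the filter to true
    name = (person_name or "").lower()
    return s in name or s in person_email.lower()

print(matches("alex", "Alex Tong", "ag@x.io"))  # True: name-only match survives
print(matches("alex", None, "orphan@x.io"))     # False: no Person node, no bypass
print(matches("alex", None, "alex@x.io"))       # True: email still matches
```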
@@ -124,6 +124,10 @@ class SearchRequest(BaseModel):

  class IndexBatchRequest(BaseModel):
  records: list[dict[str, Any]]
+ # When supplied (parallel to `records`), skip the embed call and use
+ # these vectors directly. Compat shim populates this when shared-embed
+ # mode is on so we don't duplicate the embed work across layers.
+ embeddings: list[list[float]] | None = None


  app = FastAPI(title="L4 sqlite-vec sidecar (OSS)")
@@ -187,7 +191,13 @@ async def index_batch(req: IndexBatchRequest):
  return {"status": "ok", "inserted": 0}
  texts = [(r.get("text") or r.get("content") or "")[:8192] for r in req.records]
  t0 = time.perf_counter()
- embs = await _embed_batch(texts)
+ # Shared-embed shortcut: caller (compat shim) computed vectors once
+ # and forwards them so we skip the embed RPC. Length must match
+ # records — defensive bail if it doesn't.
+ if req.embeddings is not None and len(req.embeddings) == len(req.records):
+ embs = req.embeddings
+ else:
+ embs = await _embed_batch(texts)
  embed_ms = (time.perf_counter() - t0) * 1000.0

  conn = _get_db()
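A hypothetical client-side sketch of the extended `/index-batch` contract (the sidecar URL and vector values are invented; the field names and the length-must-match fallback come from the diff):

```python
import httpx

L4_VEC_URL = "http://localhost:8040"  # assumption: a local L4 sidecar
records = [{"id": "a1", "text": "first"}, {"id": "a2", "text": "second"}]
embeddings = [[0.1] * 4, [0.2] * 4]   # parallel to records; dim is illustrative

# len(embeddings) == len(records) -> the sidecar uses these vectors and
# skips its own embed RPC; any mismatch falls back to self-embedding.
resp = httpx.post(
    f"{L4_VEC_URL}/index-batch",
    json={"records": records, "embeddings": embeddings},
    timeout=120.0,
)
print(resp.json())
```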
@@ -629,13 +629,19 @@ def serve(port=8034):
  client = get_client()
  ensure_collection(client, collection)

- # Single batched embed call.
+ # Shared-embed shortcut: caller (compat shim) computed vectors
+ # once and forwards them so we skip the embed RPC. Length must
+ # match records — fall back to per-layer embed if it doesn't.
  texts = [(r.get("text") or "")[:8192] for r in records]
+ shared_embs = req.get("embeddings")
  t0 = _time.time()
- try:
- embs = _embed_post(texts)
- except Exception as exc:
- return {"status": "error", "error": f"embed failed: {exc}"}
+ if isinstance(shared_embs, list) and len(shared_embs) == len(records):
+ embs = shared_embs
+ else:
+ try:
+ embs = _embed_post(texts)
+ except Exception as exc:
+ return {"status": "error", "error": f"embed failed: {exc}"}
  embed_ms = (_time.time() - t0) * 1000.0

  # Single batched insert. Mirror every field the chats collection
@@ -990,12 +990,18 @@ def serve(port: int = DEFAULT_PORT):

  texts = [(r.get("text") or "")[:16000] for r in records]

- # Single batched embed call (OpenAI-compat first, lambda-gateway fallback).
+ # Shared-embed shortcut: caller (compat shim) computed vectors
+ # once and forwards them so we skip the embed RPC. Length must
+ # match records — fall back to per-layer embed if it doesn't.
+ shared_embs = req.get("embeddings")
  t0 = _time.time()
- try:
- embs = _embed_post(texts)
- except Exception as exc:
- raise HTTPException(status_code=500, detail=f"embed failed: {exc}")
+ if isinstance(shared_embs, list) and len(shared_embs) == len(records):
+ embs = shared_embs
+ else:
+ try:
+ embs = _embed_post(texts)
+ except Exception as exc:
+ raise HTTPException(status_code=500, detail=f"embed failed: {exc}")
  embed_ms = (_time.time() - t0) * 1000.0

  # Single milvus insert.
@@ -301,6 +301,59 @@ def test_search_substring_matches_email_or_name(
  }


+ @_skip_no_neo4j
+ def test_search_matches_name_when_email_does_not(
+ neo4j_driver, proxy_module
+ ) -> None:
+ """Regression: an early-WHERE on ``ChannelStat`` filtered rows by
+ email-only before the Person join, so a person whose NAME matched
+ the search term but whose EMAIL didn't was silently dropped. Fixed
+ by deferring the whole search filter until after the OPTIONAL MATCH
+ on Person. Sentinel case: email ``ag@x.io`` / name ``Alex Tong`` /
+ search ``alex`` — must match on name even though email has no
+ substring overlap."""
+ driver, (arena, _, _) = neo4j_driver
+ with driver.session() as session:
+ _ensure_indexes(session)
+ _write_stat(session, arena, "ag@x.io", "email", name="Alex Tong")
+ _write_stat(session, arena, "other@x.io", "email", name="Bea Chen")
+
+ out = _call_people_list(proxy_module, arenas=[arena], search="alex")
+ assert {i.person_email for i in out.items} == {"ag@x.io"}
+
+
+ @_skip_no_neo4j
+ def test_search_does_not_bypass_filter_when_person_node_missing(
+ neo4j_driver, proxy_module
+ ) -> None:
+ """Regression: the previous WHERE clause had a ``person_name IS
+ NULL`` short-circuit that bypassed the search filter for anyone
+ without a Person node — they matched any search term. Fixed by
+ using ``coalesce(person_name, '')`` so the name probe just fails
+ cleanly when no Person record exists, falling through to the email
+ probe."""
+ driver, (arena, _, _) = neo4j_driver
+ with driver.session() as session:
+ _ensure_indexes(session)
+ # Insert a ChannelStat WITHOUT a Person node — simulates a
+ # contact who's been emailed but never had a Person record
+ # materialised. Use a raw write so _write_stat's MERGE doesn't
+ # auto-create a Person.
+ session.run(
+ "MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: 'email'}) "
+ "SET s.count = 1, s.inbound = 1, s.outbound = 0, "
+ " s.last_seen = '2026-05-10T00:00:00Z', "
+ " s.first_seen = '2026-05-10T00:00:00Z'",
+ arena=arena, email="orphan@x.io",
+ )
+ _write_stat(session, arena, "alex@x.io", "email", name="Alex Tong")
+
+ # Search "alex" must NOT match orphan@x.io — neither name (missing)
+ # nor email contains "alex".
+ out = _call_people_list(proxy_module, arenas=[arena], search="alex")
+ assert {i.person_email for i in out.items} == {"alex@x.io"}
+
+
  # ---------------------------------------------------------------------------
  # Pagination.
  # ---------------------------------------------------------------------------