@pentatonic-ai/ai-agent-sdk 0.8.8 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -906,7 +906,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
906
906
  }
907
907
 
908
908
  // src/telemetry.js
909
- var VERSION = "0.8.8";
909
+ var VERSION = "0.9.1";
910
910
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
911
911
  function machineId() {
912
912
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js CHANGED
@@ -875,7 +875,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
875
875
  }
876
876
 
877
877
  // src/telemetry.js
878
- var VERSION = "0.8.8";
878
+ var VERSION = "0.9.1";
879
879
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
880
880
  function machineId() {
881
881
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.8.8",
3
+ "version": "0.9.1",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -6,14 +6,9 @@
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "@pentatonic/memory",
9
- "version": "0.1.0",
10
- "license": "MIT",
11
9
  "dependencies": {
12
10
  "@modelcontextprotocol/sdk": "^1.0.0",
13
11
  "pg": "^8.13.0"
14
- },
15
- "bin": {
16
- "memory-server": "src/server.js"
17
12
  }
18
13
  },
19
14
  "node_modules/@hono/node-server": {
@@ -436,9 +431,9 @@
436
431
  "license": "MIT"
437
432
  },
438
433
  "node_modules/fast-uri": {
439
- "version": "3.1.0",
440
- "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
441
- "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
434
+ "version": "3.1.2",
435
+ "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz",
436
+ "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==",
442
437
  "funding": [
443
438
  {
444
439
  "type": "github",
@@ -4,6 +4,7 @@ import {
4
4
  engineStore,
5
5
  engineSearch,
6
6
  engineAggregate,
7
+ enginePeopleList,
7
8
  engineForget,
8
9
  composeArena,
9
10
  composeArenas,
@@ -281,6 +282,17 @@ describe("engine HTTP client", () => {
281
282
  expect(sent["CF-Access-Client-Id"]).toBe("tes-worker.id");
282
283
  });
283
284
 
285
  // Caller-supplied headers (here the CF Access pair held in `cfAccess`) must
  // reach the outgoing request for enginePeopleList as well.
  it("enginePeopleList forwards opts.headers", async () => {
    mockOk({ total_count: 0, has_more: false, items: [] });
    await enginePeopleList("https://e", {
      clientId: "acme",
      headers: cfAccess,
    });
    // Inspect the headers of the first recorded request.
    const sent = calls[0].init.headers;
    expect(sent["CF-Access-Client-Id"]).toBe("tes-worker.id");
    expect(sent["CF-Access-Client-Secret"]).toBe("shh-it-secret");
  });
295
+
284
296
  it("no headers sent when opts.headers omitted (back-compat)", async () => {
285
297
  mockOk({});
286
298
  await engineStore("https://e", { clientId: "acme", content: "x" });
@@ -457,6 +469,142 @@ describe("engine HTTP client", () => {
457
469
  });
458
470
  });
459
471
 
472
  // Request-shaping contract for enginePeopleList: which arenas are sent,
  // which optional filters are forwarded or omitted, the pagination/sort
  // defaults, and that the engine response is returned untouched.
  describe("enginePeopleList", () => {
    // Minimal well-formed engine response, used where only the request matters.
    const emptyPage = { total_count: 0, has_more: false, items: [] };

    it("posts to /people-list with arenas list (tenant + user-scope) when userId is set", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", {
        clientId: "acme",
        userId: "user-42",
      });
      const body = JSON.parse(calls[0].init.body);
      expect(calls[0].url).toBe("https://e/people-list");
      // Mirrors engineSearch's arena composition: tenant + user-scope.
      // The engine dedups by person_email, so cross-arena spans are
      // safe and expected for this query.
      expect(body.arenas).toEqual(["acme", "acme:user-42"]);
    });

    it("uses tenant-only arenas when no userId", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", { clientId: "acme" });
      const body = JSON.parse(calls[0].init.body);
      expect(body.arenas).toEqual(["acme"]);
    });

    it("uses explicit arenas list when provided, ignoring clientId/userId", async () => {
      // Cross-user view: caller passes the arenas it wants spanned.
      // Backs the "people known by Philip OR Jeanne" use case.
      mockOk(emptyPage);
      await enginePeopleList("https://e", {
        clientId: "ignored",
        userId: "also-ignored",
        arenas: ["acme:user-philip", "acme:user-jeanne"],
      });
      const body = JSON.parse(calls[0].init.body);
      expect(body.arenas).toEqual(["acme:user-philip", "acme:user-jeanne"]);
    });

    it("throws when neither clientId nor arenas are provided", async () => {
      // The argument check runs before any request is built, so the call
      // rejects without needing the mocked response.
      mockOk(emptyPage);
      await expect(enginePeopleList("https://e", {})).rejects.toThrow(
        /clientId or arenas required/,
      );
    });

    it("forwards emails for batched-mode lookup", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", {
        clientId: "acme",
        emails: ["alex@x.io", "bea@y.io"],
      });
      const body = JSON.parse(calls[0].init.body);
      expect(body.emails).toEqual(["alex@x.io", "bea@y.io"]);
    });

    it("omits emails when empty list (engine treats missing field as 'no filter')", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", { clientId: "acme", emails: [] });
      const body = JSON.parse(calls[0].init.body);
      expect(body).not.toHaveProperty("emails");
    });

    it("forwards search substring when provided", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", {
        clientId: "acme",
        search: "pentatonic",
      });
      const body = JSON.parse(calls[0].init.body);
      expect(body.search).toBe("pentatonic");
    });

    it("uses defaults for limit/offset/orderBy", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", { clientId: "acme" });
      const body = JSON.parse(calls[0].init.body);
      expect(body.limit).toBe(200);
      expect(body.offset).toBe(0);
      // The camelCase `orderBy` option is sent on the wire as `order_by`.
      expect(body.order_by).toBe("last_seen_desc");
    });

    it("forwards limit/offset/orderBy when provided", async () => {
      mockOk(emptyPage);
      await enginePeopleList("https://e", {
        clientId: "acme",
        limit: 50,
        offset: 100,
        orderBy: "total_desc",
      });
      const body = JSON.parse(calls[0].init.body);
      expect(body.limit).toBe(50);
      expect(body.offset).toBe(100);
      expect(body.order_by).toBe("total_desc");
    });

    it("returns the response body verbatim", async () => {
      // Two-person page: one with a name and a channel breakdown, one with a
      // null name and no channels, to cover the nullable/empty response shapes.
      const expected = {
        total_count: 2,
        has_more: false,
        items: [
          {
            person_email: "alex@x.io",
            person_name: "Alex Tong",
            total: 42,
            inbound: 20,
            outbound: 22,
            last_seen: "2026-05-13T10:00:00Z",
            first_seen: "2024-01-01T00:00:00Z",
            channels: [
              {
                channel: "email",
                count: 30,
                inbound: 15,
                outbound: 15,
                last_seen: "2026-05-13T10:00:00Z",
                first_seen: "2024-01-01T00:00:00Z",
              },
            ],
          },
          {
            person_email: "bea@y.io",
            person_name: null,
            total: 5,
            inbound: 5,
            outbound: 0,
            last_seen: "2026-04-01T00:00:00Z",
            first_seen: "2025-12-01T00:00:00Z",
            channels: [],
          },
        ],
      };
      mockOk(expected);
      const out = await enginePeopleList("https://e", { clientId: "acme" });
      expect(out).toEqual(expected);
    });
  });
607
+
460
608
  describe("engineForget", () => {
461
609
  it("forwards id when provided", async () => {
462
610
  mockOk({ deleted: 1 });
@@ -346,3 +346,89 @@ export async function engineAggregate(engineUrl, opts) {
346
346
  };
347
347
  return fetchEngine(engineUrl, "/aggregate", body, { headers });
348
348
  }
349
+
350
/**
 * Corpus-level "all people I've communicated with" projection.
 *
 * Unlike `engineAggregate`, which returns the per-channel breakdown for ONE
 * person, this returns one row per person across one or more arenas, with
 * the per-channel breakdown nested inside each row. It lets a list page be
 * rendered without one round-trip per contact.
 *
 * Arena selection:
 * - `opts.arenas`, when non-empty, is sent as-is and `clientId`/`userId`
 *   are ignored. Use it for cross-user views ("people known by Philip OR
 *   Jeanne"); the engine de-duplicates people by `person_email`.
 * - Otherwise the list is `composeArenas(clientId, userId)`, the same
 *   composition `engineSearch` uses.
 *
 * Optional filters are only put on the wire when they carry a value: an
 * empty `emails` array or an empty `search` string is omitted, which the
 * engine treats as "no filter".
 *
 * `limit` and `offset` must be finite numbers to be forwarded; fractional
 * values are truncated toward zero. Anything else (undefined, NaN,
 * Infinity, strings) falls back to the default, because the engine declares
 * both fields as integers and would reject the `null` that NaN/Infinity
 * serialise to.
 *
 * @param {string} engineUrl base URL of the engine
 * @param {object} opts
 * @param {string} [opts.clientId] required unless `arenas` is set
 * @param {string} [opts.userId] adds the user-scoped arena to the default list
 * @param {string[]} [opts.arenas] explicit arena list; overrides clientId/userId
 * @param {string[]} [opts.emails] batched-mode filter (return only these emails)
 * @param {string} [opts.search] case-insensitive substring on name/email
 * @param {number} [opts.limit=200]
 * @param {number} [opts.offset=0]
 * @param {("last_seen_desc"|"last_seen_asc"|"total_desc"|"total_asc"|"name_asc"|"name_desc")} [opts.orderBy="last_seen_desc"]
 * @param {Record<string,string>} [opts.headers] forwarded HTTP headers (CF Access etc.)
 * @returns {Promise<{
 *   total_count: number,
 *   has_more: boolean,
 *   items: Array<{
 *     person_email: string,
 *     person_name: string|null,
 *     total: number,
 *     inbound: number,
 *     outbound: number,
 *     last_seen: string|null,
 *     first_seen: string|null,
 *     channels: Array<{
 *       channel: string,
 *       count: number,
 *       inbound: number,
 *       outbound: number,
 *       last_seen: string|null,
 *       first_seen: string|null,
 *     }>,
 *   }>,
 * }>} the engine response, returned unmodified
 * @throws {Error} when neither `clientId` nor a non-empty `arenas` is given
 */
export async function enginePeopleList(engineUrl, opts) {
  const {
    clientId,
    userId,
    arenas,
    emails,
    search,
    limit,
    offset,
    orderBy,
    headers,
  } = opts || {};

  let arenaList = arenas;
  if (!arenaList || arenaList.length === 0) {
    if (!clientId) {
      throw new Error("enginePeopleList: clientId or arenas required");
    }
    arenaList = composeArenas(clientId, userId);
  }

  // Number.isFinite does not coerce, so strings still fall back to the
  // default exactly as before; it additionally rejects NaN and ±Infinity.
  const toInt = (value, fallback) =>
    Number.isFinite(value) ? Math.trunc(value) : fallback;

  const body = {
    arenas: arenaList,
    limit: toInt(limit, 200),
    offset: toInt(offset, 0),
    order_by: orderBy || "last_seen_desc",
    ...(emails && emails.length > 0 ? { emails } : {}),
    ...(search ? { search } : {}),
  };
  return fetchEngine(engineUrl, "/people-list", body, { headers });
}
@@ -140,6 +140,34 @@ class AggregateRequest(BaseModel):
140
140
  group_by: Optional[list[str]] = None
141
141
 
142
142
 
143
class PeopleListRequest(BaseModel):
    """Public-facing /people-list request.

    Corpus-level "all people across these arenas" projection backing
    the Pip Relationships UI list page. Distinct from /aggregate
    (which is per-person): /people-list returns one row per Person
    across the whole arena set, with per-channel breakdown nested.

    Multi-arena is supported here (unlike /aggregate) because the
    UI use case — "people known by Philip OR Jeanne" — is a
    legitimate cross-user view. The L2 proxy does the dedup by
    person_email.

    `emails`: batched-mode filter. Return only these emails.
    `search`: case-insensitive substring on person_name and
    person_email.
    `order_by`: whitelisted sort key; see the L2 proxy for the
    allowlist.
    """

    # Arenas to span. The handler strips each entry, drops blanks, and
    # answers 400 when nothing is left.
    arenas: list[str]
    # Optional exact-match filter; the handler strips and lowercases entries
    # before forwarding them.
    emails: Optional[list[str]] = None
    # Optional substring filter; stripped by the handler before forwarding.
    search: Optional[str] = None
    # Pagination, forwarded unchanged (any clamping happens downstream).
    limit: int = 200
    offset: int = 0
    # Sort key, forwarded unchanged; not validated by this model.
    order_by: str = "last_seen_desc"
169
+
170
+
143
171
  # ----------------------------------------------------------------------
144
172
  # Engine clients (one per layer)
145
173
  # ----------------------------------------------------------------------
@@ -1089,6 +1117,55 @@ async def aggregate(req: AggregateRequest) -> dict[str, Any]:
1089
1117
  raise HTTPException(status_code=502, detail=f"aggregate upstream: {exc}")
1090
1118
 
1091
1119
 
1120
@app.post("/people-list")
async def people_list(req: PeopleListRequest) -> dict[str, Any]:
    """List all people across one or more arenas.

    Thin pass-through to the L2 proxy's /people-list-internal endpoint:
    this handler normalises the request and forwards it; the aggregation
    itself happens downstream.

    Normalisation performed here:
      * ``arenas``: entries are stripped and blanks dropped; an empty
        result is rejected with HTTP 400.
      * ``emails``: stripped and lowercased so the downstream exact-match
        filter can rely on a canonical form. Omitted from the payload when
        nothing usable remains.
      * ``search``: stripped; omitted when empty.
      * ``limit`` / ``offset`` / ``order_by`` are forwarded unchanged.

    Multi-arena requests are legitimate here (unlike /aggregate): the UI
    fetches "people known by Philip OR Jeanne" by passing both arenas.

    Returns:
        The upstream JSON body, unmodified.

    Raises:
        HTTPException: 400 when no usable arena is supplied; the upstream
            status code when the L2 proxy answers with anything other than
            200; 502 when the upstream call itself fails (connection error,
            timeout, undecodable body).
    """
    arenas = [a.strip() for a in (req.arenas or []) if a and a.strip()]
    if not arenas:
        raise HTTPException(status_code=400, detail="at least one arena is required")

    payload: dict[str, Any] = {
        "arenas": arenas,
        "limit": req.limit,
        "offset": req.offset,
        "order_by": req.order_by,
    }

    # Lowercase here so the L2 path can rely on exact-match. Filters that
    # normalise to nothing are left out entirely rather than sent empty,
    # so "no filter" has a single representation on the wire.
    emails = [e.strip().lower() for e in (req.emails or []) if e and e.strip()]
    if emails:
        payload["emails"] = emails
    search = (req.search or "").strip()
    if search:
        payload["search"] = search

    try:
        r = await _client().post(
            f"{L2_PROXY_URL}/people-list-internal", json=payload, timeout=15.0,
        )
        if r.status_code != 200:
            # Surface the upstream status, with a bounded slice of its body.
            raise HTTPException(
                status_code=r.status_code,
                detail=f"people-list failed: {r.text[:200]}",
            )
        return r.json()
    except HTTPException:
        raise
    except Exception as exc:
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(
            status_code=502, detail=f"people-list upstream: {exc}"
        ) from exc
1167
+
1168
+
1092
1169
  # ----------------------------------------------------------------------
1093
1170
  # Entrypoint
1094
1171
  # ----------------------------------------------------------------------
@@ -211,6 +211,7 @@ class EmbedClient:
211
211
  autodetect: bool = True,
212
212
  timeout: float = 120.0,
213
213
  env_prefix: str = "",
214
+ max_batch: int = 5,
214
215
  ) -> None:
215
216
  self._configured_provider = provider
216
217
  self._provider = provider
@@ -222,6 +223,12 @@ class EmbedClient:
222
223
  self._autodetect = autodetect
223
224
  self._env_prefix = env_prefix
224
225
  self._detected = False
226
+ # 0 = unlimited (no chunking). Positive = max texts per upstream call;
227
+ # larger inputs are split into multiple calls (concurrent in async path)
228
+ # and the results concatenated. Defaults to 5 because that's the per-call
229
+ # cap observed on Pentatonic AI Gateway — above which it 502s and the
230
+ # caller silently loses vector writes (see test_chunking_* tests).
231
+ self._max_batch = max(0, max_batch)
225
232
 
226
233
  # ------------------------------------------------------------------
227
234
  # Construction
@@ -248,6 +255,7 @@ class EmbedClient:
248
255
  {prefix}EMBED_PROVIDER default 'openai'
249
256
  {prefix}EMBED_AUTODETECT default 'true'
250
257
  {prefix}EMBED_TIMEOUT default '120'
258
+ {prefix}EMBED_MAX_BATCH default '5' (gateway-safe; '0' disables chunking)
251
259
  """
252
260
  url_var = url_var or f"{prefix}NV_EMBED_URL"
253
261
  key_var = key_var or f"{prefix}EMBED_API_KEY"
@@ -259,6 +267,7 @@ class EmbedClient:
259
267
  provider_name = os.environ.get(f"{prefix}EMBED_PROVIDER", "openai")
260
268
  autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
261
269
  timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
270
+ max_batch = int(os.environ.get(f"{prefix}EMBED_MAX_BATCH", "5"))
262
271
 
263
272
  provider = resolve_provider(provider_name, env_prefix=prefix)
264
273
  return cls(
@@ -269,6 +278,7 @@ class EmbedClient:
269
278
  autodetect=autodetect,
270
279
  timeout=timeout,
271
280
  env_prefix=prefix,
281
+ max_batch=max_batch,
272
282
  )
273
283
 
274
284
  # ------------------------------------------------------------------
@@ -307,10 +317,21 @@ class EmbedClient:
307
317
  # ------------------------------------------------------------------
308
318
 
309
319
  def embed_batch(self, texts: list[str]) -> list[list[float]]:
310
- """Embed a list of texts. Empty list returns empty list."""
320
+ """Embed a list of texts. Empty list returns empty list.
321
+
322
+ Splits into chunks of `max_batch` (default 5) and posts each
323
+ sequentially when the input exceeds the limit. Results are
324
+ concatenated in input order. `max_batch=0` disables chunking.
325
+ """
311
326
  if not texts:
312
327
  return []
313
- return self._post_with_autodetect(texts, async_mode=False)
328
+ if self._max_batch == 0 or len(texts) <= self._max_batch:
329
+ return self._post_with_autodetect(texts, async_mode=False)
330
+ out: list[list[float]] = []
331
+ for start in range(0, len(texts), self._max_batch):
332
+ chunk = texts[start:start + self._max_batch]
333
+ out.extend(self._post_with_autodetect(chunk, async_mode=False))
334
+ return out
314
335
 
315
336
  def embed_one(self, text: str) -> list[float]:
316
337
  return self.embed_batch([text])[0]
@@ -320,9 +341,25 @@ class EmbedClient:
320
341
  # ------------------------------------------------------------------
321
342
 
322
343
  async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
344
+ """Async embed. Chunks are fired concurrently via asyncio.gather
345
+ when the input exceeds `max_batch`; raises the first error if any
346
+ chunk fails (matching the un-chunked semantics)."""
323
347
  if not texts:
324
348
  return []
325
- return await self._post_with_autodetect_async(texts)
349
+ if self._max_batch == 0 or len(texts) <= self._max_batch:
350
+ return await self._post_with_autodetect_async(texts)
351
+ import asyncio
352
+ chunks = [
353
+ texts[start:start + self._max_batch]
354
+ for start in range(0, len(texts), self._max_batch)
355
+ ]
356
+ results = await asyncio.gather(
357
+ *(self._post_with_autodetect_async(chunk) for chunk in chunks)
358
+ )
359
+ out: list[list[float]] = []
360
+ for r in results:
361
+ out.extend(r)
362
+ return out
326
363
 
327
364
  async def embed_one_async(self, text: str) -> list[float]:
328
365
  out = await self.embed_batch_async([text])
@@ -2200,6 +2200,283 @@ async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternal
2200
2200
  raise HTTPException(status_code=500, detail=f"aggregate failed: {e}")
2201
2201
 
2202
2202
 
2203
+ # ── /people-list-internal ───────────────────────────────────────────────
2204
+ #
2205
+ # Corpus-level "all people in these arenas" projection backing the Pip
2206
+ # Relationships UI list page. Different aggregation level from
2207
+ # /aggregate-internal: that one returns buckets PER ONE PERSON; this one
2208
+ # returns one row PER PERSON across the whole arena, sorted by recency.
2209
+ #
2210
+ # Reads the ChannelStat denorm directly. ChannelStats are keyed on
2211
+ # (arena, person_email, channel) and maintained on every store by the
2212
+ # writer block above (~line 1645 onwards). Each person has one
2213
+ # ChannelStat row per channel they appear on. Aggregating over
2214
+ # person_email collapses to one row per person; the per-channel
2215
+ # breakdown rides along as a nested list for callers who want both.
2216
+ #
2217
+ # NOTE(review): the edge-walk fallback for arenas without ChannelStats
2218
+ # (older tenants predating the rollup writer) is not implemented in the
2219
+ # handler below — such arenas currently come back as an empty list.
2220
+
2221
class PeopleListInternalRequest(BaseModel):
    """List all people communicated with across one or more arenas.

    Multi-arena support exists so the Pip Relationships UI can answer
    "which people do Philip OR Jeanne know" without N round-trips. The
    result is the UNION over the given arenas: a person present in any of
    them is returned once, collapsed by person_email.

    `emails` is a batched-mode filter (return only these emails). Used
    by Pip's nightly health-recompute to fetch facets for a known set
    of contacts in one call.

    `search` matches person_email and person_name as case-insensitive
    substrings. Empty/null → no filter. Server-side, so the caller
    doesn't have to overfetch.

    Pagination is `limit`/`offset`. The handler clamps `limit` to the
    range 1..2000 and treats a negative `offset` as 0.
    """

    # Arenas to span; the handler strips blanks and rejects an empty list.
    arenas: List[str]
    # Optional exact-match filter; stripped and lowercased by the handler.
    emails: Optional[List[str]] = None
    # Optional substring filter; stripped and lowercased by the handler.
    search: Optional[str] = None
    limit: int = 200
    offset: int = 0
    # Sort key. Not constrained by this model: the handler checks it against
    # its allowlist and answers HTTP 400 for an unknown value, so callers
    # can't smuggle Cypher fragments into the ORDER BY.
    order_by: str = "last_seen_desc"
2250
+
2251
+
2252
class ChannelStatBucket(BaseModel):
    """Per-channel breakdown for one person, denormalised from ChannelStat."""

    # Channel name; the handler substitutes "unknown" when the stored value
    # is missing or empty.
    channel: str
    # Counters copied from the ChannelStat row; missing values become 0.
    count: int
    inbound: int
    outbound: int
    # Stored timestamps rendered with str(); None when absent.
    last_seen: Optional[str] = None
    first_seen: Optional[str] = None
2261
+
2262
+
2263
class PeopleListEntry(BaseModel):
    """One person's rollup in the corpus-level list response."""

    # Grouping key of the rollup.
    person_email: str
    # Display name taken from a matching Person node; None when there is none.
    person_name: Optional[str] = None
    # Sums of count / inbound / outbound over this person's channel buckets.
    total: int
    inbound: int
    outbound: int
    # Latest last_seen and earliest first_seen over the channel buckets;
    # None when no bucket carries a value.
    last_seen: Optional[str] = None
    first_seen: Optional[str] = None
    # The individual per-channel buckets that were rolled up.
    channels: List[ChannelStatBucket]
2274
+
2275
+
2276
class PeopleListInternalResponse(BaseModel):
    """Paginated people-list result.

    `total_count` is the distinct-people count after filters, before
    pagination — so the caller can paint a "Showing 1-200 of 1206"
    header without a second round-trip. `has_more` is the derived
    pagination signal.
    """

    # Number of people matching the filters, independent of limit/offset.
    total_count: int
    # True when offset + len(items) is still below total_count.
    has_more: bool
    # The requested page, in the requested sort order.
    items: List[PeopleListEntry]
2288
+
2289
+
2290
# Whitelist for order_by to keep the ORDER BY clause injection-safe.
# The request's `order_by` string is only ever used as a KEY into this
# dict; the VALUE is what gets templated into Cypher, so no caller-supplied
# text reaches the query. Values are templated after the per-person map has
# been bound to `row`, so each one must qualify properties with `row.` (or
# wrap them in a function that does). Each value carries only the primary
# sort key.
_PEOPLE_LIST_ORDER_BY = {
    "last_seen_desc": "row.last_seen DESC",
    "last_seen_asc": "row.last_seen ASC",
    "total_desc": "row.total DESC",
    "total_asc": "row.total ASC",
    # Name sorts fall back to the email for people without a display name.
    "name_asc": "coalesce(row.person_name, row.person_email) ASC",
    "name_desc": "coalesce(row.person_name, row.person_email) DESC",
}
2301
+
2302
+
2303
@app.post("/people-list-internal", response_model=PeopleListInternalResponse)
async def people_list_internal(
    req: PeopleListInternalRequest,
) -> PeopleListInternalResponse:
    """Corpus-level "all people I've communicated with" projection.

    Runs one Cypher query over ChannelStat nodes in the requested arenas,
    groups them by person_email, resolves an optional display name from a
    Person node, then filters, sorts and paginates the per-person rows.

    Request handling:
      * ``arenas`` are stripped and blanks dropped; empty → HTTP 400.
      * ``order_by`` must be a key of ``_PEOPLE_LIST_ORDER_BY``; otherwise
        HTTP 400. Only the mapped value is templated into the query.
      * ``limit`` is clamped to 1..2000 (0/None → 200); ``offset`` to >= 0.
      * ``emails`` and ``search`` are stripped and lowercased; empty means
        "no filter".
      * ``search`` matches when the lowercased person_email OR the
        lowercased person_name contains the pattern. It is applied after
        the name join so that name-only matches are kept.

    Response:
      * ``total_count`` is the number of matching people before
        pagination, and stays correct when ``offset`` is past the end
        (the page is then empty).
      * Rows are ordered by the requested key with person_email as a
        tie-breaker, so offset pagination is stable when keys tie.

    This endpoint performs no authentication of its own.
    NOTE(review): presumably access control is enforced by the caller /
    network boundary — confirm.
    NOTE(review): when several arenas are spanned, a person can carry more
    than one bucket for the same channel (one per arena); buckets are not
    merged here.

    Raises:
        HTTPException: 400 for an empty arena list or unknown ``order_by``;
            500 when the query or response assembly fails.
    """

    arenas = [a.strip() for a in (req.arenas or []) if a and a.strip()]
    if not arenas:
        raise HTTPException(status_code=400, detail="at least one arena is required")

    if req.order_by not in _PEOPLE_LIST_ORDER_BY:
        raise HTTPException(
            status_code=400,
            detail=f"order_by must be one of: {sorted(_PEOPLE_LIST_ORDER_BY)}",
        )
    order_clause = _PEOPLE_LIST_ORDER_BY[req.order_by]

    limit = max(1, min(req.limit or 200, 2000))
    offset = max(0, req.offset or 0)
    emails_filter = (
        [e.strip().lower() for e in req.emails if e and e.strip()]
        if req.emails
        else None
    )
    search_pattern = (req.search or "").strip().lower() or None

    # The page is taken as a list slice, so the query needs the exclusive
    # end index rather than the limit itself.
    params: Dict[str, Any] = {
        "arenas": arenas,
        "offset": offset,
        "end": offset + limit,
    }

    # `s.arena IN $arenas` is the multi-arena gate; `s.person_email IN
    # $emails` is the batched-mode filter on the already-lowercased list.
    email_filter_clause = ""
    if emails_filter:
        email_filter_clause = " AND s.person_email IN $emails"
        params["emails"] = emails_filter

    # The search predicate runs AFTER the Person join. Filtering on the
    # email before the join would discard people who match by name only.
    search_clause = ""
    if search_pattern:
        search_clause = (
            "WHERE toLower(person_email) CONTAINS $search\n"
            "   OR (person_name IS NOT NULL AND toLower(person_name) CONTAINS $search)\n"
        )
        params["search"] = search_pattern

    cypher = "".join(
        [
            "MATCH (s:ChannelStat)\n",
            "WHERE s.arena IN $arenas" + email_filter_clause + "\n",
            # Stage 1: one row per person, with the per-channel buckets.
            "WITH s.person_email AS person_email,\n"
            "     collect({channel: s.channel, count: s.count,\n"
            "              inbound: s.inbound, outbound: s.outbound,\n"
            "              last_seen: s.last_seen, first_seen: s.first_seen,\n"
            "              arena: s.arena}) AS channels\n",
            # Resolve the display name from a Person node when one exists.
            # OPTIONAL so people without a Person node still show up with a
            # null name.
            "OPTIONAL MATCH (p:Person {email: person_email})\n"
            "WHERE p.arena IN $arenas\n"
            "WITH person_email,\n"
            "     channels,\n"
            "     head(collect(DISTINCT p.name)) AS person_name\n",
            search_clause,
            # Stage 2: roll the buckets up into per-person totals. The
            # empty string is the "nothing seen yet" sentinel for the
            # timestamp folds and is mapped back to null afterwards.
            "WITH person_email, person_name, channels,\n"
            "     reduce(t = 0, x IN channels | t + coalesce(x.count, 0)) AS total,\n"
            "     reduce(t = 0, x IN channels | t + coalesce(x.inbound, 0)) AS inbound,\n"
            "     reduce(t = 0, x IN channels | t + coalesce(x.outbound, 0)) AS outbound,\n"
            "     reduce(latest = '', x IN channels |\n"
            "       CASE WHEN x.last_seen IS NOT NULL AND toString(x.last_seen) > latest\n"
            "            THEN toString(x.last_seen) ELSE latest END) AS last_seen_raw,\n"
            "     reduce(earliest = '', x IN channels |\n"
            "       CASE WHEN x.first_seen IS NOT NULL AND (earliest = '' OR toString(x.first_seen) < earliest)\n"
            "            THEN toString(x.first_seen) ELSE earliest END) AS first_seen_raw\n",
            "WITH person_email, person_name, channels, total, inbound, outbound,\n"
            "     CASE WHEN last_seen_raw = '' THEN null ELSE last_seen_raw END AS last_seen,\n"
            "     CASE WHEN first_seen_raw = '' THEN null ELSE first_seen_raw END AS first_seen\n",
            # Stage 3: sort, then collect and slice. Collecting everything
            # gives the total; slicing the sorted list gives the page. The
            # final aggregation always yields exactly one record, so the
            # total is reported even for an empty page.
            "WITH {person_email: person_email, person_name: person_name,\n"
            "      channels: channels, total: total,\n"
            "      inbound: inbound, outbound: outbound,\n"
            "      last_seen: last_seen, first_seen: first_seen} AS row\n",
            # person_email is unique per row, which makes the order total.
            f"ORDER BY {order_clause}, row.person_email ASC\n",
            "WITH collect(row) AS all_rows\n"
            "RETURN size(all_rows) AS total_count,\n"
            "       all_rows[$offset..$end] AS page\n",
        ]
    )

    driver = get_neo4j_driver()

    try:
        async with driver.session() as session:
            res = await session.run(cypher, **params)
            record = await res.single()
            if record is None:
                return PeopleListInternalResponse(
                    total_count=0, has_more=False, items=[]
                )

            total_count = int(record["total_count"] or 0)
            page = record["page"] or []

            items: List[PeopleListEntry] = []
            for row in page:
                channels = [
                    ChannelStatBucket(
                        channel=ch.get("channel") or "unknown",
                        count=int(ch.get("count") or 0),
                        inbound=int(ch.get("inbound") or 0),
                        outbound=int(ch.get("outbound") or 0),
                        last_seen=str(ch.get("last_seen")) if ch.get("last_seen") else None,
                        first_seen=str(ch.get("first_seen")) if ch.get("first_seen") else None,
                    )
                    for ch in (row.get("channels") or [])
                ]
                items.append(
                    PeopleListEntry(
                        person_email=row.get("person_email"),
                        person_name=row.get("person_name"),
                        total=int(row.get("total") or 0),
                        inbound=int(row.get("inbound") or 0),
                        outbound=int(row.get("outbound") or 0),
                        last_seen=str(row.get("last_seen")) if row.get("last_seen") else None,
                        first_seen=str(row.get("first_seen")) if row.get("first_seen") else None,
                        channels=channels,
                    )
                )

            return PeopleListInternalResponse(
                total_count=total_count,
                has_more=(offset + len(items)) < total_count,
                items=items,
            )

    except HTTPException:
        raise
    except Exception as e:
        log.error(f"people-list-internal failed: {e}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=f"people-list failed: {e}") from e
2478
+
2479
+
2203
2480
  @app.get("/index-internal-stats")
2204
2481
  async def index_internal_stats() -> dict:
2205
2482
  """Quick sanity check that the L0/L4-qmd/L3 stores are populated."""
@@ -352,3 +352,141 @@ def test_url_without_path_gets_provider_default(recorder):
352
352
  )
353
353
  client.embed_batch(["x"])
354
354
  assert recorder.calls[0]["url"] == "https://lambda-gateway.pentatonic.com/v1/embed"
355
+
356
+
357
+ # ----------------------------------------------------------------------
358
+ # Chunking — work around the Pentatonic AI Gateway's per-call cap of 5
359
+ # texts. Above the cap the gateway 502s; without chunking the layer's
360
+ # /index-batch handler raises, the compat shim swallows it, and vector
361
+ # writes silently drop. Chunking splits the request into chunks of
362
+ # `max_batch` so each call stays within the gateway's limit.
363
+ # ----------------------------------------------------------------------
364
+
365
+
366
+ class _PentatonicEchoStub:
367
+ """httpx.post replacement that returns one embedding per input text,
368
+ matching real gateway behaviour. Each response embedding encodes the
369
+ input index so tests can assert order preservation across chunks."""
370
+
371
+ def __init__(self):
372
+ self.calls: list[dict] = []
373
+ self._offset = 0 # running input-index counter across calls
374
+
375
+ def __call__(self, url, *, json, headers, timeout):
376
+ self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
377
+ n = len(json.get("input") or [])
378
+ embs = [[float(self._offset + i)] for i in range(n)]
379
+ self._offset += n
380
+ return _FakeResponse(200, {"embeddings": embs})
381
+
382
+
383
+ def test_chunking_below_max_batch_makes_one_call(monkeypatch):
384
+ """N <= max_batch sends one request, no chunking overhead."""
385
+ stub = _PentatonicEchoStub()
386
+ monkeypatch.setattr(httpx, "post", stub)
387
+ client = EmbedClient(
388
+ url="https://lambda-gateway.pentatonic.com/v1/embed",
389
+ api_key="k", model="m",
390
+ provider=PROVIDERS["pentatonic-gateway"],
391
+ max_batch=5,
392
+ )
393
+ out = client.embed_batch([f"t{i}" for i in range(5)])
394
+ assert len(out) == 5
395
+ assert len(stub.calls) == 1
396
+ assert len(stub.calls[0]["json"]["input"]) == 5
397
+
398
+
399
+ def test_chunking_above_max_batch_splits_into_calls(monkeypatch):
400
+ """N > max_batch is split into ceil(N / max_batch) posts; results are
401
+ concatenated in input order so the caller can't tell."""
402
+ stub = _PentatonicEchoStub()
403
+ monkeypatch.setattr(httpx, "post", stub)
404
+ client = EmbedClient(
405
+ url="https://lambda-gateway.pentatonic.com/v1/embed",
406
+ api_key="k", model="m",
407
+ provider=PROVIDERS["pentatonic-gateway"],
408
+ max_batch=5,
409
+ )
410
+ out = client.embed_batch([f"t{i}" for i in range(12)])
411
+ # 12 texts → chunks of [5, 5, 2] → 3 calls
412
+ assert len(stub.calls) == 3
413
+ assert [len(c["json"]["input"]) for c in stub.calls] == [5, 5, 2]
414
+ # Stub returns one vector per input. Each vector encodes its
415
+ # cross-chunk input index → assert order preserved.
416
+ assert len(out) == 12
417
+ assert out == [[float(i)] for i in range(12)]
418
+
419
+
420
+ def test_chunking_disabled_with_max_batch_zero(monkeypatch):
421
+ """max_batch=0 means no chunking — old behaviour (one big call)."""
422
+ stub = _PentatonicEchoStub()
423
+ monkeypatch.setattr(httpx, "post", stub)
424
+ client = EmbedClient(
425
+ url="https://lambda-gateway.pentatonic.com/v1/embed",
426
+ api_key="k", model="m",
427
+ provider=PROVIDERS["pentatonic-gateway"],
428
+ max_batch=0,
429
+ )
430
+ client.embed_batch([f"t{i}" for i in range(20)])
431
+ assert len(stub.calls) == 1
432
+ assert len(stub.calls[0]["json"]["input"]) == 20
433
+
434
+
435
+ def test_chunking_propagates_first_error(recorder):
436
+ """If a chunk fails (e.g., gateway 502), the whole call raises with
437
+ the first error — matching the un-chunked semantics. We don't return
438
+ a partial vector list because the caller's downstream `for r, emb, txt
439
+ in zip(...)` loop would silently drop the failed records."""
440
+ # Pentatonic gateway 502 on every call (simulates the real bug)
441
+ recorder.respond(
442
+ "https://lambda-gateway.pentatonic.com/v1/embed",
443
+ _FakeResponse(502, "<html>...bad gateway...</html>"),
444
+ )
445
+ client = EmbedClient(
446
+ url="https://lambda-gateway.pentatonic.com/v1/embed",
447
+ api_key="k", model="m",
448
+ provider=PROVIDERS["pentatonic-gateway"],
449
+ max_batch=5,
450
+ )
451
+ with pytest.raises(EmbedHTTPError) as exc:
452
+ client.embed_batch([f"t{i}" for i in range(8)])
453
+ assert exc.value.status == 502
454
+
455
+
456
+ class _OpenAIEchoStub:
457
+ """OpenAI-shaped stub: returns one embedding per input as
458
+ {data: [{embedding: [...]}]}."""
459
+
460
+ def __init__(self):
461
+ self.calls: list[dict] = []
462
+
463
+ def __call__(self, url, *, json, headers, timeout):
464
+ self.calls.append({"url": url, "json": json, "headers": headers, "timeout": timeout})
465
+ n = len(json.get("input") or [])
466
+ return _FakeResponse(200, {"data": [{"embedding": [0.0]} for _ in range(n)]})
467
+
468
+
469
+ def test_from_env_reads_max_batch(monkeypatch):
470
+ """{prefix}EMBED_MAX_BATCH overrides the default of 5."""
471
+ monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
472
+ monkeypatch.setenv("L4_EMBED_API_KEY", "k")
473
+ monkeypatch.setenv("L4_EMBED_MAX_BATCH", "3")
474
+ stub = _OpenAIEchoStub()
475
+ monkeypatch.setattr(httpx, "post", stub)
476
+ client = EmbedClient.from_env(prefix="L4_")
477
+ client.embed_batch([f"t{i}" for i in range(7)])
478
+ # 7 with chunk=3 → [3, 3, 1] → 3 calls
479
+ assert len(stub.calls) == 3
480
+ assert [len(c["json"]["input"]) for c in stub.calls] == [3, 3, 1]
481
+
482
+
483
+ def test_from_env_default_max_batch_is_five(monkeypatch):
484
+ """Default max_batch=5 matches the observed Pentatonic Gateway cap."""
485
+ monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
486
+ monkeypatch.setenv("L4_EMBED_API_KEY", "k")
487
+ stub = _OpenAIEchoStub()
488
+ monkeypatch.setattr(httpx, "post", stub)
489
+ client = EmbedClient.from_env(prefix="L4_")
490
+ client.embed_batch([f"t{i}" for i in range(10)])
491
+ # 10 with default chunk=5 → [5, 5] → 2 calls
492
+ assert len(stub.calls) == 2
@@ -0,0 +1,379 @@
1
+ """Integration tests for the /people-list-internal endpoint.
2
+
3
+ Sister file to ``test_channel_stat_reader.py``: that one covers
4
+ ``aggregate_internal`` (per-person aggregate); this one covers
5
+ ``people_list_internal`` (corpus-level aggregate — one row per
6
+ Person across one or more arenas).
7
+
8
+ The endpoint backs the Pip Relationships UI list page. Where
9
+ ``aggregate_internal`` returns the per-channel breakdown FOR one
10
+ person, ``people_list_internal`` returns one row PER PERSON across
11
+ the whole arena set, with the per-channel breakdown nested.
12
+
13
+ Gated on NEO4J_TEST_URI + NEO4J_TEST_PASSWORD; skip cleanly when
14
+ those env vars are absent so unit-only test runs stay fast.
15
+
16
+ Run:
17
+
18
+ cd packages/memory-engine
19
+ NEO4J_TEST_URI=bolt://localhost:17687 \\
20
+ NEO4J_TEST_PASSWORD=testpassword \\
21
+ .venv/bin/python -m pytest tests/test_people_list_reader.py -v
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import asyncio
26
+ import importlib.util
27
+ import os
28
+ import sys
29
+ import uuid
30
+ from pathlib import Path
31
+
32
+ import pytest
33
+
34
+
35
+ _NEO4J_URI = os.environ.get("NEO4J_TEST_URI")
36
+ _NEO4J_USER = os.environ.get("NEO4J_TEST_USER", "neo4j")
37
+ _NEO4J_PASSWORD = os.environ.get("NEO4J_TEST_PASSWORD")
38
+
39
+ _skip_no_neo4j = pytest.mark.skipif(
40
+ not (_NEO4J_URI and _NEO4J_PASSWORD),
41
+ reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
42
+ )
43
+
44
+
45
+ ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
46
+ sys.path.insert(0, str(ENGINE_ROOT))
47
+
48
+
49
+ @pytest.fixture(scope="module")
50
+ def proxy_module():
51
+ """Mirror of the helper in test_channel_stat_reader.py — load
52
+ l2-hybridrag-proxy as a module so we can call the FastAPI handler
53
+ directly without HTTP. Override NEO4J_URI/NEO4J_AUTH at runtime
54
+ rather than at import time."""
55
+ spec = importlib.util.spec_from_file_location(
56
+ "l2_proxy_module",
57
+ ENGINE_ROOT / "l2-hybridrag-proxy.py",
58
+ )
59
+ assert spec and spec.loader
60
+ try:
61
+ mod = importlib.util.module_from_spec(spec)
62
+ spec.loader.exec_module(mod)
63
+ except ImportError:
64
+ pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")
65
+ mod.NEO4J_URI = _NEO4J_URI
66
+ mod.NEO4J_AUTH = (_NEO4J_USER, _NEO4J_PASSWORD)
67
+ return mod
68
+
69
+
70
+ @pytest.fixture
71
+ def neo4j_driver():
72
+ """Per-test driver + cleanup. Three arenas so multi-arena tests
73
+ have something to span across."""
74
+ from neo4j import GraphDatabase
75
+
76
+ driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
77
+ arenas = [
78
+ f"pl_a_{uuid.uuid4().hex[:8]}",
79
+ f"pl_b_{uuid.uuid4().hex[:8]}",
80
+ f"pl_c_{uuid.uuid4().hex[:8]}",
81
+ ]
82
+ yield driver, arenas
83
+ with driver.session() as session:
84
+ for arena in arenas:
85
+ session.run(
86
+ "MATCH (n) WHERE n.arena = $arena DETACH DELETE n",
87
+ arena=arena,
88
+ )
89
+ driver.close()
90
+
91
+
92
+ def _ensure_indexes(session) -> None:
93
+ """Idempotent index + constraint setup matching the writer block."""
94
+ session.run(
95
+ "CREATE INDEX channelstat_arena_email IF NOT EXISTS "
96
+ "FOR (s:ChannelStat) ON (s.arena, s.person_email)"
97
+ )
98
+ session.run(
99
+ "CREATE CONSTRAINT channelstat_unique IF NOT EXISTS "
100
+ "FOR (s:ChannelStat) REQUIRE (s.arena, s.person_email, s.channel) IS UNIQUE"
101
+ )
102
+
103
+
104
+ def _write_stat(
105
+ session,
106
+ arena: str,
107
+ email: str,
108
+ channel: str,
109
+ count: int = 1,
110
+ inbound: int = 0,
111
+ outbound: int = 0,
112
+ last_seen: str = "2026-05-10T00:00:00Z",
113
+ first_seen: str = "2024-01-01T00:00:00Z",
114
+ name: str | None = None,
115
+ ) -> None:
116
+ """Insert a ChannelStat node + matching Person (with optional
117
+ name). Skips the Chunk + COMMUNICATED edge — those aren't read
118
+ by ``people_list_internal`` since it reads the denorm directly."""
119
+ session.run(
120
+ """
121
+ MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: $channel})
122
+ SET s.count = $count,
123
+ s.inbound = $inbound,
124
+ s.outbound = $outbound,
125
+ s.last_seen = $last_seen,
126
+ s.first_seen = $first_seen
127
+ """,
128
+ arena=arena, email=email, channel=channel,
129
+ count=count, inbound=inbound, outbound=outbound,
130
+ last_seen=last_seen, first_seen=first_seen,
131
+ )
132
+ # Person node carries the display name. Email is the join key.
133
+ # OPTIONAL MATCH in the reader joins on email + arena.
134
+ session.run(
135
+ """
136
+ MERGE (p:Entity:Person {arena: $arena, email: $email})
137
+ SET p.name = $name
138
+ """,
139
+ arena=arena, email=email, name=name,
140
+ )
141
+
142
+
143
+ def _call_people_list(proxy_module, **kwargs):
144
+ """Invoke people_list_internal directly. Same shape as
145
+ _call_aggregate in the sister file."""
146
+ req = proxy_module.PeopleListInternalRequest(**kwargs)
147
+ return asyncio.run(proxy_module.people_list_internal(req))
148
+
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # Single-arena basic behaviour.
152
+ # ---------------------------------------------------------------------------
153
+
154
+
155
+ @_skip_no_neo4j
156
+ def test_returns_one_row_per_person_with_channels_nested(
157
+ neo4j_driver, proxy_module
158
+ ) -> None:
159
+ """Three ChannelStats for two people in one arena → two list rows.
160
+ Channels collapse into the nested ``channels`` list per person."""
161
+ driver, (arena, _, _) = neo4j_driver
162
+ with driver.session() as session:
163
+ _ensure_indexes(session)
164
+ # Alex: email + slack
165
+ _write_stat(session, arena, "alex@x.io", "email", count=3, inbound=2, outbound=1,
166
+ last_seen="2026-05-10T00:00:00Z", name="Alex Tong")
167
+ _write_stat(session, arena, "alex@x.io", "slack", count=1, inbound=1, outbound=0,
168
+ last_seen="2026-05-08T00:00:00Z", name="Alex Tong")
169
+ # Bea: email only
170
+ _write_stat(session, arena, "bea@y.io", "email", count=5, inbound=5, outbound=0,
171
+ last_seen="2026-05-09T00:00:00Z", name="Bea Chen")
172
+
173
+ out = _call_people_list(proxy_module, arenas=[arena])
174
+ assert out.total_count == 2
175
+ assert out.has_more is False
176
+ emails = sorted(item.person_email for item in out.items)
177
+ assert emails == ["alex@x.io", "bea@y.io"]
178
+ alex = next(item for item in out.items if item.person_email == "alex@x.io")
179
+ assert alex.person_name == "Alex Tong"
180
+ assert alex.total == 4 # 3 email + 1 slack
181
+ assert alex.inbound == 3
182
+ assert alex.outbound == 1
183
+ assert alex.last_seen == "2026-05-10T00:00:00Z"
184
+ assert {ch.channel for ch in alex.channels} == {"email", "slack"}
185
+
186
+
187
+ @_skip_no_neo4j
188
+ def test_default_order_is_last_seen_desc(neo4j_driver, proxy_module) -> None:
189
+ """Default sort: most-recently-active person first. Backs the
190
+ Relationships UI's default landing view."""
191
+ driver, (arena, _, _) = neo4j_driver
192
+ with driver.session() as session:
193
+ _ensure_indexes(session)
194
+ _write_stat(session, arena, "old@x.io", "email", last_seen="2025-01-01T00:00:00Z")
195
+ _write_stat(session, arena, "new@x.io", "email", last_seen="2026-05-12T00:00:00Z")
196
+ _write_stat(session, arena, "mid@x.io", "email", last_seen="2026-01-01T00:00:00Z")
197
+
198
+ out = _call_people_list(proxy_module, arenas=[arena])
199
+ assert [i.person_email for i in out.items] == ["new@x.io", "mid@x.io", "old@x.io"]
200
+
201
+
202
+ # ---------------------------------------------------------------------------
203
+ # Multi-arena behaviour.
204
+ # ---------------------------------------------------------------------------
205
+
206
+
207
+ @_skip_no_neo4j
208
+ def test_multi_arena_returns_persons_from_both_arenas(
209
+ neo4j_driver, proxy_module
210
+ ) -> None:
211
+ """A vendor who appears in arena A AND arena B should be one row
212
+ with both arenas' channel data. Backs the "people known by Philip
213
+ OR Jeanne" use case."""
214
+ driver, (arena_a, arena_b, _) = neo4j_driver
215
+ with driver.session() as session:
216
+ _ensure_indexes(session)
217
+ # Same vendor in both arenas
218
+ _write_stat(session, arena_a, "vendor@v.io", "email", count=2,
219
+ last_seen="2026-05-10T00:00:00Z", name="Vendor Co")
220
+ _write_stat(session, arena_b, "vendor@v.io", "slack", count=3,
221
+ last_seen="2026-05-11T00:00:00Z", name="Vendor Co")
222
+ # Unique-to-A person
223
+ _write_stat(session, arena_a, "only@a.io", "email", last_seen="2026-05-09T00:00:00Z")
224
+
225
+ out = _call_people_list(proxy_module, arenas=[arena_a, arena_b])
226
+ emails = sorted(i.person_email for i in out.items)
227
+ assert emails == ["only@a.io", "vendor@v.io"]
228
+ vendor = next(i for i in out.items if i.person_email == "vendor@v.io")
229
+ # Total across both arenas
230
+ assert vendor.total == 5
231
+ # Both channels surface
232
+ assert {ch.channel for ch in vendor.channels} == {"email", "slack"}
233
+ # last_seen is the max across arenas
234
+ assert vendor.last_seen == "2026-05-11T00:00:00Z"
235
+
236
+
237
+ @_skip_no_neo4j
238
+ def test_arena_filter_excludes_other_arenas(neo4j_driver, proxy_module) -> None:
239
+ """A person in arena C must NOT appear when only A+B are requested."""
240
+ driver, (arena_a, arena_b, arena_c) = neo4j_driver
241
+ with driver.session() as session:
242
+ _ensure_indexes(session)
243
+ _write_stat(session, arena_a, "a-only@x.io", "email")
244
+ _write_stat(session, arena_c, "c-only@x.io", "email")
245
+
246
+ out = _call_people_list(proxy_module, arenas=[arena_a, arena_b])
247
+ emails = {i.person_email for i in out.items}
248
+ assert "a-only@x.io" in emails
249
+ assert "c-only@x.io" not in emails
250
+
251
+
252
+ # ---------------------------------------------------------------------------
253
+ # Filters.
254
+ # ---------------------------------------------------------------------------
255
+
256
+
257
+ @_skip_no_neo4j
258
+ def test_emails_filter_restricts_to_listed_addresses(
259
+ neo4j_driver, proxy_module
260
+ ) -> None:
261
+ """``emails`` is the batched-mode filter — used by Pip's nightly
262
+ health-recompute to fetch facets for many specific people in one
263
+ call. Cuts 8k×9 SQL queries to ~9 GraphQL calls."""
264
+ driver, (arena, _, _) = neo4j_driver
265
+ with driver.session() as session:
266
+ _ensure_indexes(session)
267
+ _write_stat(session, arena, "alex@x.io", "email")
268
+ _write_stat(session, arena, "bea@y.io", "email")
269
+ _write_stat(session, arena, "carl@z.io", "email")
270
+
271
+ out = _call_people_list(
272
+ proxy_module,
273
+ arenas=[arena],
274
+ emails=["alex@x.io", "carl@z.io"],
275
+ )
276
+ emails = sorted(i.person_email for i in out.items)
277
+ assert emails == ["alex@x.io", "carl@z.io"]
278
+
279
+
280
+ @_skip_no_neo4j
281
+ def test_search_substring_matches_email_or_name(
282
+ neo4j_driver, proxy_module
283
+ ) -> None:
284
+ """Search is case-insensitive substring on person_name and
285
+ person_email. Backs the Relationships UI search box."""
286
+ driver, (arena, _, _) = neo4j_driver
287
+ with driver.session() as session:
288
+ _ensure_indexes(session)
289
+ _write_stat(session, arena, "alex@pentatonic.com", "email", name="Alex Tong")
290
+ _write_stat(session, arena, "bea@pentatonic.com", "email", name="Bea Chen")
291
+ _write_stat(session, arena, "carl@external.com", "email", name="Carl X")
292
+
293
+ # Search by name fragment
294
+ out = _call_people_list(proxy_module, arenas=[arena], search="alex")
295
+ assert {i.person_email for i in out.items} == {"alex@pentatonic.com"}
296
+
297
+ # Search by email-domain fragment matches everyone at pentatonic
298
+ out = _call_people_list(proxy_module, arenas=[arena], search="pentatonic")
299
+ assert {i.person_email for i in out.items} == {
300
+ "alex@pentatonic.com", "bea@pentatonic.com",
301
+ }
302
+
303
+
304
+ # ---------------------------------------------------------------------------
305
+ # Pagination.
306
+ # ---------------------------------------------------------------------------
307
+
308
+
309
+ @_skip_no_neo4j
310
+ def test_pagination_limit_and_offset(neo4j_driver, proxy_module) -> None:
311
+ """`limit` and `offset` slice the page; `total_count` is the count of
312
+ matching people BEFORE pagination so the UI can render "Showing N of M"."""
313
+ driver, (arena, _, _) = neo4j_driver
314
+ with driver.session() as session:
315
+ _ensure_indexes(session)
316
+ # 5 people, last_seen ascending so a desc sort puts e first
317
+ for i, letter in enumerate("abcde"):
318
+ _write_stat(
319
+ session, arena, f"{letter}@x.io", "email",
320
+ last_seen=f"2026-05-{10 + i:02d}T00:00:00Z",
321
+ )
322
+
323
+ page1 = _call_people_list(proxy_module, arenas=[arena], limit=2, offset=0)
324
+ page2 = _call_people_list(proxy_module, arenas=[arena], limit=2, offset=2)
325
+ page3 = _call_people_list(proxy_module, arenas=[arena], limit=2, offset=4)
326
+ assert page1.total_count == 5 == page2.total_count == page3.total_count
327
+ assert page1.has_more is True
328
+ assert page2.has_more is True
329
+ assert page3.has_more is False
330
+ assert [i.person_email for i in page1.items] == ["e@x.io", "d@x.io"]
331
+ assert [i.person_email for i in page2.items] == ["c@x.io", "b@x.io"]
332
+ assert [i.person_email for i in page3.items] == ["a@x.io"]
333
+
334
+
335
+ @_skip_no_neo4j
336
+ def test_order_by_total_desc(neo4j_driver, proxy_module) -> None:
337
+ driver, (arena, _, _) = neo4j_driver
338
+ with driver.session() as session:
339
+ _ensure_indexes(session)
340
+ _write_stat(session, arena, "many@x.io", "email", count=100)
341
+ _write_stat(session, arena, "few@x.io", "email", count=5)
342
+ _write_stat(session, arena, "mid@x.io", "email", count=50)
343
+
344
+ out = _call_people_list(proxy_module, arenas=[arena], order_by="total_desc")
345
+ assert [i.person_email for i in out.items] == [
346
+ "many@x.io", "mid@x.io", "few@x.io",
347
+ ]
348
+
349
+
350
+ # ---------------------------------------------------------------------------
351
+ # Validation.
352
+ # ---------------------------------------------------------------------------
353
+
354
+
355
+ @_skip_no_neo4j
356
+ def test_empty_arenas_list_rejected(neo4j_driver, proxy_module) -> None:
357
+ """An empty arenas list should 400, not silently return everything.
358
+ Multi-tenant safety: a missing/empty filter must not become an
359
+ 'all tenants' query."""
360
+ from fastapi import HTTPException
361
+
362
+ with pytest.raises(HTTPException) as exc:
363
+ _call_people_list(proxy_module, arenas=[])
364
+ assert exc.value.status_code == 400
365
+
366
+
367
+ @_skip_no_neo4j
368
+ def test_unknown_order_by_rejected(neo4j_driver, proxy_module) -> None:
369
+ """Whitelisted sort keys — anything else 400s. Belt-and-braces
370
+ against ORDER BY templating becoming an injection vector."""
371
+ from fastapi import HTTPException
372
+
373
+ driver, (arena, _, _) = neo4j_driver
374
+ with pytest.raises(HTTPException) as exc:
375
+ _call_people_list(
376
+ proxy_module, arenas=[arena],
377
+ order_by="totally_made_up",
378
+ )
379
+ assert exc.value.status_code == 400