@pentatonic-ai/ai-agent-sdk 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env python3
2
+ """Wipe pre-arena-scoping :Entity nodes from the L3 Neo4j graph.
3
+
4
+ Run this AFTER the engine has been deployed with the arena-scoped
5
+ writer paths, not before. Sequence:
6
+
7
+ 1. Deploy l2-hybridrag-proxy with arena-scoped MERGE patterns.
8
+ 2. Verify new ingest is creating arena-tagged entities (run
9
+ --dry-run on this script first; it reports legacy vs new).
10
+ 3. Run this script with --confirm to wipe legacy entities.
11
+ 4. Future ingest re-extracts entities from existing :Chunk nodes
12
+ on-demand (search-side touches them; new stores recreate them
13
+ from scratch under the right arena).
14
+
15
+ Why wipe vs. backfill: pre-fix entities collapsed cross-tenant by name,
16
+ so their MENTIONS edges connect to chunks across multiple arenas.
17
+ Splitting them by mentions is doable but error-prone (edge cases for
18
+ many-arena entities, orphans, no-mention entities). The Hebbian weights
19
+ on those edges were also corrupted by cross-tenant traffic, so they
20
+ weren't worth saving. Chunks are preserved either way — re-extraction
21
+ is cheap.
22
+
23
+ Usage:
24
+ # report counts only
25
+ python wipe-legacy-l3-entities.py --neo4j-uri bolt://localhost:7687
26
+
27
+ # actually wipe
28
+ python wipe-legacy-l3-entities.py --neo4j-uri bolt://localhost:7687 --confirm
29
+
30
+ The script is idempotent — running it twice on a clean graph deletes
31
+ zero rows.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import argparse
36
+ import os
37
+ import sys
38
+
39
+ try:
40
+ from neo4j import GraphDatabase
41
+ except ImportError:
42
+ print("ERROR: neo4j driver not installed. `pip install neo4j` first.", file=sys.stderr)
43
+ sys.exit(1)
44
+
45
+
46
def main() -> int:
    """Report — and, with ``--confirm``, delete — legacy :Entity nodes
    that predate arena scoping in the L3 Neo4j graph.

    Returns a process exit code: 0 on success (including dry runs and
    no-op runs), 2 when no password was supplied.
    """
    parser = argparse.ArgumentParser(
        # (__doc__ or "") guards against `python -OO`, which strips
        # docstrings to None and would make .split() raise.
        description=(__doc__ or "").split("\n\n")[0],
    )
    parser.add_argument(
        "--neo4j-uri",
        default=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        help="Neo4j bolt URI (default: NEO4J_URI env or bolt://localhost:7687)",
    )
    parser.add_argument(
        "--neo4j-user",
        default=os.environ.get("NEO4J_USER", "neo4j"),
    )
    parser.add_argument(
        "--neo4j-password",
        default=os.environ.get("NEO4J_PASSWORD"),
    )
    parser.add_argument(
        "--confirm",
        action="store_true",
        help="Actually delete. Without this flag, runs in dry-run mode "
        "(reports counts only).",
    )
    args = parser.parse_args()

    if not args.neo4j_password:
        print("ERROR: --neo4j-password or NEO4J_PASSWORD env required", file=sys.stderr)
        return 2

    driver = GraphDatabase.driver(
        args.neo4j_uri, auth=(args.neo4j_user, args.neo4j_password),
    )
    try:
        with driver.session() as session:
            legacy = _report_counts(session)
            if not args.confirm:
                print("\nDry run — pass --confirm to actually delete legacy entities.")
                return 0
            if legacy == 0:
                print("\nNothing to do — all entities already arena-scoped.")
                return 0
            _wipe_legacy(session, legacy)
        return 0
    finally:
        driver.close()


def _count(session, cypher: str) -> int:
    """Run a single-row count query and return its ``n`` column."""
    return session.run(cypher).single()["n"]


def _report_counts(session) -> int:
    """Print legacy vs arena-scoped breakdowns for :Entity and :Chunk.

    Printed before any delete so the operator can eyeball whether the
    new writer path has actually started producing arena-scoped rows.
    Returns the legacy :Entity count so the caller can decide whether a
    wipe is needed.
    """
    legacy = _count(session, "MATCH (e:Entity) WHERE e.arena IS NULL RETURN count(e) AS n")
    scoped = _count(session, "MATCH (e:Entity) WHERE e.arena IS NOT NULL RETURN count(e) AS n")
    print(f"L3 Entity nodes: {legacy} legacy (no arena) / {scoped} arena-scoped")

    # Same breakdown for chunks — they should already all be
    # arena-tagged but worth verifying before/after.
    chunk_legacy = _count(session, "MATCH (c:Chunk) WHERE c.arena IS NULL RETURN count(c) AS n")
    chunk_scoped = _count(session, "MATCH (c:Chunk) WHERE c.arena IS NOT NULL RETURN count(c) AS n")
    print(f"L3 Chunk nodes: {chunk_legacy} legacy (no arena) / {chunk_scoped} arena-scoped")
    return legacy


def _wipe_legacy(session, legacy: int) -> None:
    """DETACH DELETE all legacy (arena-less) entities, then drop the
    obsolete single-key entity index (best-effort)."""
    print(f"\nWiping {legacy} legacy entities…")
    result = session.run(
        "MATCH (e:Entity) WHERE e.arena IS NULL DETACH DELETE e RETURN count(e) AS n"
    ).single()
    deleted = result["n"]
    print(f"Deleted {deleted} legacy entities.")

    # Drop the old single-key entity index now that the data is gone —
    # safe because the new writer path uses the compound (arena, name)
    # index. Best-effort: an older server may not support IF EXISTS.
    try:
        session.run("DROP INDEX entity_name IF EXISTS")
        print("Dropped legacy `entity_name` index.")
    except Exception as e:  # noqa: BLE001
        print(f"Note: could not drop entity_name index ({e}); ok if absent.")
125
+
126
+
127
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell.
    raise SystemExit(main())
@@ -217,16 +217,40 @@ print("yes" if any("Eclipse" in r.get("content","") for r in data) else "no")')
217
217
  || fail "tenant-y lost the shared phrase (id collision?)"
218
218
 
219
219
  # ---------------------------------------------------------------------------
220
- # /forget — by metadata_contains. Cleans up so reruns are idempotent.
220
+ # /forget — tenant-scoped delete by arena.
221
+ #
222
+ # Pre-arena, /forget would unconditionally trigger a global wipe of
223
+ # L0+L4+L3 inside the shim, regardless of what filters were passed. The
224
+ # smoke test happily asserted "deleted >= 1" because the shim returned
225
+ # the count of every row globally, even rows the caller didn't ask to
226
+ # remove. That was the right thing for a bench reset, the wrong thing
227
+ # for any real tenant operation.
228
+ #
229
+ # Now /forget honours the arena in the metadata filter:
230
+ # - {metadata_contains: {arena: <tenant>}} → tenant-scoped wipe
231
+ # - {metadata_contains: {<other>: <val>}} → L6 targeted only
232
+ # - {} (empty body) → bench reset (gated
233
+ # by GLOBAL_WIPE in the
234
+ # internal endpoint)
235
+ #
236
+ # We assert here that wiping arena=e2e-tenant-a actually removes ≥1 row,
237
+ # AND that arena=e2e-tenant-b's rows survived — proves real tenant
238
+ # isolation rather than just "shim returned non-zero".
221
239
  # ---------------------------------------------------------------------------
222
240
 
223
241
  echo ""
224
- echo "=== /forget probe=e2e-arena ==="
242
+ echo "=== /forget arena=e2e-tenant-a ==="
225
243
  F=$(curl -sf -X POST "$BASE/forget" -H "Content-Type: application/json" \
226
- -d '{"metadata_contains":{"probe":"e2e-arena"}}')
244
+ -d '{"metadata_contains":{"arena":"e2e-tenant-a"}}')
227
245
  deleted=$(echo "$F" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("deleted",0))')
228
246
  echo " deleted: $deleted"
229
- [ "$deleted" -ge "1" ] && ok "/forget removed at least 1 row" || fail "/forget"
247
+ [ "$deleted" -ge "1" ] && ok "/forget removed tenant-a rows" || fail "/forget tenant-a wipe"
248
+
249
+ # tenant-b should still have its rows.
250
+ B=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
251
+ -d '{"arena":"e2e-tenant-b","query":"Borealis","limit":4}')
252
+ b_hits=$(echo "$B" | python3 -c 'import json,sys; print(len(json.load(sys.stdin).get("results",[])))')
253
+ [ "$b_hits" -ge "1" ] && ok "/forget left tenant-b intact" || fail "/forget leaked into tenant-b"
230
254
 
231
255
  echo ""
232
256
  echo "=== Result ==="
@@ -0,0 +1,333 @@
1
+ """Tests for the /aggregate endpoint and its L2 proxy backend.
2
+
3
+ Two flavours:
4
+
5
+ - Pure-unit tests around the request validation + group_by
6
+ whitelisting logic. Hermetic, fast, no Neo4j needed. Run on
7
+ every PR.
8
+
9
+ - Neo4j-backed integration tests (run when ``NEO4J_TEST_URI`` and
10
+ ``NEO4J_TEST_PASSWORD`` env vars are set). These prove the
11
+ aggregate Cypher actually returns the right buckets for typed-
12
+ Person + COMMUNICATED graphs and stays inside the caller's
13
+ arena.
14
+
15
+ Run:
16
+
17
+ cd packages/memory-engine
18
+ .venv/bin/python -m pytest tests/test_aggregate.py -v
19
+
20
+ Run with Neo4j:
21
+
22
+ NEO4J_TEST_URI=bolt://localhost:17687 \\
23
+ NEO4J_TEST_PASSWORD=testpassword \\
24
+ .venv/bin/python -m pytest tests/test_aggregate.py -v
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import sys
30
+ import uuid
31
+ from pathlib import Path
32
+
33
+ import pytest
34
+
35
+
36
+ ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
37
+ sys.path.insert(0, str(ENGINE_ROOT))
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Unit tests — validation surface around the public shape.
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
def test_group_by_keys_whitelist_keeps_supported_keys_in_order() -> None:
    """The L2 proxy templates group_by keys directly into Cypher; the
    whitelist is the safety rail. Pin its contents + ordering
    behaviour so a future change can't accidentally accept arbitrary
    property names."""
    # Lazy import keeps the unit-test path free of pymilvus and friends.
    import importlib.util

    proxy_path = ENGINE_ROOT / "l2-hybridrag-proxy.py"
    spec = importlib.util.spec_from_file_location("l2_proxy_module", proxy_path)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    # The proxy imports fastapi/neo4j/etc. at module import time. Skip
    # when those aren't available — the whitelist is also asserted via
    # the integration tests.
    try:
        spec.loader.exec_module(module)
    except ImportError:
        pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")

    # Public contract: only channel + direction are supported today.
    # Adding more is a deliberate decision; this assertion is a guard
    # against adding without thinking.
    assert module._AGGREGATE_GROUP_BY_KEYS == {"channel", "direction"}
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Neo4j-backed integration tests.
75
+ # ---------------------------------------------------------------------------
76
+
77
+
78
# Connection settings for the optional Neo4j-backed integration tests;
# leaving URI or password unset keeps those tests skipped.
_NEO4J_URI = os.getenv("NEO4J_TEST_URI")
_NEO4J_USER = os.getenv("NEO4J_TEST_USER", "neo4j")
_NEO4J_PASSWORD = os.getenv("NEO4J_TEST_PASSWORD")

# Shared marker applied to every integration test below.
_skip_no_neo4j = pytest.mark.skipif(
    not _NEO4J_URI or not _NEO4J_PASSWORD,
    reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
)
86
+
87
+
88
@pytest.fixture
def neo4j_driver():
    """Yield ``(driver, [arena_a, arena_b])`` and clean up on teardown.

    Arena names are randomised so concurrent test runs don't trample
    each other; teardown deletes only nodes tagged with those arenas —
    never a global wipe — so a populated dev database stays safe.
    """
    from neo4j import GraphDatabase

    driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    test_arenas = [f"agg_a_{uuid.uuid4().hex[:8]}", f"agg_b_{uuid.uuid4().hex[:8]}"]
    yield driver, test_arenas
    cleanup = "MATCH (n) WHERE n.arena = $arena DETACH DELETE n"
    with driver.session() as session:
        for test_arena in test_arenas:
            session.run(cleanup, arena=test_arena)
    driver.close()
109
+
110
+
111
def _seed_person_communications(
    session,
    arena: str,
    email: str,
    edges: list[tuple[str, str, str]],
) -> None:
    """Materialise (:Person {arena, email})-[:COMMUNICATED {channel,
    direction, occurred_at}]->(:Chunk {arena}) edges from a list of
    (channel, direction, occurred_at) tuples. Mirrors the exact shape
    that engine/services/l2/l2-hybridrag-proxy.py:_index-internal-batch
    writes on STORE_MEMORY ingest."""
    # The statement is loop-invariant; only the parameters vary.
    query = """
        MERGE (c:Chunk {id: $cid})
        SET c.arena = $arena, c.text = 't', c.path = 'p',
            c.created_at = $occurred_at
        MERGE (p:Entity:Person {arena: $arena, email: $email})
        ON CREATE SET p.created_at = $occurred_at
        MERGE (p)-[r:COMMUNICATED]->(c)
        ON CREATE SET r.channel = $channel,
                      r.direction = $direction,
                      r.occurred_at = $occurred_at,
                      r.weight = 1.0
    """
    for idx, (channel, direction, occurred_at) in enumerate(edges):
        session.run(
            query,
            cid=f"chunk_{arena}_{idx}",
            arena=arena,
            email=email,
            channel=channel,
            direction=direction,
            occurred_at=occurred_at,
        )
139
+
140
+
141
def _aggregate_request(
    session,
    arena: str,
    contact_email: str,
    group_by: list[str] | None = None,
) -> dict:
    """Run the same Cypher that /aggregate-internal would. Built
    inline so tests stay free of FastAPI plumbing."""
    # Mirror the real endpoint: None defaults to ["channel"]; an
    # explicit empty list means "no group_by" (one global bucket).
    wanted = ["channel"] if group_by is None else group_by
    # Whitelist + order-preserving de-dupe of the requested keys.
    safe_group_by: list[str] = []
    for key in wanted:
        if key in {"channel", "direction"} and key not in safe_group_by:
            safe_group_by.append(key)

    head = [
        "MATCH (p:Person {arena: $arena})-[r:COMMUNICATED]->(c:Chunk {arena: $arena})",
        "WHERE p.email = $contact_email",
    ]
    if safe_group_by:
        with_keys = ", ".join(f"r.{k} AS {k}" for k in safe_group_by)
        return_keys = ", ".join(safe_group_by)
        tail = [
            f"WITH {with_keys}, r.direction AS _direction, r.occurred_at AS _occurred_at",
            f"RETURN {return_keys},",
            "count(*) AS count,",
            "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,",
            "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,",
            "max(_occurred_at) AS last_seen,",
            "min(_occurred_at) AS first_seen",
            "ORDER BY count DESC",
        ]
    else:
        tail = [
            "WITH r.direction AS _direction, r.occurred_at AS _occurred_at",
            "RETURN count(*) AS count,",
            "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,",
            "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,",
            "max(_occurred_at) AS last_seen,",
            "min(_occurred_at) AS first_seen",
        ]
    cypher = "\n".join(head + tail) + "\n"

    buckets: list[dict] = []
    total = 0
    latest = None
    for record in session.run(cypher, arena=arena, contact_email=contact_email):
        bucket_count = int(record["count"] or 0)
        total += bucket_count
        last_seen = record["last_seen"]
        # ISO-8601 strings order correctly under lexicographic compare.
        if last_seen and (latest is None or str(last_seen) > latest):
            latest = str(last_seen)
        buckets.append({
            # Empty safe_group_by yields {} — the single global bucket.
            "keys": {k: record[k] for k in safe_group_by},
            "count": bucket_count,
            "inbound": int(record["inbound"] or 0),
            "outbound": int(record["outbound"] or 0),
            "last_seen": str(last_seen) if last_seen else None,
            "first_seen": str(record["first_seen"]) if record["first_seen"] else None,
        })
    return {"arena": arena, "total": total, "last_seen": latest, "buckets": buckets}
205
+
206
+
207
@_skip_no_neo4j
def test_aggregate_groups_by_channel_with_direction_counters(neo4j_driver) -> None:
    """5 emails (3 inbound, 2 outbound) + 2 slack (1/1) yields two
    buckets ordered by count, each with correct inbound/outbound
    splits and time bounds."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex.tong@pentatonic.com"

    seed_edges = [
        ("email", "inbound", "2026-05-08T09:00:00Z"),
        ("email", "outbound", "2026-05-09T10:00:00Z"),
        ("email", "inbound", "2026-05-07T08:00:00Z"),
        ("email", "outbound", "2026-05-09T11:00:00Z"),
        ("email", "inbound", "2026-05-06T12:00:00Z"),
        ("slack", "inbound", "2026-05-09T15:00:00Z"),
        ("slack", "outbound", "2026-05-08T11:00:00Z"),
    ]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, seed_edges)
        out = _aggregate_request(session, arena_a, email, ["channel"])

    assert out["total"] == 7
    assert out["last_seen"] == "2026-05-09T15:00:00Z"
    assert len(out["buckets"]) == 2
    email_bucket, slack_bucket = out["buckets"]

    # Busiest channel first (5 > 2).
    assert email_bucket["keys"] == {"channel": "email"}
    assert (email_bucket["count"], email_bucket["inbound"], email_bucket["outbound"]) == (5, 3, 2)

    assert slack_bucket["keys"] == {"channel": "slack"}
    assert (slack_bucket["count"], slack_bucket["inbound"], slack_bucket["outbound"]) == (2, 1, 1)
244
+
245
+
246
@_skip_no_neo4j
def test_aggregate_arena_isolation(neo4j_driver) -> None:
    """A's aggregate never sees B's edges, even when both arenas have
    a Person with the same email — the shared name was the bug
    pattern that motivated the typed-Person work in #28."""
    driver, (arena_a, arena_b) = neo4j_driver
    email = "shared@example.com"

    edges_a = [("email", "inbound", "2026-05-09T09:00:00Z")]
    edges_b = [
        ("email", "inbound", "2026-05-09T10:00:00Z"),
        ("slack", "outbound", "2026-05-09T11:00:00Z"),
    ]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, edges_a)
        _seed_person_communications(session, arena_b, email, edges_b)

        out_a = _aggregate_request(session, arena_a, email)
        out_b = _aggregate_request(session, arena_b, email)

    # Each arena only ever sees its own edge counts.
    assert (out_a["total"], len(out_a["buckets"])) == (1, 1)
    assert (out_b["total"], len(out_b["buckets"])) == (2, 2)
274
+
275
+
276
@_skip_no_neo4j
def test_aggregate_with_no_group_by_returns_single_bucket(neo4j_driver) -> None:
    """No group_by → one global bucket with overall totals only."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex@x.io"

    with driver.session() as session:
        _seed_person_communications(
            session, arena_a, email,
            [
                ("email", "inbound", "2026-05-09T09:00:00Z"),
                ("slack", "outbound", "2026-05-09T11:00:00Z"),
                ("email", "outbound", "2026-05-09T15:00:00Z"),
            ],
        )
        out = _aggregate_request(session, arena_a, email, group_by=[])

    assert out["total"] == 3
    assert len(out["buckets"]) == 1
    only_bucket = out["buckets"][0]
    # A global bucket carries no grouping keys, just the counters.
    assert only_bucket["keys"] == {}
    assert only_bucket["inbound"] == 1
    assert only_bucket["outbound"] == 2
298
+
299
+
300
@_skip_no_neo4j
def test_aggregate_returns_empty_when_person_node_missing(neo4j_driver) -> None:
    """When the typed-Person nodes don't exist for this contact yet
    (older memories, tenants pre-#28), the response is total: 0 with
    no buckets — caller falls back to whatever it had before. This is
    the deliberate non-fallback at this layer; the over-fetch v1
    lives in TES."""
    driver, (arena_a, _) = neo4j_driver
    # Fix: previously the session was opened via driver.session().__enter__()
    # and never exited, leaking a session/connection for the rest of the
    # test run. The context manager guarantees it is released.
    with driver.session() as session:
        out = _aggregate_request(session, arena_a, "no-one@example.com")
    assert out["total"] == 0
    assert out["buckets"] == []
313
+
314
+
315
@_skip_no_neo4j
def test_aggregate_unknown_group_by_keys_are_silently_dropped(neo4j_driver) -> None:
    """Defence-in-depth: even if a future caller passes
    group_by=['evil_property_name'], we never template that string
    into Cypher. Aggregate falls back to no group_by (one global
    bucket) rather than failing — the whitelist is already on the
    real endpoint, but the test pins the behaviour at the helper
    level too."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex@x.io"
    single_edge = [("email", "inbound", "2026-05-09T09:00:00Z")]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, single_edge)
        out = _aggregate_request(session, arena_a, email, ["evil; DROP TABLE"])
    # No supported keys remained → single global bucket with no keys.
    assert len(out["buckets"]) == 1
    assert out["buckets"][0]["keys"] == {}