@pentatonic-ai/ai-agent-sdk 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
1
+ """Integration tests for ChannelStat rollup writes + reader fast-path.
2
+
3
+ These exercise the actual Cypher running against a live Neo4j: the
4
+ writer block in /index-internal-batch maintains (:ChannelStat) nodes
5
+ on every store with contact_email metadata, and /aggregate-internal
6
+ reads from those nodes when present (falling back to the edge walk
7
+ when stats haven't been populated yet).
8
+
9
+ Gated on NEO4J_TEST_URI + NEO4J_TEST_PASSWORD; skip cleanly when
10
+ those env vars are absent so unit-only test runs stay fast.
11
+
12
+ Run:
13
+
14
+ cd packages/memory-engine
15
+ NEO4J_TEST_URI=bolt://localhost:17687 \\
16
+ NEO4J_TEST_PASSWORD=testpassword \\
17
+ .venv/bin/python -m pytest tests/test_channel_stat_rollups.py -v
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ import uuid
23
+
24
+ import pytest
25
+
26
+
27
# Connection settings for the integration Neo4j instance, taken from the
# environment. Only the user has a default ("neo4j"); the URI and password
# are None when their variables are unset.
_NEO4J_URI = os.getenv("NEO4J_TEST_URI")
_NEO4J_USER = os.getenv("NEO4J_TEST_USER", "neo4j")
_NEO4J_PASSWORD = os.getenv("NEO4J_TEST_PASSWORD")

# Marker for the integration tests: skip unless both the URI and the
# password are set to non-empty values.
_skip_no_neo4j = pytest.mark.skipif(
    not _NEO4J_URI or not _NEO4J_PASSWORD,
    reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
)
35
+
36
+
37
@pytest.fixture
def neo4j_driver():
    """Yield ``(driver, arenas)`` for one test and clean up afterwards.

    ``arenas`` is a list of two freshly generated arena names (random
    8-hex-char suffixes) so isolation tests can run side by side without
    trampling each other.

    Teardown deletes every node whose ``arena`` property matches one of
    the two names, then closes the driver. The driver is closed even if
    the cleanup query raises, so a failing teardown does not leak it.
    """
    # Imported lazily so this module can be collected without neo4j installed.
    from neo4j import GraphDatabase

    driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    arenas = [f"stat_a_{uuid.uuid4().hex[:8]}", f"stat_b_{uuid.uuid4().hex[:8]}"]
    try:
        yield driver, arenas
        with driver.session() as session:
            for arena in arenas:
                # ChannelStat lives alongside Person/Chunk in the arena;
                # the per-arena DETACH DELETE catches all of them.
                session.run(
                    "MATCH (n) WHERE n.arena = $arena DETACH DELETE n",
                    arena=arena,
                )
    finally:
        driver.close()
55
+
56
+
57
def _store_chunk_with_email(
    session,
    arena: str,
    cid: str,
    email: str,
    channel: str,
    direction: str,
    occurred_at: str,
) -> None:
    """Write one chunk plus its email-keyed Person edge and ChannelStat rollup.

    The Cypher here is meant to mirror what /index-internal-batch runs for
    the email-keyed Person path, so the tests can exercise the same
    statements without spinning up the FastAPI app.

    Two statements are executed on ``session``:

    1. Upsert the ``Chunk`` (merged on ``id``) and stamp its arena.
    2. Upsert the ``Person`` (merged on arena + email), merge the
       ``COMMUNICATED`` edge to the chunk, and — only while the edge's
       ``_counted`` flag is false — upsert and increment the matching
       ``ChannelStat``, link it via ``HAS_STAT``, and set the flag to true.

    Timestamps are compared as strings inside the Cypher. The "now" value
    is a fixed constant rather than the wall clock.
    """
    now_iso = "2026-05-10T00:00:00Z"

    # Ensure the chunk exists (matches what /index-internal-batch
    # does immediately before writing the Person edge).
    upsert_chunk = """
        MERGE (c:Chunk {id: $cid})
        SET c.text = 't', c.path = 'p', c.arena = $arena,
            c.created_at = $now
        """

    # Person + COMMUNICATED edge + conditional ChannelStat rollup. The
    # FOREACH over a one-or-zero element list acts as the "if not yet
    # counted" guard.
    link_and_rollup = """
        MERGE (p:Entity:Person {arena: $arena, email: $email})
        ON CREATE SET p.created_at = $now,
                      p.first_seen = $occurred_at,
                      p.last_seen = $occurred_at
        ON MATCH SET p.last_seen = CASE
            WHEN $occurred_at > coalesce(p.last_seen, '')
            THEN $occurred_at
            ELSE p.last_seen END
        WITH p
        MATCH (c:Chunk {arena: $arena, id: $cid})
        MERGE (p)-[r:COMMUNICATED]->(c)
        ON CREATE SET r.channel = $channel,
                      r.direction = $direction,
                      r.occurred_at = $occurred_at,
                      r.weight = 1.0,
                      r._counted = false
        WITH p, r
        FOREACH (_ IN CASE WHEN r._counted = false THEN [1] ELSE [] END |
            MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: $channel})
            ON CREATE SET s.count = 0,
                          s.inbound = 0,
                          s.outbound = 0,
                          s.first_seen = $occurred_at,
                          s.last_seen = $occurred_at,
                          s.created_at = $now
            SET s.count = s.count + 1,
                s.inbound = s.inbound + (CASE WHEN $direction = 'inbound' THEN 1 ELSE 0 END),
                s.outbound = s.outbound + (CASE WHEN $direction = 'outbound' THEN 1 ELSE 0 END),
                s.first_seen = CASE
                    WHEN $occurred_at < coalesce(s.first_seen, $occurred_at)
                    THEN $occurred_at
                    ELSE s.first_seen END,
                s.last_seen = CASE
                    WHEN $occurred_at > coalesce(s.last_seen, '')
                    THEN $occurred_at
                    ELSE s.last_seen END,
                s.updated_at = $now
            MERGE (p)-[:HAS_STAT]->(s)
            SET r._counted = true
        )
        """

    session.run(upsert_chunk, cid=cid, arena=arena, now=now_iso)
    session.run(
        link_and_rollup,
        arena=arena,
        email=email,
        cid=cid,
        channel=channel,
        direction=direction,
        occurred_at=occurred_at,
        now=now_iso,
    )
129
+
130
+
131
def _read_stats(session, arena: str, email: str) -> list[dict]:
    """Return the ChannelStat rows for ``email`` in ``arena`` as plain dicts.

    Each dict has the keys ``channel``, ``count``, ``inbound``,
    ``outbound``, ``first_seen`` and ``last_seen``. Rows are ordered by
    channel name by the query itself.
    """
    query = (
        "MATCH (s:ChannelStat {arena: $arena, person_email: $email})\n"
        "RETURN s.channel AS channel, s.count AS count,\n"
        " s.inbound AS inbound, s.outbound AS outbound,\n"
        " s.first_seen AS first_seen, s.last_seen AS last_seen\n"
        "ORDER BY s.channel\n"
    )
    result = session.run(query, arena=arena, email=email)
    return [dict(row) for row in result]
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Writer behaviour.
147
+ # ---------------------------------------------------------------------------
148
+
149
+
150
@_skip_no_neo4j
def test_first_store_creates_channelstat_with_count_one(neo4j_driver) -> None:
    """A single inbound store yields exactly one ChannelStat with count 1
    and both time bounds equal to the message timestamp."""
    driver, (arena, _) = neo4j_driver
    occurred_at = "2026-05-09T10:00:00Z"
    with driver.session() as session:
        _store_chunk_with_email(
            session,
            arena,
            cid="c1",
            email="alex@x.io",
            channel="email",
            direction="inbound",
            occurred_at=occurred_at,
        )
        stats = _read_stats(session, arena, "alex@x.io")

    assert len(stats) == 1
    stat = stats[0]
    assert stat["channel"] == "email"
    assert stat["count"] == 1
    assert stat["inbound"] == 1
    assert stat["outbound"] == 0
    assert stat["first_seen"] == "2026-05-09T10:00:00Z"
    assert stat["last_seen"] == "2026-05-09T10:00:00Z"
167
+
168
+
169
@_skip_no_neo4j
def test_repeated_stores_accumulate_counts_and_widen_time_bounds(neo4j_driver) -> None:
    """Three distinct chunks on one channel accumulate into a single stat
    whose first_seen/last_seen span the earliest and latest timestamps,
    regardless of the order the chunks were written in."""
    driver, (arena, _) = neo4j_driver
    # (chunk id, direction, occurred_at), written in this order.
    writes = [
        ("c1", "inbound", "2026-05-08T09:00:00Z"),
        ("c2", "outbound", "2026-05-09T10:00:00Z"),
        ("c3", "inbound", "2026-05-07T08:00:00Z"),  # earliest — should drive first_seen
    ]
    with driver.session() as session:
        for cid, direction, occurred_at in writes:
            _store_chunk_with_email(
                session,
                arena,
                cid=cid,
                email="alex@x.io",
                channel="email",
                direction=direction,
                occurred_at=occurred_at,
            )
        stats = _read_stats(session, arena, "alex@x.io")

    assert len(stats) == 1
    stat = stats[0]
    assert stat["count"] == 3
    assert stat["inbound"] == 2
    assert stat["outbound"] == 1
    assert stat["first_seen"] == "2026-05-07T08:00:00Z"
    assert stat["last_seen"] == "2026-05-09T10:00:00Z"
195
+
196
+
197
@_skip_no_neo4j
def test_per_channel_buckets_kept_separate(neo4j_driver) -> None:
    """Stores on different channels for the same person land in separate
    ChannelStat nodes, each with its own counters."""
    driver, (arena, _) = neo4j_driver
    # (chunk id, channel, direction, occurred_at), written in this order.
    writes = [
        ("c1", "email", "inbound", "2026-05-09T10:00:00Z"),
        ("c2", "slack", "outbound", "2026-05-09T11:00:00Z"),
        ("c3", "slack", "inbound", "2026-05-09T12:00:00Z"),
    ]
    with driver.session() as session:
        for cid, channel, direction, occurred_at in writes:
            _store_chunk_with_email(
                session, arena, cid, "alex@x.io",
                channel, direction, occurred_at,
            )
        unsorted_stats = _read_stats(session, arena, "alex@x.io")

    stats = sorted(unsorted_stats, key=lambda s: s["channel"])
    channels = [s["channel"] for s in stats]
    assert channels == ["email", "slack"]

    email = next(s for s in stats if s["channel"] == "email")
    slack = next(s for s in stats if s["channel"] == "slack")
    assert email["count"] == 1
    assert slack["count"] == 2
    assert slack["inbound"] == 1
    assert slack["outbound"] == 1
221
+
222
+
223
@_skip_no_neo4j
def test_replay_of_same_chunk_does_not_double_count(neo4j_driver) -> None:
    """Idempotency under retry: writing the same (chunk_id, email) pair
    repeatedly must leave the count at one. The guard is the r._counted
    flag that the writer sets when it first creates the edge."""
    driver, (arena, _) = neo4j_driver
    replays = 3
    with driver.session() as session:
        for _attempt in range(replays):
            _store_chunk_with_email(
                session, arena, "c1", "alex@x.io",
                "email", "inbound", "2026-05-09T10:00:00Z",
            )
        stats = _read_stats(session, arena, "alex@x.io")

    assert len(stats) == 1
    stat = stats[0]
    # Three identical stores → still count == 1.
    assert stat["count"] == 1
    assert stat["inbound"] == 1
240
+
241
+
242
@_skip_no_neo4j
def test_arena_isolation_on_channelstat(neo4j_driver) -> None:
    """A's stats never reflect B's writes, even when both arenas
    have the same email — the multi-tenancy invariant that #28
    landed for Person/Chunk extends to ChannelStat.

    Both sides are checked per channel: A must hold only its single
    email message, and B's email bucket must count only B's own write
    (not A's) alongside its separate slack bucket.
    """
    driver, (arena_a, arena_b) = neo4j_driver
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena_a, "c1", "shared@example.com",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        _store_chunk_with_email(
            session, arena_b, "c2", "shared@example.com",
            "email", "inbound", "2026-05-09T11:00:00Z",
        )
        _store_chunk_with_email(
            session, arena_b, "c3", "shared@example.com",
            "slack", "outbound", "2026-05-09T12:00:00Z",
        )
        a_stats = _read_stats(session, arena_a, "shared@example.com")
        b_stats = _read_stats(session, arena_b, "shared@example.com")

    # A only saw the email message.
    assert len(a_stats) == 1
    assert a_stats[0]["channel"] == "email"
    assert a_stats[0]["count"] == 1

    # B saw both, and each bucket counts only B's own single write.
    assert len(b_stats) == 2
    b_counts = {s["channel"]: s["count"] for s in b_stats}
    assert b_counts == {"email": 1, "slack": 1}
269
+
270
+
271
@_skip_no_neo4j
def test_communicated_edge_has_counted_flag_after_write(neo4j_driver) -> None:
    """The idempotency flag must be true after the first store so
    re-runs of the writer (which use the same MERGE pattern) skip
    the increment subquery.

    The edge lookup is asserted to return a record before the flag is
    read, so a missing edge fails with a clear message instead of a
    ``TypeError`` from subscripting ``None``.
    """
    driver, (arena, _) = neo4j_driver
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena, "c1", "alex@x.io",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        rec = session.run(
            "MATCH (:Person {arena: $arena, email: $email})-[r:COMMUNICATED]->(:Chunk {arena: $arena, id: $cid})\n"
            "RETURN r._counted AS counted",
            arena=arena, email="alex@x.io", cid="c1",
        ).single()

    assert rec is not None, "COMMUNICATED edge was not created by the writer"
    assert rec["counted"] is True
288
+
289
+
290
@_skip_no_neo4j
def test_has_stat_edge_links_person_to_channelstat(neo4j_driver) -> None:
    """After one store, the Person is linked to its ChannelStat through a
    HAS_STAT edge. That traversal exists for ops queries ('show me all
    stat rollups for this person') and for cascade deletes when a tenant
    is offboarded."""
    driver, (arena, _) = neo4j_driver
    traversal = (
        "MATCH (p:Person {arena: $arena, email: $email})-[:HAS_STAT]->(s:ChannelStat {arena: $arena})\n"
        "RETURN s.channel AS channel, s.count AS count\n"
    )
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena, "c1", "alex@x.io",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        result = session.run(traversal, arena=arena, email="alex@x.io")
        rows = list(result)

    assert len(rows) == 1
    row = rows[0]
    assert row["channel"] == "email"
    assert row["count"] == 1
@@ -0,0 +1,48 @@
1
+ """Regression test for the nv_embed /health probe URL construction.
2
+
3
+ Before v0.8.1 the probe was built via str.replace("/v1/embeddings", "/health"),
4
+ which silently produced the wrong URL when NV_EMBED_URL was anything other
5
+ than ".../v1/embeddings" (e.g. /v1/embed for the Pentatonic AI Gateway, or
6
+ a bare host). The probe would end up GET-ing the embed endpoint itself and
7
+ the gateway would return 401 — making /health falsely report a degraded
8
+ nv_embed layer even when embeddings worked fine.
9
+
10
+ The fix replaces the substring replace with a urlparse-based path swap.
11
+ """
12
+
13
+ from urllib.parse import urlparse, urlunparse
14
+
15
+
16
def probe_url(nv_embed_url: str) -> str:
    """Build the health-probe URL for the given embed endpoint URL.

    Stand-alone copy of the probe URL builder from compat/server.py, kept
    tiny and isolated so it can be unit-tested without booting FastAPI.
    Only the scheme and network location of ``nv_embed_url`` survive; the
    path is replaced with ``/health`` and params, query and fragment are
    dropped.
    """
    scheme, netloc, *_ = urlparse(nv_embed_url)
    return urlunparse((scheme, netloc, "/health", "", "", ""))
21
+
22
+
23
def test_probe_with_v1_embeddings_path():
    """The /v1/embeddings form maps to /health on the same host and port."""
    expected = "https://nv-embed:8041/health"
    assert probe_url("https://nv-embed:8041/v1/embeddings") == expected
25
+
26
+
27
def test_probe_with_v1_embed_path():
    """The bug case — gateway moved to /v1/embed and old replace() left URL untouched."""
    expected = "https://lambda-gateway.pentatonic.com/health"
    actual = probe_url("https://lambda-gateway.pentatonic.com/v1/embed")
    assert actual == expected
32
+
33
+
34
def test_probe_with_bare_host():
    """No path at all — provider supplies path_default, but probe just wants /health."""
    expected = "https://lambda-gateway.pentatonic.com/health"
    actual = probe_url("https://lambda-gateway.pentatonic.com")
    assert actual == expected
39
+
40
+
41
def test_probe_strips_query_and_fragment():
    """Query string and fragment on the embed URL do not leak into the probe URL."""
    expected = "https://gw.example.com/health"
    actual = probe_url("https://gw.example.com/v1/embeddings?token=x#frag")
    assert actual == expected
45
+
46
+
47
def test_probe_preserves_port():
    """An explicit port (and the plain-http scheme) carry over to the probe URL."""
    expected = "http://nv-embed:8041/health"
    assert probe_url("http://nv-embed:8041/v1/embeddings") == expected