@pentatonic-ai/ai-agent-sdk 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/index.js +7 -0
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +9 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/engine.test.js +142 -0
- package/packages/memory/src/engine.js +65 -0
- package/packages/memory-engine/compat/server.py +98 -7
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +596 -58
- package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +128 -0
- package/packages/memory-engine/tests/e2e_arena.sh +28 -4
- package/packages/memory-engine/tests/test_aggregate.py +333 -0
- package/packages/memory-engine/tests/test_arena_safety.py +232 -0
- package/packages/memory-engine/tests/test_channel_stat_reader.py +437 -0
- package/packages/memory-engine/tests/test_channel_stat_rollups.py +308 -0
- package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +48 -0
- package/packages/memory-engine/tests/test_l3_arena_isolation.py +412 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""Integration tests for ChannelStat rollup writes + reader fast-path.
|
|
2
|
+
|
|
3
|
+
These exercise the actual Cypher running against a live Neo4j: the
|
|
4
|
+
writer block in /index-internal-batch maintains (:ChannelStat) nodes
|
|
5
|
+
on every store with contact_email metadata, and /aggregate-internal
|
|
6
|
+
reads from those nodes when present (falling back to the edge walk
|
|
7
|
+
when stats haven't been populated yet).
|
|
8
|
+
|
|
9
|
+
Gated on NEO4J_TEST_URI + NEO4J_TEST_PASSWORD; skip cleanly when
|
|
10
|
+
those env vars are absent so unit-only test runs stay fast.
|
|
11
|
+
|
|
12
|
+
Run:
|
|
13
|
+
|
|
14
|
+
cd packages/memory-engine
|
|
15
|
+
NEO4J_TEST_URI=bolt://localhost:17687 \\
|
|
16
|
+
NEO4J_TEST_PASSWORD=testpassword \\
|
|
17
|
+
.venv/bin/python -m pytest tests/test_channel_stat_rollups.py -v
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import uuid
|
|
23
|
+
|
|
24
|
+
import pytest
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
_NEO4J_URI = os.environ.get("NEO4J_TEST_URI")
|
|
28
|
+
_NEO4J_USER = os.environ.get("NEO4J_TEST_USER", "neo4j")
|
|
29
|
+
_NEO4J_PASSWORD = os.environ.get("NEO4J_TEST_PASSWORD")
|
|
30
|
+
|
|
31
|
+
_skip_no_neo4j = pytest.mark.skipif(
|
|
32
|
+
not (_NEO4J_URI and _NEO4J_PASSWORD),
|
|
33
|
+
reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.fixture
def neo4j_driver():
    """Yield ``(driver, arenas)`` for one test, then purge what it wrote.

    ``arenas`` is a list of two freshly generated arena names, so a single
    test can write to both and check that they stay isolated from each
    other. On teardown every node whose ``arena`` property equals one of
    those names is detach-deleted, and the driver is closed even when that
    cleanup query itself fails.
    """
    from neo4j import GraphDatabase

    driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    arenas = [f"stat_a_{uuid.uuid4().hex[:8]}", f"stat_b_{uuid.uuid4().hex[:8]}"]
    try:
        yield driver, arenas
        with driver.session() as session:
            for arena in arenas:
                # ChannelStat lives alongside Person/Chunk in the arena;
                # the per-arena DETACH DELETE catches all of them because
                # it matches on the arena property, not on a label.
                session.run(
                    "MATCH (n) WHERE n.arena = $arena DETACH DELETE n",
                    arena=arena,
                )
    finally:
        # Always release the connection pool, even if cleanup raised.
        driver.close()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _store_chunk_with_email(
    session,
    arena: str,
    cid: str,
    email: str,
    channel: str,
    direction: str,
    occurred_at: str,
) -> None:
    """Mirror exactly the Cypher /index-internal-batch runs for the
    email-keyed Person path, including the ChannelStat rollup. The
    real call sites this is testing are inside that endpoint; this
    helper lets the integration test exercise the same Cypher
    without spinning up the FastAPI app.

    Args:
        session: Open session object exposing ``run(query, **params)``.
        arena: Arena name written onto the Chunk and used as part of the
            Person and ChannelStat merge keys.
        cid: Chunk id to create (or re-use) and link to the person.
        email: Person email; part of the Person and ChannelStat keys.
        channel: Channel name; one ChannelStat node is kept per
            (arena, email, channel).
        direction: ``'inbound'`` bumps ``inbound``, ``'outbound'`` bumps
            ``outbound``; any other value only bumps ``count``.
        occurred_at: Timestamp string; compared with ``<`` / ``>`` as a
            string inside the Cypher, so callers presumably need one
            consistent sortable format (the tests use ISO-8601 with Z).
    """
    # Fixed "now" so created_at / updated_at are the same on every call.
    now_iso = "2026-05-10T00:00:00Z"
    # Ensure the chunk exists (matches what /index-internal-batch
    # does immediately before writing the Person edge).
    # NOTE(review): this MERGE is keyed on id alone and then overwrites
    # c.arena, so a Chunk with the same id from another arena would be
    # re-assigned to this one — confirm that matches the endpoint.
    session.run(
        """
        MERGE (c:Chunk {id: $cid})
        SET c.text = 't', c.path = 'p', c.arena = $arena,
            c.created_at = $now
        """,
        cid=cid, arena=arena, now=now_iso,
    )
    # Single statement doing, in order:
    #   1. MERGE the Person on (arena, email); a new node gets
    #      created_at / first_seen / last_seen, an existing one only has
    #      last_seen moved forward when occurred_at is later.
    #   2. MERGE the COMMUNICATED edge to the chunk; a newly created edge
    #      is stamped with _counted = false.
    #   3. The FOREACH over a one-or-zero element list acts as a
    #      conditional: its body runs only while r._counted = false. It
    #      merges the ChannelStat on (arena, person_email, channel),
    #      increments the counters, widens first_seen / last_seen, links
    #      Person -[:HAS_STAT]-> ChannelStat and flips r._counted to true,
    #      so re-running the statement for the same edge does not count
    #      it again.
    session.run(
        """
        MERGE (p:Entity:Person {arena: $arena, email: $email})
        ON CREATE SET p.created_at = $now,
                      p.first_seen = $occurred_at,
                      p.last_seen = $occurred_at
        ON MATCH SET p.last_seen = CASE
            WHEN $occurred_at > coalesce(p.last_seen, '')
            THEN $occurred_at
            ELSE p.last_seen END
        WITH p
        MATCH (c:Chunk {arena: $arena, id: $cid})
        MERGE (p)-[r:COMMUNICATED]->(c)
        ON CREATE SET r.channel = $channel,
                      r.direction = $direction,
                      r.occurred_at = $occurred_at,
                      r.weight = 1.0,
                      r._counted = false
        WITH p, r
        FOREACH (_ IN CASE WHEN r._counted = false THEN [1] ELSE [] END |
            MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: $channel})
            ON CREATE SET s.count = 0,
                          s.inbound = 0,
                          s.outbound = 0,
                          s.first_seen = $occurred_at,
                          s.last_seen = $occurred_at,
                          s.created_at = $now
            SET s.count = s.count + 1,
                s.inbound = s.inbound + (CASE WHEN $direction = 'inbound' THEN 1 ELSE 0 END),
                s.outbound = s.outbound + (CASE WHEN $direction = 'outbound' THEN 1 ELSE 0 END),
                s.first_seen = CASE
                    WHEN $occurred_at < coalesce(s.first_seen, $occurred_at)
                    THEN $occurred_at
                    ELSE s.first_seen END,
                s.last_seen = CASE
                    WHEN $occurred_at > coalesce(s.last_seen, '')
                    THEN $occurred_at
                    ELSE s.last_seen END,
                s.updated_at = $now
            MERGE (p)-[:HAS_STAT]->(s)
            SET r._counted = true
        )
        """,
        arena=arena, email=email, cid=cid,
        channel=channel, direction=direction,
        occurred_at=occurred_at, now=now_iso,
    )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _read_stats(session, arena: str, email: str) -> list[dict]:
    """Return the ChannelStat rows for ``email`` in ``arena`` as dicts.

    The query orders rows by channel name; each row carries the keys
    ``channel``, ``count``, ``inbound``, ``outbound``, ``first_seen`` and
    ``last_seen``.
    """
    query = (
        "MATCH (s:ChannelStat {arena: $arena, person_email: $email})\n"
        "RETURN s.channel AS channel, s.count AS count,\n"
        " s.inbound AS inbound, s.outbound AS outbound,\n"
        " s.first_seen AS first_seen, s.last_seen AS last_seen\n"
        "ORDER BY s.channel\n"
    )
    result = session.run(query, arena=arena, email=email)
    # Convert each record into a plain dict so callers can index by key.
    return [dict(record) for record in result]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
# Writer behaviour.
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@_skip_no_neo4j
def test_first_store_creates_channelstat_with_count_one(neo4j_driver) -> None:
    """One inbound store produces a single ChannelStat row with count 1
    whose first_seen and last_seen both equal the store's timestamp."""
    driver, (arena, _unused) = neo4j_driver
    when = "2026-05-09T10:00:00Z"
    expected = {
        "channel": "email",
        "count": 1,
        "inbound": 1,
        "outbound": 0,
        "first_seen": when,
        "last_seen": when,
    }
    with driver.session() as session:
        _store_chunk_with_email(
            session,
            arena,
            cid="c1",
            email="alex@x.io",
            channel="email",
            direction="inbound",
            occurred_at=when,
        )
        stats = _read_stats(session, arena, "alex@x.io")
        assert len(stats) == 1
        row = stats[0]
        for key, want in expected.items():
            assert row[key] == want
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@_skip_no_neo4j
def test_repeated_stores_accumulate_counts_and_widen_time_bounds(neo4j_driver) -> None:
    """Three distinct chunks on one channel add up to count 3, and the
    time bounds widen to the earliest and latest timestamps seen
    regardless of the order the stores arrive in."""
    driver, (arena, _unused) = neo4j_driver
    # (chunk id, direction, occurred_at) — deliberately not in time order.
    stores = [
        ("c1", "inbound", "2026-05-08T09:00:00Z"),
        ("c2", "outbound", "2026-05-09T10:00:00Z"),
        ("c3", "inbound", "2026-05-07T08:00:00Z"),  # earliest — should drive first_seen
    ]
    with driver.session() as session:
        for cid, direction, occurred_at in stores:
            _store_chunk_with_email(
                session,
                arena,
                cid=cid,
                email="alex@x.io",
                channel="email",
                direction=direction,
                occurred_at=occurred_at,
            )
        stats = _read_stats(session, arena, "alex@x.io")
        assert len(stats) == 1
        row = stats[0]
        assert row["count"] == 3
        assert row["inbound"] == 2
        assert row["outbound"] == 1
        assert row["first_seen"] == "2026-05-07T08:00:00Z"
        assert row["last_seen"] == "2026-05-09T10:00:00Z"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@_skip_no_neo4j
def test_per_channel_buckets_kept_separate(neo4j_driver) -> None:
    """Stores on different channels for the same person land in separate
    ChannelStat rows, each with its own counters."""
    driver, (arena, _unused) = neo4j_driver
    # (chunk id, channel, direction, occurred_at)
    stores = [
        ("c1", "email", "inbound", "2026-05-09T10:00:00Z"),
        ("c2", "slack", "outbound", "2026-05-09T11:00:00Z"),
        ("c3", "slack", "inbound", "2026-05-09T12:00:00Z"),
    ]
    with driver.session() as session:
        for cid, channel, direction, occurred_at in stores:
            _store_chunk_with_email(
                session, arena, cid, "alex@x.io",
                channel, direction, occurred_at,
            )
        fetched = _read_stats(session, arena, "alex@x.io")
        stats = sorted(fetched, key=lambda row: row["channel"])
        channels = [row["channel"] for row in stats]
        assert channels == ["email", "slack"]
        email_row = next(row for row in stats if row["channel"] == "email")
        slack_row = next(row for row in stats if row["channel"] == "slack")
        assert email_row["count"] == 1
        assert slack_row["count"] == 2
        assert slack_row["inbound"] == 1
        assert slack_row["outbound"] == 1
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@_skip_no_neo4j
def test_replay_of_same_chunk_does_not_double_count(neo4j_driver) -> None:
    """Retrying a store must be idempotent: writing the same
    (chunk_id, email) pair several times leaves the rollup at one.
    The writer relies on the r._counted flag stamped on the edge the
    first time it is created."""
    driver, (arena, _unused) = neo4j_driver
    replays = 3
    with driver.session() as session:
        for _attempt in range(replays):
            _store_chunk_with_email(
                session, arena, "c1", "alex@x.io",
                "email", "inbound", "2026-05-09T10:00:00Z",
            )
        stats = _read_stats(session, arena, "alex@x.io")
        assert len(stats) == 1
        only_row = stats[0]
        # Three identical stores → still count == 1.
        assert only_row["count"] == 1
        assert only_row["inbound"] == 1
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
@_skip_no_neo4j
def test_arena_isolation_on_channelstat(neo4j_driver) -> None:
    """A's stats never reflect B's writes, even when both arenas
    have the same email — the multi-tenancy invariant that #28
    landed for Person/Chunk extends to ChannelStat.

    Both directions are checked: A must not see B's slack message, and
    B's email bucket must not have absorbed A's email message.
    """
    driver, (arena_a, arena_b) = neo4j_driver
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena_a, "c1", "shared@example.com",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        _store_chunk_with_email(
            session, arena_b, "c2", "shared@example.com",
            "email", "inbound", "2026-05-09T11:00:00Z",
        )
        _store_chunk_with_email(
            session, arena_b, "c3", "shared@example.com",
            "slack", "outbound", "2026-05-09T12:00:00Z",
        )
        a_stats = _read_stats(session, arena_a, "shared@example.com")
        b_stats = _read_stats(session, arena_b, "shared@example.com")
        # A only saw the email message.
        assert len(a_stats) == 1
        assert a_stats[0]["channel"] == "email"
        assert a_stats[0]["count"] == 1
        # B saw both.
        assert len(b_stats) == 2
        # A bare length check would still pass if A's email write had been
        # folded into B's email bucket, so pin B's per-channel counts too.
        b_by_channel = {row["channel"]: row for row in b_stats}
        assert sorted(b_by_channel) == ["email", "slack"]
        assert b_by_channel["email"]["count"] == 1
        assert b_by_channel["slack"]["count"] == 1
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
@_skip_no_neo4j
def test_communicated_edge_has_counted_flag_after_write(neo4j_driver) -> None:
    """The idempotency flag must be true after the first store so
    re-runs of the writer (which use the same MERGE pattern) skip
    the increment subquery."""
    driver, (arena, _) = neo4j_driver
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena, "c1", "alex@x.io",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        rec = session.run(
            "MATCH (:Person {arena: $arena, email: $email})-[r:COMMUNICATED]->(:Chunk {arena: $arena, id: $cid})\n"
            "RETURN r._counted AS counted",
            arena=arena, email="alex@x.io", cid="c1",
        ).single()
        # single() yields None when nothing matched; fail with a clear
        # message instead of a TypeError from subscripting None.
        assert rec is not None, "COMMUNICATED edge was not written"
        assert rec["counted"] is True
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
@_skip_no_neo4j
def test_has_stat_edge_links_person_to_channelstat(neo4j_driver) -> None:
    """A store must leave a Person -[:HAS_STAT]-> ChannelStat edge behind.
    That traversal serves ops queries ('show me all stat rollups for
    this person') and cascade deletes when a tenant is offboarded."""
    driver, (arena, _unused) = neo4j_driver
    traversal = (
        "MATCH (p:Person {arena: $arena, email: $email})-[:HAS_STAT]->(s:ChannelStat {arena: $arena})\n"
        "RETURN s.channel AS channel, s.count AS count\n"
    )
    with driver.session() as session:
        _store_chunk_with_email(
            session, arena, "c1", "alex@x.io",
            "email", "inbound", "2026-05-09T10:00:00Z",
        )
        result = session.run(traversal, arena=arena, email="alex@x.io")
        rows = list(result)
        assert len(rows) == 1
        linked = rows[0]
        assert linked["channel"] == "email"
        assert linked["count"] == 1
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Regression test for the nv_embed /health probe URL construction.
|
|
2
|
+
|
|
3
|
+
Before v0.8.1 the probe was built via str.replace("/v1/embeddings", "/health"),
|
|
4
|
+
which silently produced the wrong URL when NV_EMBED_URL was anything other
|
|
5
|
+
than ".../v1/embeddings" (e.g. /v1/embed for the Pentatonic AI Gateway, or
|
|
6
|
+
a bare host). The probe would end up GET-ing the embed endpoint itself and
|
|
7
|
+
the gateway would return 401 — making /health falsely report a degraded
|
|
8
|
+
nv_embed layer even when embeddings worked fine.
|
|
9
|
+
|
|
10
|
+
The fix replaces the substring replace with a urlparse-based path swap.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from urllib.parse import urlparse, urlunparse
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def probe_url(nv_embed_url: str) -> str:
    """Build the health-probe URL for an embedding endpoint URL.

    Stand-alone copy of the probe URL builder from compat/server.py, kept
    small so it can be unit-tested without booting FastAPI. The scheme and
    network location of ``nv_embed_url`` are kept; the path becomes
    ``/health`` and any params, query string and fragment are dropped.
    """
    # The first two fields of the parse result are scheme and netloc.
    scheme, netloc = urlparse(nv_embed_url)[:2]
    return urlunparse((scheme, netloc, "/health", "", "", ""))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_probe_with_v1_embeddings_path():
    """A /v1/embeddings URL maps to /health on the same host and port."""
    expected = "https://nv-embed:8041/health"
    assert probe_url("https://nv-embed:8041/v1/embeddings") == expected
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_probe_with_v1_embed_path():
    """The bug case — gateway moved to /v1/embed and old replace() left URL untouched."""
    gateway_embed_url = "https://lambda-gateway.pentatonic.com/v1/embed"
    expected = "https://lambda-gateway.pentatonic.com/health"
    assert probe_url(gateway_embed_url) == expected
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_probe_with_bare_host():
    """No path at all — provider supplies path_default, but probe just wants /health."""
    bare_host_url = "https://lambda-gateway.pentatonic.com"
    expected = "https://lambda-gateway.pentatonic.com/health"
    assert probe_url(bare_host_url) == expected
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_probe_strips_query_and_fragment():
    """Query string and fragment on the embed URL do not reach the probe URL."""
    noisy_url = "https://gw.example.com/v1/embeddings?token=x#frag"
    expected = "https://gw.example.com/health"
    assert probe_url(noisy_url) == expected
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_probe_preserves_port():
    """An explicit port in the embed URL is carried over to the probe URL."""
    expected = "http://nv-embed:8041/health"
    assert probe_url("http://nv-embed:8041/v1/embeddings") == expected
|