@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/bin/cli.js +1 -1
- package/bin/commands/config.js +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +2 -2
- package/packages/doctor/src/checks/local-memory.js +2 -2
- package/packages/memory/README.md +2 -2
- package/packages/memory/openclaw-plugin/README.md +2 -2
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/src/server.js +2 -2
- package/packages/memory-engine-v2/.env.example +30 -0
- package/packages/memory-engine-v2/README.md +125 -0
- package/packages/memory-engine-v2/compat/Dockerfile +11 -0
- package/packages/memory-engine-v2/compat/requirements.txt +6 -0
- package/packages/memory-engine-v2/compat/server.py +1047 -0
- package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
- package/packages/memory-engine-v2/docker-compose.yml +206 -0
- package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
- package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
- package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
- package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
- package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
- package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
- package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
- package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
- package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
- package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
- package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
- package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
- package/packages/memory-engine/.env.example +0 -13
- package/packages/memory-engine/MIGRATION.md +0 -219
- package/packages/memory-engine/README.md +0 -145
- package/packages/memory-engine/bench/README.md +0 -99
- package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
- package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
- package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
- package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
- package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
- package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
- package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
- package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
- package/packages/memory-engine/compat/Dockerfile +0 -22
- package/packages/memory-engine/compat/server.py +0 -1255
- package/packages/memory-engine/docker-compose.test.yml +0 -59
- package/packages/memory-engine/docker-compose.yml +0 -255
- package/packages/memory-engine/engine/README.md +0 -52
- package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
- package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
- package/packages/memory-engine/engine/l6-document-store.py +0 -1018
- package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
- package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
- package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
- package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
- package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
- package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
- package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
- package/packages/memory-engine/pme_memory/__init__.py +0 -0
- package/packages/memory-engine/pme_memory/__main__.py +0 -129
- package/packages/memory-engine/pme_memory/artifacts.py +0 -95
- package/packages/memory-engine/pme_memory/embed.py +0 -74
- package/packages/memory-engine/pme_memory/health.py +0 -36
- package/packages/memory-engine/pme_memory/hygiene.py +0 -159
- package/packages/memory-engine/pme_memory/indexer.py +0 -200
- package/packages/memory-engine/pme_memory/needs.py +0 -55
- package/packages/memory-engine/pme_memory/provenance.py +0 -80
- package/packages/memory-engine/pme_memory/scoring.py +0 -168
- package/packages/memory-engine/pme_memory/search.py +0 -52
- package/packages/memory-engine/pme_memory/store.py +0 -86
- package/packages/memory-engine/pme_memory/synthesis.py +0 -114
- package/packages/memory-engine/pyproject.toml +0 -65
- package/packages/memory-engine/scripts/kg-extractor.py +0 -557
- package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
- package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
- package/packages/memory-engine/tests/e2e_arena.sh +0 -259
- package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
- package/packages/memory-engine/tests/embed_stub/server.py +0 -80
- package/packages/memory-engine/tests/test_aggregate.py +0 -333
- package/packages/memory-engine/tests/test_api_contract.sh +0 -57
- package/packages/memory-engine/tests/test_arena_safety.py +0 -232
- package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
- package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
- package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
- package/packages/memory-engine/tests/test_embed_provider.py +0 -693
- package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
- package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
- package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
- package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
"""Tests for the sqlite-vec-backed QMD search path in l2-hybridrag-proxy.
|
|
2
|
-
|
|
3
|
-
Validates the migration from the legacy Python-cosine-over-JSON path
|
|
4
|
-
(which had a silent `ORDER BY id LIMIT 2000` correctness bug — only
|
|
5
|
-
the OLDEST 2000 chunks were ever considered) to native sqlite-vec
|
|
6
|
-
KNN MATCH over a vec0 virtual table.
|
|
7
|
-
|
|
8
|
-
Pure-Python tests — no Neo4j, no Milvus. The proxy module is loaded
|
|
9
|
-
via importlib so we can call helpers and handlers directly, and
|
|
10
|
-
QMD_DB_PATH is overridden to a tmp_path file per test.
|
|
11
|
-
|
|
12
|
-
Run:
|
|
13
|
-
|
|
14
|
-
cd packages/memory-engine
|
|
15
|
-
.venv/bin/python -m pytest tests/test_l2_qmd_vec_search.py -v
|
|
16
|
-
|
|
17
|
-
The tests skip cleanly when ``sqlite_vec`` is not importable — useful
|
|
18
|
-
for unit-only runs on machines that don't have the wheel installed.
|
|
19
|
-
"""
|
|
20
|
-
from __future__ import annotations
|
|
21
|
-
|
|
22
|
-
import importlib.util
|
|
23
|
-
import json
|
|
24
|
-
import struct
|
|
25
|
-
import sys
|
|
26
|
-
from pathlib import Path
|
|
27
|
-
|
|
28
|
-
import pytest
|
|
29
|
-
|
|
30
|
-
try:
|
|
31
|
-
import sqlite_vec # noqa: F401
|
|
32
|
-
_SQLITE_VEC_OK = True
|
|
33
|
-
except ImportError:
|
|
34
|
-
_SQLITE_VEC_OK = False
|
|
35
|
-
|
|
36
|
-
_skip_no_sqlite_vec = pytest.mark.skipif(
|
|
37
|
-
not _SQLITE_VEC_OK,
|
|
38
|
-
reason="sqlite_vec wheel not installed in this venv",
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
|
|
43
|
-
sys.path.insert(0, str(ENGINE_ROOT))
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
@pytest.fixture(scope="module")
|
|
47
|
-
def proxy_module():
|
|
48
|
-
"""Load l2-hybridrag-proxy as a module. Same pattern as
|
|
49
|
-
test_channel_stat_reader / test_people_list_reader so the
|
|
50
|
-
module-load failure mode (missing deps) skips cleanly rather than
|
|
51
|
-
erroring."""
|
|
52
|
-
spec = importlib.util.spec_from_file_location(
|
|
53
|
-
"l2_proxy_module_qmd_vec",
|
|
54
|
-
ENGINE_ROOT / "l2-hybridrag-proxy.py",
|
|
55
|
-
)
|
|
56
|
-
assert spec and spec.loader
|
|
57
|
-
try:
|
|
58
|
-
mod = importlib.util.module_from_spec(spec)
|
|
59
|
-
spec.loader.exec_module(mod)
|
|
60
|
-
except ImportError:
|
|
61
|
-
pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")
|
|
62
|
-
return mod
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
@pytest.fixture
|
|
66
|
-
def qmd_db(tmp_path, proxy_module, monkeypatch):
|
|
67
|
-
"""Per-test qmd.sqlite at a tmp path, with the proxy module pointed
|
|
68
|
-
at it. Returns the path so tests can run their own asserting queries
|
|
69
|
-
against it."""
|
|
70
|
-
db_path = tmp_path / "qmd.sqlite"
|
|
71
|
-
monkeypatch.setattr(proxy_module, "QMD_DB_PATH", str(db_path))
|
|
72
|
-
return db_path
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _make_vec(seed: int, dim: int) -> list[float]:
|
|
76
|
-
"""Deterministic synthetic embedding — small enough to test fast,
|
|
77
|
-
structured enough that nearest-neighbour relationships are stable
|
|
78
|
-
across runs. Every slot is plain Gaussian noise here; callers bias the
|
|
79
|
-
first slot's sign + magnitude afterwards to separate clusters."""
|
|
80
|
-
import random as _r
|
|
81
|
-
rng = _r.Random(seed)
|
|
82
|
-
return [rng.gauss(0.0, 1.0) for _ in range(dim)]
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
# ---------------------------------------------------------------------------
|
|
86
|
-
# 1. vec_index MATCH semantics — sanity check the SDK glue against sqlite-vec.
|
|
87
|
-
# ---------------------------------------------------------------------------
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
@_skip_no_sqlite_vec
|
|
91
|
-
def test_vec_index_match_returns_top_k(qmd_db, proxy_module) -> None:
|
|
92
|
-
"""Insert N known vectors with a planted ringer, query with the
|
|
93
|
-
ringer's vector, assert the ringer is the top hit. This is the
|
|
94
|
-
minimum signal that ``_ensure_vec_index`` + native MATCH actually
|
|
95
|
-
work end-to-end against the dim our proxy is configured for."""
|
|
96
|
-
conn = proxy_module._open_qmd_conn()
|
|
97
|
-
proxy_module._ensure_vec_index(conn)
|
|
98
|
-
dim = proxy_module.EMBED_DIM
|
|
99
|
-
# 20 rows of noise + 1 planted ringer at id=999. Planted vector is
|
|
100
|
-
# near-orthogonal to the noise (which uses positive-slot dominance)
|
|
101
|
-
# by flipping the first slot's sign — confirms the cosine MATCH
|
|
102
|
-
# actually orders by similarity, not by row id.
|
|
103
|
-
for i in range(20):
|
|
104
|
-
v = _make_vec(seed=i + 1, dim=dim)
|
|
105
|
-
v[0] = abs(v[0]) + 10.0 # bias positive
|
|
106
|
-
conn.execute(
|
|
107
|
-
"INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
|
|
108
|
-
(i + 1, struct.pack(f"{dim}f", *v)),
|
|
109
|
-
)
|
|
110
|
-
ringer = _make_vec(seed=999, dim=dim)
|
|
111
|
-
ringer[0] = -abs(ringer[0]) - 10.0 # bias negative — opposite cluster
|
|
112
|
-
conn.execute(
|
|
113
|
-
"INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
|
|
114
|
-
(999, struct.pack(f"{dim}f", *ringer)),
|
|
115
|
-
)
|
|
116
|
-
conn.commit()
|
|
117
|
-
qbytes = struct.pack(f"{dim}f", *ringer)
|
|
118
|
-
rows = conn.execute(
|
|
119
|
-
"""
|
|
120
|
-
SELECT id, distance
|
|
121
|
-
FROM vec_index
|
|
122
|
-
WHERE embedding MATCH ? AND k = ?
|
|
123
|
-
ORDER BY distance
|
|
124
|
-
""",
|
|
125
|
-
(qbytes, 5),
|
|
126
|
-
).fetchall()
|
|
127
|
-
conn.close()
|
|
128
|
-
assert len(rows) == 5
|
|
129
|
-
top_id, top_dist = rows[0]
|
|
130
|
-
assert top_id == 999, f"expected ringer id=999, got {top_id} ({rows!r})"
|
|
131
|
-
# Cosine distance = 1 - cos_sim, so an identical vector → ~0 distance.
|
|
132
|
-
# Ringer-vs-itself is exact, so we expect ~0 here; allow float32
|
|
133
|
-
# round-trip slop.
|
|
134
|
-
assert top_dist < 1e-3, f"ringer-vs-itself should be ~0, got {top_dist}"
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
# ---------------------------------------------------------------------------
|
|
138
|
-
# 2. search_qmd_informed uses vec_index, not the legacy JSON-cosine path.
|
|
139
|
-
# ---------------------------------------------------------------------------
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
@_skip_no_sqlite_vec
|
|
143
|
-
def test_search_qmd_informed_uses_vec_index(qmd_db, proxy_module, monkeypatch) -> None:
|
|
144
|
-
"""Full search path test: seed chunks + vec_index, mock
|
|
145
|
-
``get_embedding`` to return a vector that matches the ringer,
|
|
146
|
-
assert the returned results are sourced from the vec_index JOIN
|
|
147
|
-
(which preserves path/text from chunks) and ranked by similarity.
|
|
148
|
-
|
|
149
|
-
This is the test meant to fail if someone reverted the search
|
|
150
|
-
body to the legacy ``ORDER BY id LIMIT 2000`` path. NOTE(review): the
|
|
151
|
-
ringer's id is 999, but only 21 rows are seeded, so it still sits inside
|
|
152
|
-
the 2000-row prefix — seed >2000 rows to truly exercise that bug."""
|
|
153
|
-
import sqlite3
|
|
154
|
-
conn = proxy_module._open_qmd_conn()
|
|
155
|
-
proxy_module._ensure_vec_index(conn)
|
|
156
|
-
conn.execute(
|
|
157
|
-
"""
|
|
158
|
-
CREATE TABLE IF NOT EXISTS chunks (
|
|
159
|
-
id INTEGER PRIMARY KEY,
|
|
160
|
-
path TEXT,
|
|
161
|
-
text TEXT,
|
|
162
|
-
embedding TEXT,
|
|
163
|
-
embedding_model TEXT,
|
|
164
|
-
embedding_dim INTEGER,
|
|
165
|
-
chunk_index INTEGER,
|
|
166
|
-
created_at TEXT
|
|
167
|
-
)
|
|
168
|
-
"""
|
|
169
|
-
)
|
|
170
|
-
dim = proxy_module.EMBED_DIM
|
|
171
|
-
# Noise rows 1..20 + planted ringer id=999. Same orthogonal-cluster
|
|
172
|
-
# setup as test 1 — guarantees the ringer wins on cosine.
|
|
173
|
-
for i in range(20):
|
|
174
|
-
v = _make_vec(seed=i + 1, dim=dim)
|
|
175
|
-
v[0] = abs(v[0]) + 10.0
|
|
176
|
-
conn.execute(
|
|
177
|
-
"INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
|
|
178
|
-
(i + 1, f"noise/{i}.md", f"noise text {i}", json.dumps(v)),
|
|
179
|
-
)
|
|
180
|
-
conn.execute(
|
|
181
|
-
"INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
|
|
182
|
-
(i + 1, struct.pack(f"{dim}f", *v)),
|
|
183
|
-
)
|
|
184
|
-
ringer = _make_vec(seed=999, dim=dim)
|
|
185
|
-
ringer[0] = -abs(ringer[0]) - 10.0
|
|
186
|
-
conn.execute(
|
|
187
|
-
"INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
|
|
188
|
-
(999, "ringer/needle.md", "needle in the haystack", json.dumps(ringer)),
|
|
189
|
-
)
|
|
190
|
-
conn.execute(
|
|
191
|
-
"INSERT INTO vec_index(id, embedding) VALUES (?, ?)",
|
|
192
|
-
(999, struct.pack(f"{dim}f", *ringer)),
|
|
193
|
-
)
|
|
194
|
-
conn.commit()
|
|
195
|
-
conn.close()
|
|
196
|
-
|
|
197
|
-
# Mock get_embedding to return the ringer's vector for any query.
|
|
198
|
-
monkeypatch.setattr(proxy_module, "get_embedding", lambda *_a, **_kw: ringer)
|
|
199
|
-
|
|
200
|
-
out = proxy_module.search_qmd_informed(
|
|
201
|
-
"any query — get_embedding is mocked",
|
|
202
|
-
{"graph_entities": []},
|
|
203
|
-
limit=3,
|
|
204
|
-
)
|
|
205
|
-
assert out, "search returned empty; vec_index path must surface ringer"
|
|
206
|
-
top = out[0]
|
|
207
|
-
assert top["path"] == "ringer/needle.md", (
|
|
208
|
-
f"top hit should be the ringer at row 999; got {top['path']}. "
|
|
209
|
-
f"If this fails, the search may have reverted to the LIMIT 2000 "
|
|
210
|
-
f"legacy path which never sees row 999."
|
|
211
|
-
)
|
|
212
|
-
assert top["text"] == "needle in the haystack"
|
|
213
|
-
assert top["base_similarity"] > 0.9
|
|
214
|
-
assert top["source"] == "vector"
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
# ---------------------------------------------------------------------------
|
|
218
|
-
# 3. Backfill is idempotent — second run on a populated vec_index is no-op.
|
|
219
|
-
# ---------------------------------------------------------------------------
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
@_skip_no_sqlite_vec
|
|
223
|
-
def test_backfill_idempotent(qmd_db, proxy_module) -> None:
|
|
224
|
-
"""Seed chunks with JSON embeddings only (no vec_index rows),
|
|
225
|
-
call ``_backfill_vec_index`` twice, assert:
|
|
226
|
-
1. First call copies all rows into vec_index.
|
|
227
|
-
2. Second call observes vec_n >= chunks_n and is a no-op (no
|
|
228
|
-
duplicate inserts, no errors).
|
|
229
|
-
|
|
230
|
-
Catches the failure mode where a missing idempotency check would
|
|
231
|
-
INSERT duplicate ids on the second invocation, blow up the UNIQUE
|
|
232
|
-
constraint, and corrupt the index."""
|
|
233
|
-
import asyncio
|
|
234
|
-
conn = proxy_module._open_qmd_conn()
|
|
235
|
-
proxy_module._ensure_vec_index(conn)
|
|
236
|
-
conn.execute(
|
|
237
|
-
"""
|
|
238
|
-
CREATE TABLE IF NOT EXISTS chunks (
|
|
239
|
-
id INTEGER PRIMARY KEY,
|
|
240
|
-
path TEXT,
|
|
241
|
-
text TEXT,
|
|
242
|
-
embedding TEXT,
|
|
243
|
-
embedding_model TEXT,
|
|
244
|
-
embedding_dim INTEGER,
|
|
245
|
-
chunk_index INTEGER,
|
|
246
|
-
created_at TEXT
|
|
247
|
-
)
|
|
248
|
-
"""
|
|
249
|
-
)
|
|
250
|
-
dim = proxy_module.EMBED_DIM
|
|
251
|
-
N = 7
|
|
252
|
-
for i in range(N):
|
|
253
|
-
v = _make_vec(seed=i + 100, dim=dim)
|
|
254
|
-
conn.execute(
|
|
255
|
-
"INSERT INTO chunks(id, path, text, embedding) VALUES (?, ?, ?, ?)",
|
|
256
|
-
(i + 1, f"p/{i}.md", f"t{i}", json.dumps(v)),
|
|
257
|
-
)
|
|
258
|
-
conn.commit()
|
|
259
|
-
pre_chunks = conn.execute("SELECT count(*) FROM chunks").fetchone()[0]
|
|
260
|
-
pre_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
|
|
261
|
-
conn.close()
|
|
262
|
-
assert pre_chunks == N and pre_vec == 0, (
|
|
263
|
-
f"setup mismatch: chunks={pre_chunks}, vec={pre_vec}"
|
|
264
|
-
)
|
|
265
|
-
|
|
266
|
-
# First run — should copy all N rows.
|
|
267
|
-
asyncio.run(proxy_module._backfill_vec_index())
|
|
268
|
-
conn = proxy_module._open_qmd_conn()
|
|
269
|
-
mid_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
|
|
270
|
-
conn.close()
|
|
271
|
-
assert mid_vec == N, f"first backfill should copy all {N} rows, got {mid_vec}"
|
|
272
|
-
|
|
273
|
-
# Second run — must no-op cleanly. No exception, no duplicate inserts.
|
|
274
|
-
asyncio.run(proxy_module._backfill_vec_index())
|
|
275
|
-
conn = proxy_module._open_qmd_conn()
|
|
276
|
-
final_vec = conn.execute("SELECT count(*) FROM vec_index").fetchone()[0]
|
|
277
|
-
conn.close()
|
|
278
|
-
assert final_vec == N, (
|
|
279
|
-
f"second backfill should be no-op; got {final_vec} rows instead of {N}"
|
|
280
|
-
)
|
|
@@ -1,412 +0,0 @@
|
|
|
1
|
-
"""Integration tests for L3 arena isolation + typed-entity writes.
|
|
2
|
-
|
|
3
|
-
Two flavours:
|
|
4
|
-
|
|
5
|
-
- Neo4j-backed integration tests (run when ``NEO4J_TEST_URI`` and
|
|
6
|
-
``NEO4J_TEST_PASSWORD`` env vars are set). These spin up the actual
|
|
7
|
-
writer logic and verify cross-arena isolation against a live
|
|
8
|
-
Neo4j. Skip cleanly when env is absent so the unit-test job stays
|
|
9
|
-
Neo4j-free.
|
|
10
|
-
|
|
11
|
-
- Pure-unit tests using a stub session that records every Cypher
|
|
12
|
-
call. Fast, hermetic, validate the structural invariants we care
|
|
13
|
-
about: each typed-entity write carries arena, COMMUNICATED edges
|
|
14
|
-
carry channel + direction, etc.
|
|
15
|
-
|
|
16
|
-
Run:
|
|
17
|
-
|
|
18
|
-
cd packages/memory-engine
|
|
19
|
-
.venv/bin/python -m pytest tests/test_l3_arena_isolation.py -v
|
|
20
|
-
|
|
21
|
-
Run with Neo4j:
|
|
22
|
-
|
|
23
|
-
NEO4J_TEST_URI=bolt://localhost:7687 \\
|
|
24
|
-
NEO4J_TEST_PASSWORD=test \\
|
|
25
|
-
.venv/bin/python -m pytest tests/test_l3_arena_isolation.py -v
|
|
26
|
-
"""
|
|
27
|
-
from __future__ import annotations
|
|
28
|
-
|
|
29
|
-
import os
|
|
30
|
-
import sys
|
|
31
|
-
import uuid
|
|
32
|
-
from pathlib import Path
|
|
33
|
-
|
|
34
|
-
import pytest
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
# Make the engine module importable without packaging it.
|
|
38
|
-
ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
|
|
39
|
-
sys.path.insert(0, str(ENGINE_ROOT))
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# ---------------------------------------------------------------------------
|
|
43
|
-
# Stub session for unit tests — records calls without hitting Neo4j.
|
|
44
|
-
# ---------------------------------------------------------------------------
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class _Recorder:
|
|
48
|
-
"""Stand-in for a Neo4j session.run that records every call.
|
|
49
|
-
|
|
50
|
-
Just enough surface for the writer block in the engine to think it's
|
|
51
|
-
talking to Neo4j: ``run(cypher, **params)`` returns an object whose
|
|
52
|
-
``single()`` and iteration return empty (``data()`` is not implemented).
|
|
53
|
-
"""
|
|
54
|
-
|
|
55
|
-
def __init__(self) -> None:
|
|
56
|
-
self.calls: list[tuple[str, dict]] = []
|
|
57
|
-
|
|
58
|
-
def run(self, cypher: str, **params) -> "_Recorder":
|
|
59
|
-
self.calls.append((cypher, params))
|
|
60
|
-
return self
|
|
61
|
-
|
|
62
|
-
def single(self) -> dict:
|
|
63
|
-
return {}
|
|
64
|
-
|
|
65
|
-
def __iter__(self):
|
|
66
|
-
return iter([])
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def _has_arena_in_pattern(cypher: str, label: str, var: str = "") -> bool:
|
|
70
|
-
"""True if the cypher has a (…label…) pattern and every one names arena.
|
|
71
|
-
|
|
72
|
-
Parses each node pattern as ``(var:Label1:Label2:… {props})``,
|
|
73
|
-
finds the ones whose label list contains the target ``label``, and
|
|
74
|
-
checks that each one is arena-scoped (either via ``arena`` in the
|
|
75
|
-
property bag or via ``<var>.arena`` somewhere in the same block).
|
|
76
|
-
|
|
77
|
-
Parameters
|
|
78
|
-
----------
|
|
79
|
-
cypher : the Cypher block to inspect.
|
|
80
|
-
label : target label (e.g. ``"Entity"``, ``"Chunk"``).
|
|
81
|
-
var : optional variable filter — when set, only patterns that
|
|
82
|
-
bind this variable are checked. Useful when a block has
|
|
83
|
-
multiple patterns of the same label and we want to assert
|
|
84
|
-
one specific one.
|
|
85
|
-
"""
|
|
86
|
-
import re
|
|
87
|
-
|
|
88
|
-
pattern = re.compile(
|
|
89
|
-
r"\("
|
|
90
|
-
r"\s*(?P<v>[A-Za-z_]\w*)?"
|
|
91
|
-
r"\s*(?P<labels>(?::[A-Za-z_]\w*)+)" # one or more labels
|
|
92
|
-
r"\s*(?P<props>\{[^{}]*\})?"
|
|
93
|
-
r"\s*\)",
|
|
94
|
-
re.MULTILINE,
|
|
95
|
-
)
|
|
96
|
-
target = f":{label}"
|
|
97
|
-
found_any = False
|
|
98
|
-
for m in pattern.finditer(cypher):
|
|
99
|
-
labels = m.group("labels") or ""
|
|
100
|
-
# Require an exact-label match so :Entity matches `:Entity` and
|
|
101
|
-
# `:Entity:Concept` but not `:Entitlement` (re.search alone
|
|
102
|
-
# would treat the latter as a hit).
|
|
103
|
-
label_tokens = re.findall(r":([A-Za-z_]\w*)", labels)
|
|
104
|
-
if label not in label_tokens:
|
|
105
|
-
continue
|
|
106
|
-
v = m.group("v") or ""
|
|
107
|
-
if var and v != var:
|
|
108
|
-
continue
|
|
109
|
-
found_any = True
|
|
110
|
-
props = m.group("props") or ""
|
|
111
|
-
if "arena" in props:
|
|
112
|
-
continue
|
|
113
|
-
if v and re.search(rf"\b{re.escape(v)}\.arena\b", cypher):
|
|
114
|
-
continue
|
|
115
|
-
return False
|
|
116
|
-
return found_any
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
# ---------------------------------------------------------------------------
|
|
120
|
-
# Unit tests — exercise the writer block via the stub session by
|
|
121
|
-
# calling its Cypher directly.
|
|
122
|
-
# ---------------------------------------------------------------------------
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def test_writer_concept_path_carries_arena() -> None:
|
|
126
|
-
"""The Concept-extraction Cypher always names arena on Entity + Chunk."""
|
|
127
|
-
rec = _Recorder()
|
|
128
|
-
arena = "acme"
|
|
129
|
-
rec.run(
|
|
130
|
-
"""
|
|
131
|
-
MERGE (e:Entity:Concept {arena: $arena, name: $name})
|
|
132
|
-
ON CREATE SET e.type = 'Concept',
|
|
133
|
-
e.created_at = $now,
|
|
134
|
-
e.weight = 1.0
|
|
135
|
-
WITH e
|
|
136
|
-
MATCH (c:Chunk {arena: $arena, id: $cid})
|
|
137
|
-
MERGE (e)-[r:MENTIONS]->(c)
|
|
138
|
-
ON CREATE SET r.weight = 1.0, r.created_at = $now
|
|
139
|
-
ON MATCH SET r.weight = coalesce(r.weight, 1.0) + 0.1
|
|
140
|
-
""",
|
|
141
|
-
arena=arena, name="Pricing", cid="chunk-1", now="t",
|
|
142
|
-
)
|
|
143
|
-
cypher, params = rec.calls[-1]
|
|
144
|
-
assert _has_arena_in_pattern(cypher, "Entity")
|
|
145
|
-
assert _has_arena_in_pattern(cypher, "Chunk")
|
|
146
|
-
assert params["arena"] == arena
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def test_writer_person_email_carries_arena_email_and_communicated_edge() -> None:
|
|
150
|
-
"""Metadata-driven Person email path tags arena + builds COMMUNICATED edge."""
|
|
151
|
-
rec = _Recorder()
|
|
152
|
-
arena = "acme"
|
|
153
|
-
rec.run(
|
|
154
|
-
"""
|
|
155
|
-
MERGE (p:Entity:Person {arena: $arena, email: $email})
|
|
156
|
-
ON CREATE SET p.created_at = $now,
|
|
157
|
-
p.first_seen = $occurred_at,
|
|
158
|
-
p.last_seen = $occurred_at
|
|
159
|
-
ON MATCH SET p.last_seen = CASE
|
|
160
|
-
WHEN $occurred_at > coalesce(p.last_seen, '')
|
|
161
|
-
THEN $occurred_at
|
|
162
|
-
ELSE p.last_seen END
|
|
163
|
-
WITH p
|
|
164
|
-
MATCH (c:Chunk {arena: $arena, id: $cid})
|
|
165
|
-
MERGE (p)-[r:COMMUNICATED]->(c)
|
|
166
|
-
ON CREATE SET r.channel = $channel,
|
|
167
|
-
r.direction = $direction,
|
|
168
|
-
r.occurred_at = $occurred_at,
|
|
169
|
-
r.weight = 1.0
|
|
170
|
-
""",
|
|
171
|
-
arena=arena, email="alex@acme.com", cid="c-1",
|
|
172
|
-
channel="email", direction="inbound",
|
|
173
|
-
occurred_at="2026-05-08T00:00:00Z", now="t",
|
|
174
|
-
)
|
|
175
|
-
cypher, params = rec.calls[-1]
|
|
176
|
-
assert _has_arena_in_pattern(cypher, "Person")
|
|
177
|
-
assert _has_arena_in_pattern(cypher, "Chunk")
|
|
178
|
-
assert "COMMUNICATED" in cypher
|
|
179
|
-
assert params["channel"] == "email"
|
|
180
|
-
assert params["direction"] == "inbound"
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
def test_co_occurs_edges_are_arena_scoped_on_both_endpoints() -> None:
|
|
184
|
-
"""CO_OCCURS Cypher matches both endpoints in the same arena."""
|
|
185
|
-
rec = _Recorder()
|
|
186
|
-
rec.run(
|
|
187
|
-
"""
|
|
188
|
-
MATCH (a:Entity:Concept {arena: $arena, name: $a})
|
|
189
|
-
MATCH (b:Entity:Concept {arena: $arena, name: $b})
|
|
190
|
-
MERGE (a)-[r:CO_OCCURS]->(b)
|
|
191
|
-
ON CREATE SET r.weight = 0.5, r.created_at = $now
|
|
192
|
-
ON MATCH SET r.weight = coalesce(r.weight, 0.5) + 0.05
|
|
193
|
-
""",
|
|
194
|
-
arena="acme", a="Pricing", b="Negotiation", now="t",
|
|
195
|
-
)
|
|
196
|
-
cypher, _ = rec.calls[-1]
|
|
197
|
-
# Both endpoints carry arena in the property bag.
|
|
198
|
-
assert cypher.count("arena: $arena") == 2
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
def test_known_as_edge_links_email_and_name_within_arena() -> None:
    """Check the KNOWN_AS query joins the by-name and by-email Person patterns.

    Both ``Person`` patterns in the query carry ``arena`` in their property
    bag, and the relationship type ``KNOWN_AS`` appears in the recorded text.
    """
    known_as_query = """
        MATCH (n:Person {arena: $arena, name: $name})
        MATCH (e:Person {arena: $arena, email: $email})
        MERGE (n)-[:KNOWN_AS]->(e)
        """
    recorder = _Recorder()
    recorder.run(
        known_as_query,
        arena="acme", name="Alex Tong", email="alex@acme.com",
    )

    # Only the query text of the most recent call is inspected.
    recorded_cypher, _ = recorder.calls[-1]
    assert _has_arena_in_pattern(recorded_cypher, "Person")
    assert "KNOWN_AS" in recorded_cypher
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
# ---------------------------------------------------------------------------
|
|
218
|
-
# Self-test: the helper above flags the bug the lint missed in v1.
|
|
219
|
-
# ---------------------------------------------------------------------------
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
def test_helper_flags_unscoped_person_pattern() -> None:
    """Self-test for ``_has_arena_in_pattern`` on a deliberately bad query.

    The Person pattern below omits ``arena`` while the Chunk pattern includes
    it, so the helper must reject the former and accept the latter.
    """
    # Only ever inspected as text; never sent to a database.
    unscoped_person_query = """
        MERGE (p:Entity:Person {email: $email})
        ON CREATE SET p.created_at = $now
        MATCH (c:Chunk {arena: $arena, id: $cid})
        MERGE (p)-[:MENTIONS]->(c)
        """
    assert not _has_arena_in_pattern(unscoped_person_query, "Person"), (
        "helper must flag the unscoped Person pattern"
    )
    assert _has_arena_in_pattern(unscoped_person_query, "Chunk"), (
        "Chunk pattern with arena in property bag should pass"
    )
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
# ---------------------------------------------------------------------------
|
|
236
|
-
# Neo4j-backed integration tests — only run when env is set.
|
|
237
|
-
# ---------------------------------------------------------------------------
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
# Connection settings for the optional integration tests, read from the
# environment once at import time. Only the user name has a default.
_NEO4J_URI = os.getenv("NEO4J_TEST_URI")
_NEO4J_USER = os.getenv("NEO4J_TEST_USER", "neo4j")
_NEO4J_PASSWORD = os.getenv("NEO4J_TEST_PASSWORD")

# Marker that skips a test unless both the URI and the password are set
# (an unset or empty value counts as missing).
_skip_no_neo4j = pytest.mark.skipif(
    not _NEO4J_URI or not _NEO4J_PASSWORD,
    reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
)
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
@pytest.fixture
def neo4j_driver():
    """Open a Neo4j driver and clean test data on teardown.

    Yields a ``(driver, arenas)`` pair, where ``arenas`` is a list of two
    randomised arena names so concurrent test runs don't trample each
    other. Teardown deletes only nodes whose ``arena`` property is one of
    those names (never a global wipe — this fixture must be safe against a
    populated dev database).

    The driver is closed even if the cleanup query itself fails, so a
    broken teardown cannot leak the connection.
    """
    from neo4j import GraphDatabase  # local import keeps unit-only runs neo4j-free

    driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    arenas = [f"test_a_{uuid.uuid4().hex[:8]}", f"test_b_{uuid.uuid4().hex[:8]}"]
    try:
        yield driver, arenas
    finally:
        try:
            with driver.session() as session:
                for arena in arenas:
                    session.run(
                        "MATCH (n) WHERE n.arena = $arena DETACH DELETE n",
                        arena=arena,
                    )
        finally:
            # Always release the driver, whatever happened during cleanup.
            driver.close()
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
@_skip_no_neo4j
def test_two_arenas_get_distinct_person_nodes_for_same_email(neo4j_driver) -> None:
    """Same contact_email in two arenas → two :Person nodes, not one.

    For each of the fixture's two arenas this creates a Chunk, then MERGEs a
    Person keyed on ``(arena, email)`` and links it to that Chunk. It then
    checks that the shared email resolves to exactly one Person in each
    arena.
    """
    driver, (arena_a, arena_b) = neo4j_driver
    email = "shared@example.com"

    with driver.session() as session:
        for arena in (arena_a, arena_b):
            # Chunk id is derived from the arena name.
            cid = f"c_{arena}"
            # Materialise the chunk that the Person attaches to.
            session.run(
                """
                MERGE (c:Chunk {id: $cid})
                SET c.arena = $arena, c.text = 'test', c.path = 'test',
                    c.created_at = '2026-05-08T00:00:00Z'
                """,
                cid=cid, arena=arena,
            )
            # Apply the same writer block as the engine would.
            session.run(
                """
                MERGE (p:Entity:Person {arena: $arena, email: $email})
                ON CREATE SET p.created_at = $now,
                              p.first_seen = $occurred_at,
                              p.last_seen = $occurred_at
                WITH p
                MATCH (c:Chunk {arena: $arena, id: $cid})
                MERGE (p)-[r:COMMUNICATED]->(c)
                ON CREATE SET r.channel = $channel,
                              r.direction = $direction,
                              r.occurred_at = $occurred_at,
                              r.weight = 1.0
                """,
                arena=arena, email=email, cid=cid,
                channel="email", direction="inbound",
                occurred_at="2026-05-08T00:00:00Z", now="2026-05-08T00:00:00Z",
            )

        # Assert two distinct nodes, one per arena, both with the same email.
        result = session.run(
            "MATCH (p:Person) WHERE p.email = $email RETURN p.arena AS arena",
            email=email,
        )
        # Rows from arenas other than this test's pair are ignored, since the
        # database may hold unrelated data with the same email.
        seen = sorted(rec["arena"] for rec in result if rec["arena"] in (arena_a, arena_b))
        assert seen == sorted([arena_a, arena_b]), (
            f"expected exactly one Person per arena for the same email, got {seen!r}"
        )
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
@_skip_no_neo4j
def test_arena_scoped_search_does_not_traverse_other_arena(neo4j_driver) -> None:
    """An Entity lookup constrained to arena_a must not surface arena_b nodes.

    The same Concept name is seeded into both arenas. A lookup carrying
    ``arena`` returns a single row from arena_a, while a lookup without it
    returns a row for each arena.
    """
    driver, (arena_a, arena_b) = neo4j_driver
    name = "Shared Concept"
    both_arenas = (arena_a, arena_b)

    seed_query = """
        MERGE (c:Chunk {id: $cid})
        SET c.arena = $arena, c.text = 't', c.path = 'p',
            c.created_at = '2026-05-08T00:00:00Z'
        MERGE (e:Entity:Concept {arena: $arena, name: $name})
        ON CREATE SET e.weight = 1.0, e.created_at = '2026-05-08T00:00:00Z'
        MERGE (e)-[:MENTIONS]->(c)
        """
    scoped_lookup = """
        MATCH (n:Entity {name: $name, arena: $arena})
        RETURN n.arena AS arena, n.name AS name
        """

    with driver.session() as session:
        for arena in both_arenas:
            session.run(seed_query, cid=f"c_{arena}", arena=arena, name=name)

        # Lookup with the arena in the property bag.
        scoped_rows = list(session.run(scoped_lookup, name=name, arena=arena_a))
        assert len(scoped_rows) == 1
        assert scoped_rows[0]["arena"] == arena_a, (
            f"arena-scoped match returned wrong arena: {scoped_rows[0]['arena']!r}"
        )

        # The deliberately unscoped lookup still sees both nodes, which shows
        # the arena_b data exists and was filtered out above.
        unscoped = session.run(
            "MATCH (n:Entity {name: $name}) RETURN n.arena AS arena", name=name,
        )
        arenas_found = sorted(
            row["arena"] for row in unscoped if row["arena"] in both_arenas
        )
        assert arenas_found == sorted(both_arenas)
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
@_skip_no_neo4j
def test_forget_arena_b_leaves_arena_a_intact(neo4j_driver) -> None:
    """Deleting arena_b's Chunks and Entities must leave arena_a's in place.

    Both arenas are seeded with one Chunk and one Concept. The arena_b nodes
    are then removed with arena-scoped deletes and the per-arena counts are
    checked.
    """
    driver, (arena_a, arena_b) = neo4j_driver

    seed_query = """
        MERGE (c:Chunk {id: $cid})
        SET c.arena = $arena, c.text = 't', c.path = 'p',
            c.created_at = '2026-05-08T00:00:00Z'
        MERGE (e:Entity:Concept {arena: $arena, name: 'thing'})
        ON CREATE SET e.weight = 1.0, e.created_at = '2026-05-08T00:00:00Z'
        MERGE (e)-[:MENTIONS]->(c)
        """
    count_chunks = "MATCH (c:Chunk {arena: $arena}) RETURN count(c) AS n"
    count_entities = "MATCH (e:Entity {arena: $arena}) RETURN count(e) AS n"

    with driver.session() as session:
        for arena in (arena_a, arena_b):
            session.run(seed_query, cid=f"c_{arena}", arena=arena)

        # Tenant-scoped delete: same shape as forget-internal.
        session.run(
            "MATCH (c:Chunk {arena: $arena}) DETACH DELETE c", arena=arena_b,
        )
        session.run(
            "MATCH (e:Entity {arena: $arena}) DETACH DELETE e", arena=arena_b,
        )

        # The surviving tenant still has its data.
        a_chunks = session.run(count_chunks, arena=arena_a).single()["n"]
        a_entities = session.run(count_entities, arena=arena_a).single()["n"]
        assert a_chunks >= 1
        assert a_entities >= 1

        # The forgotten tenant has nothing left.
        b_chunks = session.run(count_chunks, arena=arena_b).single()["n"]
        b_entities = session.run(count_entities, arena=arena_b).single()["n"]
        assert b_chunks == 0
        assert b_entities == 0
|