@pentatonic-ai/ai-agent-sdk 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env python3
2
+ """Wipe pre-arena-scoping :Entity nodes from the L3 Neo4j graph.
3
+
4
+ Run this AFTER the engine has been deployed with the arena-scoped
5
+ writer paths, not before. Sequence:
6
+
7
+ 1. Deploy l2-hybridrag-proxy with arena-scoped MERGE patterns.
8
+ 2. Verify new ingest is creating arena-tagged entities (run
9
+ --dry-run on this script first; it reports legacy vs new).
10
+ 3. Run this script with --confirm to wipe legacy entities.
11
+ 4. Future ingest re-extracts entities from existing :Chunk nodes
12
+ on-demand (search-side touches them; new stores recreate them
13
+ from scratch under the right arena).
14
+
15
+ Why wipe vs. backfill: pre-fix entities collapsed cross-tenant by name,
16
+ so their MENTIONS edges connect to chunks across multiple arenas.
17
+ Splitting them by mentions is doable but error-prone (edge cases for
18
+ many-arena entities, orphans, no-mention entities). The Hebbian weights
19
+ on those edges were also corrupted by cross-tenant traffic, so they
20
+ weren't worth saving. Chunks are preserved either way — re-extraction
21
+ is cheap.
22
+
23
+ Usage:
24
+ # report counts only
25
+ python wipe-legacy-l3-entities.py --neo4j-uri bolt://localhost:7687
26
+
27
+ # actually wipe
28
+ python wipe-legacy-l3-entities.py --neo4j-uri bolt://localhost:7687 --confirm
29
+
30
+ The script is idempotent — running it twice on a clean graph deletes
31
+ zero rows.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import argparse
36
+ import os
37
+ import sys
38
+
39
+ try:
40
+ from neo4j import GraphDatabase
41
+ except ImportError:
42
+ print("ERROR: neo4j driver not installed. `pip install neo4j` first.", file=sys.stderr)
43
+ sys.exit(1)
44
+
45
+
46
def main() -> int:
    """Report — and, with ``--confirm``, delete — legacy :Entity nodes
    that predate arena scoping in the L3 Neo4j graph.

    Returns a process exit code: 0 on success (including dry runs and
    no-op runs), 2 when no password was supplied.
    """
    parser = argparse.ArgumentParser(
        # (__doc__ or "") guards against `python -OO`, which strips
        # docstrings to None and would make .split() raise.
        description=(__doc__ or "").split("\n\n")[0],
    )
    parser.add_argument(
        "--neo4j-uri",
        default=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        help="Neo4j bolt URI (default: NEO4J_URI env or bolt://localhost:7687)",
    )
    parser.add_argument(
        "--neo4j-user",
        default=os.environ.get("NEO4J_USER", "neo4j"),
    )
    parser.add_argument(
        "--neo4j-password",
        default=os.environ.get("NEO4J_PASSWORD"),
    )
    parser.add_argument(
        "--confirm",
        action="store_true",
        help="Actually delete. Without this flag, runs in dry-run mode "
        "(reports counts only).",
    )
    args = parser.parse_args()

    if not args.neo4j_password:
        print("ERROR: --neo4j-password or NEO4J_PASSWORD env required", file=sys.stderr)
        return 2

    driver = GraphDatabase.driver(
        args.neo4j_uri, auth=(args.neo4j_user, args.neo4j_password),
    )
    try:
        with driver.session() as session:
            legacy = _report_counts(session)
            if not args.confirm:
                print("\nDry run — pass --confirm to actually delete legacy entities.")
                return 0
            if legacy == 0:
                print("\nNothing to do — all entities already arena-scoped.")
                return 0
            _wipe_legacy(session, legacy)
        return 0
    finally:
        driver.close()


def _count(session, cypher: str) -> int:
    """Run a single-row count query and return its ``n`` column."""
    return session.run(cypher).single()["n"]


def _report_counts(session) -> int:
    """Print legacy vs arena-scoped breakdowns for :Entity and :Chunk.

    Printed before any delete so the operator can eyeball whether the
    new writer path has actually started producing arena-scoped rows.
    Returns the legacy :Entity count so the caller can decide whether a
    wipe is needed.
    """
    legacy = _count(session, "MATCH (e:Entity) WHERE e.arena IS NULL RETURN count(e) AS n")
    scoped = _count(session, "MATCH (e:Entity) WHERE e.arena IS NOT NULL RETURN count(e) AS n")
    print(f"L3 Entity nodes: {legacy} legacy (no arena) / {scoped} arena-scoped")

    # Same breakdown for chunks — they should already all be
    # arena-tagged but worth verifying before/after.
    chunk_legacy = _count(session, "MATCH (c:Chunk) WHERE c.arena IS NULL RETURN count(c) AS n")
    chunk_scoped = _count(session, "MATCH (c:Chunk) WHERE c.arena IS NOT NULL RETURN count(c) AS n")
    print(f"L3 Chunk nodes: {chunk_legacy} legacy (no arena) / {chunk_scoped} arena-scoped")
    return legacy


def _wipe_legacy(session, legacy: int) -> None:
    """DETACH DELETE all legacy (arena-less) entities, then drop the
    obsolete single-key entity index (best-effort)."""
    print(f"\nWiping {legacy} legacy entities…")
    result = session.run(
        "MATCH (e:Entity) WHERE e.arena IS NULL DETACH DELETE e RETURN count(e) AS n"
    ).single()
    deleted = result["n"]
    print(f"Deleted {deleted} legacy entities.")

    # Drop the old single-key entity index now that the data is gone —
    # safe because the new writer path uses the compound (arena, name)
    # index. Best-effort: an older server may not support IF EXISTS.
    try:
        session.run("DROP INDEX entity_name IF EXISTS")
        print("Dropped legacy `entity_name` index.")
    except Exception as e:  # noqa: BLE001
        print(f"Note: could not drop entity_name index ({e}); ok if absent.")
125
+
126
+
127
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell.
    raise SystemExit(main())
@@ -217,16 +217,40 @@ print("yes" if any("Eclipse" in r.get("content","") for r in data) else "no")')
217
217
  || fail "tenant-y lost the shared phrase (id collision?)"
218
218
 
219
219
  # ---------------------------------------------------------------------------
220
- # /forget — by metadata_contains. Cleans up so reruns are idempotent.
220
+ # /forget — tenant-scoped delete by arena.
221
+ #
222
+ # Pre-arena, /forget would unconditionally trigger a global wipe of
223
+ # L0+L4+L3 inside the shim, regardless of what filters were passed. The
224
+ # smoke test happily asserted "deleted >= 1" because the shim returned
225
+ # the count of every row globally, even rows the caller didn't ask to
226
+ # remove. That was the right thing for a bench reset, the wrong thing
227
+ # for any real tenant operation.
228
+ #
229
+ # Now /forget honours the arena in the metadata filter:
230
+ # - {metadata_contains: {arena: <tenant>}} → tenant-scoped wipe
231
+ # - {metadata_contains: {<other>: <val>}} → L6 targeted only
232
+ # - {} (empty body) → bench reset (gated
233
+ # by GLOBAL_WIPE in the
234
+ # internal endpoint)
235
+ #
236
+ # We assert here that wiping arena=e2e-tenant-a actually removes ≥1 row,
237
+ # AND that arena=e2e-tenant-b's rows survived — proves real tenant
238
+ # isolation rather than just "shim returned non-zero".
221
239
  # ---------------------------------------------------------------------------
222
240
 
223
241
  echo ""
224
- echo "=== /forget probe=e2e-arena ==="
242
+ echo "=== /forget arena=e2e-tenant-a ==="
225
243
  F=$(curl -sf -X POST "$BASE/forget" -H "Content-Type: application/json" \
226
- -d '{"metadata_contains":{"probe":"e2e-arena"}}')
244
+ -d '{"metadata_contains":{"arena":"e2e-tenant-a"}}')
227
245
  deleted=$(echo "$F" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("deleted",0))')
228
246
  echo " deleted: $deleted"
229
- [ "$deleted" -ge "1" ] && ok "/forget removed at least 1 row" || fail "/forget"
247
+ [ "$deleted" -ge "1" ] && ok "/forget removed tenant-a rows" || fail "/forget tenant-a wipe"
248
+
249
+ # tenant-b should still have its rows.
250
+ B=$(curl -sf -X POST "$BASE/search" -H "Content-Type: application/json" \
251
+ -d '{"arena":"e2e-tenant-b","query":"Borealis","limit":4}')
252
+ b_hits=$(echo "$B" | python3 -c 'import json,sys; print(len(json.load(sys.stdin).get("results",[])))')
253
+ [ "$b_hits" -ge "1" ] && ok "/forget left tenant-b intact" || fail "/forget leaked into tenant-b"
230
254
 
231
255
  echo ""
232
256
  echo "=== Result ==="
@@ -0,0 +1,333 @@
1
+ """Tests for the /aggregate endpoint and its L2 proxy backend.
2
+
3
+ Two flavours:
4
+
5
+ - Pure-unit tests around the request validation + group_by
6
+ whitelisting logic. Hermetic, fast, no Neo4j needed. Run on
7
+ every PR.
8
+
9
+ - Neo4j-backed integration tests (run when ``NEO4J_TEST_URI`` and
10
+ ``NEO4J_TEST_PASSWORD`` env vars are set). These prove the
11
+ aggregate Cypher actually returns the right buckets for typed-
12
+ Person + COMMUNICATED graphs and stays inside the caller's
13
+ arena.
14
+
15
+ Run:
16
+
17
+ cd packages/memory-engine
18
+ .venv/bin/python -m pytest tests/test_aggregate.py -v
19
+
20
+ Run with Neo4j:
21
+
22
+ NEO4J_TEST_URI=bolt://localhost:17687 \\
23
+ NEO4J_TEST_PASSWORD=testpassword \\
24
+ .venv/bin/python -m pytest tests/test_aggregate.py -v
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ import sys
30
+ import uuid
31
+ from pathlib import Path
32
+
33
+ import pytest
34
+
35
+
36
+ ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
37
+ sys.path.insert(0, str(ENGINE_ROOT))
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Unit tests — validation surface around the public shape.
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
def test_group_by_keys_whitelist_keeps_supported_keys_in_order() -> None:
    """The L2 proxy templates group_by keys directly into Cypher; the
    whitelist is the safety rail. Pin its contents + ordering
    behaviour so a future change can't accidentally accept arbitrary
    property names."""
    # Lazy import keeps the unit-test path free of pymilvus and friends.
    import importlib.util

    proxy_path = ENGINE_ROOT / "l2-hybridrag-proxy.py"
    spec = importlib.util.spec_from_file_location("l2_proxy_module", proxy_path)
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    # The proxy imports fastapi/neo4j/etc. at module import time. Skip
    # when those aren't available — the whitelist is also asserted via
    # the integration tests.
    try:
        spec.loader.exec_module(module)
    except ImportError:
        pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")

    # Public contract: only channel + direction are supported today.
    # Adding more is a deliberate decision; this assertion is a guard
    # against adding without thinking.
    assert module._AGGREGATE_GROUP_BY_KEYS == {"channel", "direction"}
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Neo4j-backed integration tests.
75
+ # ---------------------------------------------------------------------------
76
+
77
+
78
# Connection settings for the optional Neo4j-backed integration tests;
# leaving URI or password unset keeps those tests skipped.
_NEO4J_URI = os.getenv("NEO4J_TEST_URI")
_NEO4J_USER = os.getenv("NEO4J_TEST_USER", "neo4j")
_NEO4J_PASSWORD = os.getenv("NEO4J_TEST_PASSWORD")

# Shared marker applied to every integration test below.
_skip_no_neo4j = pytest.mark.skipif(
    not _NEO4J_URI or not _NEO4J_PASSWORD,
    reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
)
86
+
87
+
88
@pytest.fixture
def neo4j_driver():
    """Yield ``(driver, [arena_a, arena_b])`` and clean up on teardown.

    Arena names are randomised so concurrent test runs don't trample
    each other; teardown deletes only nodes tagged with those arenas —
    never a global wipe — so a populated dev database stays safe.
    """
    from neo4j import GraphDatabase

    driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    test_arenas = [f"agg_a_{uuid.uuid4().hex[:8]}", f"agg_b_{uuid.uuid4().hex[:8]}"]
    yield driver, test_arenas
    cleanup = "MATCH (n) WHERE n.arena = $arena DETACH DELETE n"
    with driver.session() as session:
        for test_arena in test_arenas:
            session.run(cleanup, arena=test_arena)
    driver.close()
109
+
110
+
111
def _seed_person_communications(
    session,
    arena: str,
    email: str,
    edges: list[tuple[str, str, str]],
) -> None:
    """Materialise (:Person {arena, email})-[:COMMUNICATED {channel,
    direction, occurred_at}]->(:Chunk {arena}) edges from a list of
    (channel, direction, occurred_at) tuples. Mirrors the exact shape
    that engine/services/l2/l2-hybridrag-proxy.py:_index-internal-batch
    writes on STORE_MEMORY ingest."""
    # The statement is loop-invariant; only the parameters vary.
    query = """
        MERGE (c:Chunk {id: $cid})
        SET c.arena = $arena, c.text = 't', c.path = 'p',
            c.created_at = $occurred_at
        MERGE (p:Entity:Person {arena: $arena, email: $email})
        ON CREATE SET p.created_at = $occurred_at
        MERGE (p)-[r:COMMUNICATED]->(c)
        ON CREATE SET r.channel = $channel,
                      r.direction = $direction,
                      r.occurred_at = $occurred_at,
                      r.weight = 1.0
    """
    for idx, (channel, direction, occurred_at) in enumerate(edges):
        session.run(
            query,
            cid=f"chunk_{arena}_{idx}",
            arena=arena,
            email=email,
            channel=channel,
            direction=direction,
            occurred_at=occurred_at,
        )
139
+
140
+
141
def _aggregate_request(
    session,
    arena: str,
    contact_email: str,
    group_by: list[str] | None = None,
) -> dict:
    """Run the same Cypher that /aggregate-internal would. Built
    inline so tests stay free of FastAPI plumbing."""
    # Mirror the real endpoint: None defaults to ["channel"]; an
    # explicit empty list means "no group_by" (one global bucket).
    wanted = ["channel"] if group_by is None else group_by
    # Whitelist + order-preserving de-dupe of the requested keys.
    safe_group_by: list[str] = []
    for key in wanted:
        if key in {"channel", "direction"} and key not in safe_group_by:
            safe_group_by.append(key)

    head = [
        "MATCH (p:Person {arena: $arena})-[r:COMMUNICATED]->(c:Chunk {arena: $arena})",
        "WHERE p.email = $contact_email",
    ]
    if safe_group_by:
        with_keys = ", ".join(f"r.{k} AS {k}" for k in safe_group_by)
        return_keys = ", ".join(safe_group_by)
        tail = [
            f"WITH {with_keys}, r.direction AS _direction, r.occurred_at AS _occurred_at",
            f"RETURN {return_keys},",
            "count(*) AS count,",
            "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,",
            "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,",
            "max(_occurred_at) AS last_seen,",
            "min(_occurred_at) AS first_seen",
            "ORDER BY count DESC",
        ]
    else:
        tail = [
            "WITH r.direction AS _direction, r.occurred_at AS _occurred_at",
            "RETURN count(*) AS count,",
            "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,",
            "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,",
            "max(_occurred_at) AS last_seen,",
            "min(_occurred_at) AS first_seen",
        ]
    cypher = "\n".join(head + tail) + "\n"

    buckets: list[dict] = []
    total = 0
    latest = None
    for record in session.run(cypher, arena=arena, contact_email=contact_email):
        bucket_count = int(record["count"] or 0)
        total += bucket_count
        last_seen = record["last_seen"]
        # ISO-8601 strings order correctly under lexicographic compare.
        if last_seen and (latest is None or str(last_seen) > latest):
            latest = str(last_seen)
        buckets.append({
            # Empty safe_group_by yields {} — the single global bucket.
            "keys": {k: record[k] for k in safe_group_by},
            "count": bucket_count,
            "inbound": int(record["inbound"] or 0),
            "outbound": int(record["outbound"] or 0),
            "last_seen": str(last_seen) if last_seen else None,
            "first_seen": str(record["first_seen"]) if record["first_seen"] else None,
        })
    return {"arena": arena, "total": total, "last_seen": latest, "buckets": buckets}
205
+
206
+
207
@_skip_no_neo4j
def test_aggregate_groups_by_channel_with_direction_counters(neo4j_driver) -> None:
    """5 emails (3 inbound, 2 outbound) + 2 slack (1/1) yields two
    buckets ordered by count, each with correct inbound/outbound
    splits and time bounds."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex.tong@pentatonic.com"

    seed_edges = [
        ("email", "inbound", "2026-05-08T09:00:00Z"),
        ("email", "outbound", "2026-05-09T10:00:00Z"),
        ("email", "inbound", "2026-05-07T08:00:00Z"),
        ("email", "outbound", "2026-05-09T11:00:00Z"),
        ("email", "inbound", "2026-05-06T12:00:00Z"),
        ("slack", "inbound", "2026-05-09T15:00:00Z"),
        ("slack", "outbound", "2026-05-08T11:00:00Z"),
    ]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, seed_edges)
        out = _aggregate_request(session, arena_a, email, ["channel"])

    assert out["total"] == 7
    assert out["last_seen"] == "2026-05-09T15:00:00Z"
    assert len(out["buckets"]) == 2
    email_bucket, slack_bucket = out["buckets"]

    # Busiest channel first (5 > 2).
    assert email_bucket["keys"] == {"channel": "email"}
    assert (email_bucket["count"], email_bucket["inbound"], email_bucket["outbound"]) == (5, 3, 2)

    assert slack_bucket["keys"] == {"channel": "slack"}
    assert (slack_bucket["count"], slack_bucket["inbound"], slack_bucket["outbound"]) == (2, 1, 1)
244
+
245
+
246
@_skip_no_neo4j
def test_aggregate_arena_isolation(neo4j_driver) -> None:
    """A's aggregate never sees B's edges, even when both arenas have
    a Person with the same email — the shared name was the bug
    pattern that motivated the typed-Person work in #28."""
    driver, (arena_a, arena_b) = neo4j_driver
    email = "shared@example.com"

    edges_a = [("email", "inbound", "2026-05-09T09:00:00Z")]
    edges_b = [
        ("email", "inbound", "2026-05-09T10:00:00Z"),
        ("slack", "outbound", "2026-05-09T11:00:00Z"),
    ]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, edges_a)
        _seed_person_communications(session, arena_b, email, edges_b)

        out_a = _aggregate_request(session, arena_a, email)
        out_b = _aggregate_request(session, arena_b, email)

    # Each arena only ever sees its own edge counts.
    assert (out_a["total"], len(out_a["buckets"])) == (1, 1)
    assert (out_b["total"], len(out_b["buckets"])) == (2, 2)
274
+
275
+
276
@_skip_no_neo4j
def test_aggregate_with_no_group_by_returns_single_bucket(neo4j_driver) -> None:
    """No group_by → one global bucket with overall totals only."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex@x.io"

    with driver.session() as session:
        _seed_person_communications(
            session, arena_a, email,
            [
                ("email", "inbound", "2026-05-09T09:00:00Z"),
                ("slack", "outbound", "2026-05-09T11:00:00Z"),
                ("email", "outbound", "2026-05-09T15:00:00Z"),
            ],
        )
        out = _aggregate_request(session, arena_a, email, group_by=[])

    assert out["total"] == 3
    assert len(out["buckets"]) == 1
    only_bucket = out["buckets"][0]
    # A global bucket carries no grouping keys, just the counters.
    assert only_bucket["keys"] == {}
    assert only_bucket["inbound"] == 1
    assert only_bucket["outbound"] == 2
298
+
299
+
300
@_skip_no_neo4j
def test_aggregate_returns_empty_when_person_node_missing(neo4j_driver) -> None:
    """When the typed-Person nodes don't exist for this contact yet
    (older memories, tenants pre-#28), the response is total: 0 with
    no buckets — caller falls back to whatever it had before. This is
    the deliberate non-fallback at this layer; the over-fetch v1
    lives in TES."""
    driver, (arena_a, _) = neo4j_driver
    # Fix: previously the session was opened via driver.session().__enter__()
    # and never exited, leaking a session/connection for the rest of the
    # test run. The context manager guarantees it is released.
    with driver.session() as session:
        out = _aggregate_request(session, arena_a, "no-one@example.com")
    assert out["total"] == 0
    assert out["buckets"] == []
313
+
314
+
315
@_skip_no_neo4j
def test_aggregate_unknown_group_by_keys_are_silently_dropped(neo4j_driver) -> None:
    """Defence-in-depth: even if a future caller passes
    group_by=['evil_property_name'], we never template that string
    into Cypher. Aggregate falls back to no group_by (one global
    bucket) rather than failing — the whitelist is already on the
    real endpoint, but the test pins the behaviour at the helper
    level too."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex@x.io"
    single_edge = [("email", "inbound", "2026-05-09T09:00:00Z")]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, single_edge)
        out = _aggregate_request(session, arena_a, email, ["evil; DROP TABLE"])
    # No supported keys remained → single global bucket with no keys.
    assert len(out["buckets"]) == 1
    assert out["buckets"][0]["keys"] == {}