@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/index.js +7 -0
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +9 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/engine.test.js +142 -0
- package/packages/memory/src/engine.js +65 -0
- package/packages/memory-engine/compat/server.py +90 -5
- package/packages/memory-engine/docker-compose.yml +18 -8
- package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +640 -81
- package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
- package/packages/memory-engine/engine/services/l4/server.py +19 -57
- package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
- package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
- package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +128 -0
- package/packages/memory-engine/tests/e2e_arena.sh +28 -4
- package/packages/memory-engine/tests/test_aggregate.py +333 -0
- package/packages/memory-engine/tests/test_arena_safety.py +232 -0
- package/packages/memory-engine/tests/test_channel_stat_reader.py +437 -0
- package/packages/memory-engine/tests/test_channel_stat_rollups.py +308 -0
- package/packages/memory-engine/tests/test_embed_provider.py +354 -0
- package/packages/memory-engine/tests/test_l3_arena_isolation.py +412 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""Tests for the /aggregate endpoint and its L2 proxy backend.
|
|
2
|
+
|
|
3
|
+
Two flavours:
|
|
4
|
+
|
|
5
|
+
- Pure-unit tests around the request validation + group_by
|
|
6
|
+
whitelisting logic. Hermetic, fast, no Neo4j needed. Run on
|
|
7
|
+
every PR.
|
|
8
|
+
|
|
9
|
+
- Neo4j-backed integration tests (run when ``NEO4J_TEST_URI`` and
|
|
10
|
+
``NEO4J_TEST_PASSWORD`` env vars are set). These prove the
|
|
11
|
+
aggregate Cypher actually returns the right buckets for typed-
|
|
12
|
+
Person + COMMUNICATED graphs and stays inside the caller's
|
|
13
|
+
arena.
|
|
14
|
+
|
|
15
|
+
Run:
|
|
16
|
+
|
|
17
|
+
cd packages/memory-engine
|
|
18
|
+
.venv/bin/python -m pytest tests/test_aggregate.py -v
|
|
19
|
+
|
|
20
|
+
Run with Neo4j:
|
|
21
|
+
|
|
22
|
+
NEO4J_TEST_URI=bolt://localhost:17687 \\
|
|
23
|
+
NEO4J_TEST_PASSWORD=testpassword \\
|
|
24
|
+
.venv/bin/python -m pytest tests/test_aggregate.py -v
|
|
25
|
+
"""
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import os
|
|
29
|
+
import sys
|
|
30
|
+
import uuid
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
|
|
33
|
+
import pytest
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Make the L2 proxy service importable by the unit tests below. The
# service file lives outside the tests package, so we extend sys.path
# to its directory; the actual import is done lazily inside the test.
ENGINE_ROOT = Path(__file__).resolve().parent.parent / "engine" / "services" / "l2"
sys.path.insert(0, str(ENGINE_ROOT))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Unit tests — validation surface around the public shape.
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_group_by_keys_whitelist_keeps_supported_keys_in_order() -> None:
    """The L2 proxy templates group_by keys directly into Cypher; the
    whitelist is the safety rail. Pin its contents + ordering
    behaviour so a future change can't accidentally accept arbitrary
    property names."""
    # Deferred import keeps unit-only runs free of pymilvus and friends.
    import importlib.util

    module_spec = importlib.util.spec_from_file_location(
        "l2_proxy_module",
        ENGINE_ROOT / "l2-hybridrag-proxy.py",
    )
    assert module_spec is not None and module_spec.loader is not None

    # Executing the module triggers its top-level fastapi/neo4j imports.
    # When those packages aren't installed, skip — the whitelist is also
    # asserted by the integration tests.
    try:
        proxy_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(proxy_module)
    except ImportError:
        pytest.skip("l2 proxy deps unavailable in this venv (fine for unit-only runs)")

    # Public contract: channel + direction only. Extending this set is a
    # deliberate decision — this assertion forces that conversation.
    assert proxy_module._AGGREGATE_GROUP_BY_KEYS == {"channel", "direction"}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Neo4j-backed integration tests.
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Connection settings for the Neo4j-backed integration tests. Only URI
# and password are mandatory; the user defaults to the stock "neo4j"
# account.
_NEO4J_URI = os.environ.get("NEO4J_TEST_URI")
_NEO4J_USER = os.environ.get("NEO4J_TEST_USER", "neo4j")
_NEO4J_PASSWORD = os.environ.get("NEO4J_TEST_PASSWORD")

# Shared marker: every integration test below is skipped unless both
# required env vars are present, so unit-only CI runs stay green.
_skip_no_neo4j = pytest.mark.skipif(
    not (_NEO4J_URI and _NEO4J_PASSWORD),
    reason="set NEO4J_TEST_URI + NEO4J_TEST_PASSWORD to run integration tests",
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.fixture
def neo4j_driver():
    """Open a Neo4j driver and clean test data on teardown.

    Uses a randomised arena pair so concurrent test runs don't trample
    each other; tears down by deleting nodes scoped to those arenas
    (never a global wipe — must stay safe against a populated dev
    database).

    Yields:
        (driver, [arena_a, arena_b]) — an open driver plus two unique
        arena names for the test to seed into.
    """
    from neo4j import GraphDatabase

    driver = GraphDatabase.driver(_NEO4J_URI, auth=(_NEO4J_USER, _NEO4J_PASSWORD))
    arenas = [f"agg_a_{uuid.uuid4().hex[:8]}", f"agg_b_{uuid.uuid4().hex[:8]}"]
    try:
        yield driver, arenas
    finally:
        # Fix: previously driver.close() ran only if the cleanup query
        # succeeded — a failing DETACH DELETE leaked the connection
        # pool. Nest try/finally so the driver is always closed.
        try:
            with driver.session() as session:
                for arena in arenas:
                    session.run(
                        "MATCH (n) WHERE n.arena = $arena DETACH DELETE n",
                        arena=arena,
                    )
        finally:
            driver.close()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _seed_person_communications(
    session,
    arena: str,
    email: str,
    edges: list[tuple[str, str, str]],
) -> None:
    """Materialise (:Person {arena, email})-[:COMMUNICATED {channel,
    direction, occurred_at}]->(:Chunk {arena}) edges from a list of
    (channel, direction, occurred_at) tuples. Mirrors the exact shape
    that engine/services/l2/l2-hybridrag-proxy.py:_index-internal-batch
    writes on STORE_MEMORY ingest."""
    # The statement is loop-invariant; build it once outside the loop.
    cypher = """
        MERGE (c:Chunk {id: $cid})
        SET c.arena = $arena, c.text = 't', c.path = 'p',
            c.created_at = $occurred_at
        MERGE (p:Entity:Person {arena: $arena, email: $email})
        ON CREATE SET p.created_at = $occurred_at
        MERGE (p)-[r:COMMUNICATED]->(c)
        ON CREATE SET r.channel = $channel,
                      r.direction = $direction,
                      r.occurred_at = $occurred_at,
                      r.weight = 1.0
    """
    for index, (channel, direction, occurred_at) in enumerate(edges):
        session.run(
            cypher,
            cid=f"chunk_{arena}_{index}",
            arena=arena,
            email=email,
            channel=channel,
            direction=direction,
            occurred_at=occurred_at,
        )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _aggregate_request(
    session,
    arena: str,
    contact_email: str,
    group_by: list[str] | None = None,
) -> dict:
    """Run the same Cypher that /aggregate-internal would. Built
    inline so tests stay free of FastAPI plumbing.

    Args:
        session: an open Neo4j session.
        arena: tenant arena both ends of the pattern are scoped to.
        contact_email: the Person.email whose COMMUNICATED edges are
            aggregated.
        group_by: None → default to ["channel"]; [] → one global
            bucket; any other keys are filtered through the whitelist.

    Returns:
        Dict with ``arena``, ``total`` (sum of bucket counts),
        ``last_seen`` (max across buckets, as str) and ``buckets``.
    """
    # Mirror the real endpoint: an explicit empty list means "no
    # group_by" (one global bucket), None means default to channel.
    requested = ["channel"] if group_by is None else group_by
    # Whitelist + dedupe while preserving caller order; unknown keys
    # are silently dropped and never templated into Cypher.
    safe_group_by = []
    seen = set()
    for k in requested:
        if k in {"channel", "direction"} and k not in seen:
            seen.add(k)
            safe_group_by.append(k)

    if safe_group_by:
        # Only whitelisted identifiers reach these f-strings, so the
        # templating below cannot inject arbitrary Cypher.
        with_keys = ", ".join(f"r.{k} AS {k}" for k in safe_group_by)
        return_keys = ", ".join(safe_group_by)
        cypher = (
            "MATCH (p:Person {arena: $arena})-[r:COMMUNICATED]->(c:Chunk {arena: $arena})\n"
            "WHERE p.email = $contact_email\n"
            f"WITH {with_keys}, r.direction AS _direction, r.occurred_at AS _occurred_at\n"
            f"RETURN {return_keys},\n"
            "count(*) AS count,\n"
            "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,\n"
            "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,\n"
            "max(_occurred_at) AS last_seen,\n"
            "min(_occurred_at) AS first_seen\n"
            "ORDER BY count DESC\n"
        )
    else:
        # Ungrouped variant: one row of overall totals.
        cypher = (
            "MATCH (p:Person {arena: $arena})-[r:COMMUNICATED]->(c:Chunk {arena: $arena})\n"
            "WHERE p.email = $contact_email\n"
            "WITH r.direction AS _direction, r.occurred_at AS _occurred_at\n"
            "RETURN count(*) AS count,\n"
            "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,\n"
            "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,\n"
            "max(_occurred_at) AS last_seen,\n"
            "min(_occurred_at) AS first_seen\n"
        )

    buckets = []
    total = 0
    latest = None
    for rec in session.run(cypher, arena=arena, contact_email=contact_email):
        count = int(rec["count"] or 0)
        total += count
        last_seen = rec["last_seen"]
        # ISO-8601 timestamps in a fixed offset compare correctly as
        # plain strings, so lexicographic max is the latest one.
        if last_seen and (latest is None or str(last_seen) > latest):
            latest = str(last_seen)
        keys = {k: rec[k] for k in safe_group_by} if safe_group_by else {}
        buckets.append({
            "keys": keys,
            "count": count,
            "inbound": int(rec["inbound"] or 0),
            "outbound": int(rec["outbound"] or 0),
            "last_seen": str(last_seen) if last_seen else None,
            "first_seen": str(rec["first_seen"]) if rec["first_seen"] else None,
        })
    return {"arena": arena, "total": total, "last_seen": latest, "buckets": buckets}
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@_skip_no_neo4j
def test_aggregate_groups_by_channel_with_direction_counters(neo4j_driver) -> None:
    """5 emails (3 inbound, 2 outbound) + 2 slack (1/1) yields two
    buckets ordered by count, each with correct inbound/outbound
    splits and time bounds."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex.tong@pentatonic.com"

    seed_edges = [
        ("email", "inbound", "2026-05-08T09:00:00Z"),
        ("email", "outbound", "2026-05-09T10:00:00Z"),
        ("email", "inbound", "2026-05-07T08:00:00Z"),
        ("email", "outbound", "2026-05-09T11:00:00Z"),
        ("email", "inbound", "2026-05-06T12:00:00Z"),
        ("slack", "inbound", "2026-05-09T15:00:00Z"),
        ("slack", "outbound", "2026-05-08T11:00:00Z"),
    ]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, seed_edges)
        out = _aggregate_request(session, arena_a, email, ["channel"])

    assert out["total"] == 7
    assert out["last_seen"] == "2026-05-09T15:00:00Z"

    # Busiest channel first (5 email > 2 slack), per-direction splits.
    expected = [
        ("email", 5, 3, 2),
        ("slack", 2, 1, 1),
    ]
    assert len(out["buckets"]) == len(expected)
    for bucket, (channel, count, inbound, outbound) in zip(out["buckets"], expected):
        assert bucket["keys"] == {"channel": channel}
        assert bucket["count"] == count
        assert bucket["inbound"] == inbound
        assert bucket["outbound"] == outbound
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@_skip_no_neo4j
def test_aggregate_arena_isolation(neo4j_driver) -> None:
    """A's aggregate never sees B's edges, even when both arenas have
    a Person with the same email — the shared name was the bug
    pattern that motivated the typed-Person work in #28."""
    driver, (arena_a, arena_b) = neo4j_driver
    email = "shared@example.com"

    with driver.session() as session:
        _seed_person_communications(
            session, arena_a, email,
            [("email", "inbound", "2026-05-09T09:00:00Z")],
        )
        _seed_person_communications(
            session, arena_b, email,
            [
                ("email", "inbound", "2026-05-09T10:00:00Z"),
                ("slack", "outbound", "2026-05-09T11:00:00Z"),
            ],
        )

        # Each arena sees exactly its own seeded edges, nothing more.
        for arena, want_total, want_buckets in (
            (arena_a, 1, 1),
            (arena_b, 2, 2),
        ):
            result = _aggregate_request(session, arena, email)
            assert result["total"] == want_total
            assert len(result["buckets"]) == want_buckets
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@_skip_no_neo4j
def test_aggregate_with_no_group_by_returns_single_bucket(neo4j_driver) -> None:
    """No group_by → one global bucket with overall totals only."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex@x.io"

    seed_edges = [
        ("email", "inbound", "2026-05-09T09:00:00Z"),
        ("slack", "outbound", "2026-05-09T11:00:00Z"),
        ("email", "outbound", "2026-05-09T15:00:00Z"),
    ]
    with driver.session() as session:
        _seed_person_communications(session, arena_a, email, seed_edges)
        out = _aggregate_request(session, arena_a, email, group_by=[])

    assert out["total"] == 3
    assert len(out["buckets"]) == 1
    bucket = out["buckets"][0]
    assert bucket["keys"] == {}
    assert bucket["inbound"] == 1
    assert bucket["outbound"] == 2
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@_skip_no_neo4j
def test_aggregate_returns_empty_when_person_node_missing(neo4j_driver) -> None:
    """When the typed-Person nodes don't exist for this contact yet
    (older memories, tenants pre-#28), the response is total: 0 with
    no buckets — caller falls back to whatever it had before. This is
    the deliberate non-fallback at this layer; the over-fetch v1
    lives in TES."""
    driver, (arena_a, _) = neo4j_driver
    # Fix: the previous version called driver.session().__enter__()
    # and never exited, leaking the session; use a with block so it is
    # always closed.
    with driver.session() as session:
        out = _aggregate_request(session, arena_a, "no-one@example.com")
    assert out["total"] == 0
    assert out["buckets"] == []
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
@_skip_no_neo4j
def test_aggregate_unknown_group_by_keys_are_silently_dropped(neo4j_driver) -> None:
    """Defence-in-depth: even if a future caller passes
    group_by=['evil_property_name'], we never template that string
    into Cypher. Aggregate falls back to no group_by (one global
    bucket) rather than failing — the whitelist is already on the
    real endpoint, but the test pins the behaviour at the helper
    level too."""
    driver, (arena_a, _) = neo4j_driver
    email = "alex@x.io"
    with driver.session() as session:
        _seed_person_communications(
            session, arena_a, email,
            [("email", "inbound", "2026-05-09T09:00:00Z")],
        )
        out = _aggregate_request(session, arena_a, email, ["evil; DROP TABLE"])
    # Every requested key was dropped by the whitelist, so the result
    # collapses to a single global bucket with empty keys.
    assert len(out["buckets"]) == 1
    assert out["buckets"][0]["keys"] == {}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Static safety check: every Cypher node pattern that targets a
|
|
2
|
+
tenant-data label must scope by `arena` — not just somewhere in the
|
|
3
|
+
surrounding block, but on the same variable.
|
|
4
|
+
|
|
5
|
+
Run with:
|
|
6
|
+
cd packages/memory-engine
|
|
7
|
+
python -m pytest tests/test_arena_safety.py -v
|
|
8
|
+
|
|
9
|
+
How the check works:
|
|
10
|
+
|
|
11
|
+
1. Walk the live engine source and pull out every Cypher block (both
|
|
12
|
+
triple-quoted strings and inline ``session.run("…")`` calls).
|
|
13
|
+
|
|
14
|
+
2. For each block, find every node pattern that names one of the
|
|
15
|
+
tenant labels — patterns like ``(p:Person {...})`` or
|
|
16
|
+
``(e:Entity:Concept {...})``.
|
|
17
|
+
|
|
18
|
+
3. For each such pattern, the variable bound by that pattern (e.g.
|
|
19
|
+
``p`` / ``e``) must be tied to ``arena`` somewhere in the block:
|
|
20
|
+
either inside the pattern's own property bag (``{arena: $arena,
|
|
21
|
+
…}``) or via a WHERE clause that references ``<var>.arena``.
|
|
22
|
+
|
|
23
|
+
The earlier weaker version of this lint checked "the block contains
|
|
24
|
+
the string `arena` *somewhere*", which let a Person MERGE without
|
|
25
|
+
arena slip through if any neighbouring chunk-join in the same block
|
|
26
|
+
referenced `arena`. The bug-day repro was injecting
|
|
27
|
+
``MERGE (p:Entity:Person {email: $email})`` while the rest of the
|
|
28
|
+
block kept ``MATCH (c:Chunk {arena: $arena, …})`` — block contained
|
|
29
|
+
"arena", lint was happy, the Person node was global.
|
|
30
|
+
|
|
31
|
+
If a future change introduces a Cypher pattern on these labels without
|
|
32
|
+
arena (e.g. a debug helper that genuinely needs to span all tenants),
|
|
33
|
+
allow-list it via ``_ALLOWED_NO_ARENA_REASONS`` with a justification.
|
|
34
|
+
"""
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import re
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
|
|
40
|
+
import pytest
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# The two sources this lint walks: the live L2 proxy service and the
# compatibility shim, both of which carry hand-written Cypher.
REPO_ROOT = Path(__file__).resolve().parent.parent
ENGINE_LIVE = REPO_ROOT / "engine" / "services" / "l2" / "l2-hybridrag-proxy.py"
COMPAT_SHIM = REPO_ROOT / "compat" / "server.py"

# Labels that carry tenant data. Any Cypher pattern naming these MUST
# bind the variable to `arena` — either as a property in the pattern
# itself or via a WHERE clause on the same variable.
TENANT_LABELS = ("Entity", "Person", "Concept", "Channel", "Chunk", "ChannelStat")
# Regex alternation over the labels, spliced into _NODE_PATTERN below.
_LABEL_ALT = "|".join(TENANT_LABELS)

# Triple-quoted strings. The body group tolerates single/double quotes
# inside as long as they don't form a closing `"""`.
_TRIPLE_STRING = re.compile(
    r'"""(?P<body>[^"]*?(?:"(?!"")[^"]*?)*?)"""',
    re.DOTALL,
)

# Inline `session.run("…")` calls that aren't already in a triple-quote.
_SINGLELINE_RUN = re.compile(
    r'session\.run\(\s*"((?:[^"\\]|\\.)+)"',
    re.MULTILINE,
)

# Anything that smells like Cypher inside a string literal: a block is
# only scanned when it contains one of these verbs.
_OP_PATTERN = re.compile(r"\b(MERGE|MATCH|DETACH\s+DELETE)\b", re.IGNORECASE)

# Node pattern: (var:Label1:Label2 {props}) or (var:Label)
# The var is optional in Cypher, but anonymous patterns can't carry a
# WHERE clause anyway — flag them as unsafe unless the inline property
# bag scopes by arena.
_NODE_PATTERN = re.compile(
    r"""
    \(
    \s*(?P<var>[A-Za-z_][A-Za-z0-9_]*)?  # optional variable
    \s*(?P<labels>(?::(?:""" + _LABEL_ALT + r"""))+)\b  # one+ tenant labels
    \s*(?P<props>\{[^{}]*\})?  # optional property bag
    """,
    re.VERBOSE,
)

# Allow-list: cross-tenant Cypher that we deliberately want to keep.
# Map a unique substring of the offending pattern to a justification.
# NOTE: keys are matched by substring against extracted blocks, so
# they must stay byte-identical to the Cypher in the scanned sources.
_ALLOWED_NO_ARENA_REASONS: dict[str, str] = {
    # /index-internal-stats — global ops counters that return ints.
    "MATCH (c:Chunk) RETURN count(c) AS n":
        "ops counter — returns a single int, no tenant data exposed",
    "MATCH (e:Entity) RETURN count(e) AS n":
        "ops counter — returns a single int, no tenant data exposed",
    # /forget-internal global-wipe path — gated by confirm: GLOBAL_WIPE.
    "MATCH (c:Chunk) DETACH DELETE c RETURN count(c) AS n":
        "global-wipe, gated by explicit confirm: GLOBAL_WIPE",
    "MATCH (e:Entity) DETACH DELETE e RETURN count(e) AS n":
        "global-wipe, gated by explicit confirm: GLOBAL_WIPE",
    # Migration target — pre-arena legacy entities have no arena.
    "MATCH (e:Entity) WHERE e.arena IS NULL DETACH DELETE e":
        "legacy-wipe migration target (entities pre-arena scoping)",
}


# Cypher line comments. Strip these from extracted blocks before
# running the tenant-label scan so that prose mentions of pattern
# syntax (e.g. "Person-COMMUNICATED edges") inside `// …` comments
# don't get parsed as real node patterns.
_CYPHER_LINE_COMMENT = re.compile(r"//[^\n]*")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _strip_cypher_comments(block: str) -> str:
|
|
109
|
+
return _CYPHER_LINE_COMMENT.sub("", block)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _extract_cypher_blocks(source: str) -> list[tuple[int, str]]:
    """Return [(approx_line_no, body)] for every Cypher block."""
    found: list[tuple[int, str]] = []

    # Pass 1: triple-quoted literals containing a Cypher verb. Strip
    # Cypher `//` comments so prose patterns inside comments are never
    # scanned as real queries.
    for match in _TRIPLE_STRING.finditer(source):
        raw = match.group("body")
        if not _OP_PATTERN.search(raw):
            continue
        start_line = source[: match.start()].count("\n") + 1
        found.append((start_line, _strip_cypher_comments(raw)))

    # Pass 2: inline session.run("…") literals. A single-line match that
    # fell inside a triple string is already covered above; cheap dedup
    # by substring against the pass-1 bodies.
    pass_one_bodies = [text for _, text in found]
    for match in _SINGLELINE_RUN.finditer(source):
        raw = match.group(1)
        if not _OP_PATTERN.search(raw):
            continue
        if any(raw in body for body in pass_one_bodies):
            continue
        start_line = source[: match.start()].count("\n") + 1
        found.append((start_line, raw))

    return found
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _is_allowed(block: str) -> str | None:
    """Return the allow-list justification for *block*, or None when
    no allow-listed fragment occurs in it."""
    matches = (
        reason
        for fragment, reason in _ALLOWED_NO_ARENA_REASONS.items()
        if fragment in block
    )
    return next(matches, None)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _pattern_scopes_arena(block: str, var: str | None, props: str | None) -> bool:
|
|
145
|
+
"""True if this specific pattern is arena-scoped.
|
|
146
|
+
|
|
147
|
+
A pattern is arena-scoped when EITHER:
|
|
148
|
+
- The inline property bag contains `arena:`, OR
|
|
149
|
+
- A `WHERE` clause in the surrounding block references
|
|
150
|
+
`<var>.arena`.
|
|
151
|
+
"""
|
|
152
|
+
if props and re.search(r"\barena\s*:", props):
|
|
153
|
+
return True
|
|
154
|
+
if var is None:
|
|
155
|
+
# Anonymous pattern with no property bag — there's no way to
|
|
156
|
+
# scope it via WHERE since there's no var to reference.
|
|
157
|
+
return False
|
|
158
|
+
# Look for `<var>.arena` anywhere in the block. Crude but the
|
|
159
|
+
# variable name is unambiguous within a single Cypher block.
|
|
160
|
+
if re.search(rf"\b{re.escape(var)}\.arena\b", block):
|
|
161
|
+
return True
|
|
162
|
+
return False
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@pytest.mark.parametrize(
    "source_path",
    [
        pytest.param(ENGINE_LIVE, id="l2-hybridrag-proxy"),
        pytest.param(COMPAT_SHIM, id="compat-shim"),
    ],
)
def test_every_tenant_pattern_is_arena_scoped(source_path: Path) -> None:
    """Each tenant-label node pattern is scoped by arena."""
    if not source_path.exists():
        pytest.skip(f"{source_path} not present")
    text = source_path.read_text()

    offenders: list[str] = []
    for line_no, block in _extract_cypher_blocks(text):
        # A block-level allow-list hit exempts every pattern inside it,
        # so an entire global-wipe block needs only one entry.
        if _is_allowed(block):
            continue
        offenders.extend(
            f"{source_path.name}:~{line_no} pattern `{match.group(0).strip()}` "
            f"in block:\n{block.strip()[:240]}"
            for match in _NODE_PATTERN.finditer(block)
            if not _pattern_scopes_arena(
                block, match.group("var"), match.group("props")
            )
        )

    assert not offenders, (
        f"{len(offenders)} tenant-labelled Cypher pattern(s) miss arena scoping:\n\n"
        + "\n\n---\n\n".join(offenders)
        + "\n\nAdd `arena` to the pattern (e.g. `{arena: $arena, …}`) or to a "
        "WHERE clause on the same variable. If the pattern genuinely needs "
        "to span tenants, add an entry to _ALLOWED_NO_ARENA_REASONS with a "
        "justification."
    )
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# A self-test: the lint should fail when given a block that's clearly
|
|
204
|
+
# unscoped. This guards against future refactors of the lint silently
|
|
205
|
+
# turning into a no-op.
|
|
206
|
+
def test_lint_self_test_catches_obvious_bug() -> None:
    """Inject an unscoped pattern into a fake source and assert lint flags it."""
    bad_source = '''
def writer():
    session.run("""
        MERGE (p:Entity:Person {email: $email})
        ON CREATE SET p.created_at = $now
        MATCH (c:Chunk {arena: $arena, id: $cid})
        MERGE (p)-[:MENTIONS]->(c)
    """, email="x", arena="acme", cid="1", now="t")
'''
    blocks = _extract_cypher_blocks(bad_source)
    assert blocks, "lint helper failed to extract the test block"
    block = blocks[0][1]
    assert not _is_allowed(block)

    flagged = [
        match.group(0)
        for match in _NODE_PATTERN.finditer(block)
        if not _pattern_scopes_arena(block, match.group("var"), match.group("props"))
    ]
    # The Person MERGE never ties `p` to arena → must be flagged; the
    # Chunk MATCH scopes arena in its property bag → must not be.
    assert any("Person" in f for f in flagged), \
        "self-test: unscoped Person pattern should have been flagged"
    assert not any("Chunk" in f for f in flagged), \
        "self-test: arena-scoped Chunk pattern should not have been flagged"
|