@pentatonic-ai/ai-agent-sdk 0.7.13 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/package.json +1 -1
  2. package/packages/memory/openclaw-plugin/index.js +7 -0
  3. package/packages/memory/openclaw-plugin/openclaw.plugin.json +9 -1
  4. package/packages/memory/openclaw-plugin/package.json +1 -1
  5. package/packages/memory/src/__tests__/engine.test.js +142 -0
  6. package/packages/memory/src/engine.js +65 -0
  7. package/packages/memory-engine/compat/server.py +90 -5
  8. package/packages/memory-engine/docker-compose.yml +18 -8
  9. package/packages/memory-engine/engine/services/_shared/__init__.py +1 -0
  10. package/packages/memory-engine/engine/services/_shared/embed_provider.py +431 -0
  11. package/packages/memory-engine/engine/services/l2/Dockerfile +4 -2
  12. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +640 -81
  13. package/packages/memory-engine/engine/services/l4/Dockerfile +5 -1
  14. package/packages/memory-engine/engine/services/l4/server.py +19 -57
  15. package/packages/memory-engine/engine/services/l5/Dockerfile +3 -1
  16. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +24 -32
  17. package/packages/memory-engine/engine/services/l6/Dockerfile +3 -1
  18. package/packages/memory-engine/engine/services/l6/l6-document-store.py +24 -29
  19. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +128 -0
  20. package/packages/memory-engine/tests/e2e_arena.sh +28 -4
  21. package/packages/memory-engine/tests/test_aggregate.py +333 -0
  22. package/packages/memory-engine/tests/test_arena_safety.py +232 -0
  23. package/packages/memory-engine/tests/test_channel_stat_reader.py +437 -0
  24. package/packages/memory-engine/tests/test_channel_stat_rollups.py +308 -0
  25. package/packages/memory-engine/tests/test_embed_provider.py +354 -0
  26. package/packages/memory-engine/tests/test_l3_arena_isolation.py +412 -0
@@ -17,6 +17,7 @@ import json
17
17
  import logging
18
18
  import os
19
19
  import sqlite3
20
+ import sys
20
21
  import time
21
22
  from datetime import datetime
22
23
  from pathlib import Path
@@ -30,6 +31,10 @@ from neo4j.time import DateTime as Neo4jDateTime, Date as Neo4jDate
30
31
  from pydantic import BaseModel
31
32
  import uvicorn
32
33
 
34
+ # Shared embed client lives at engine/services/_shared/.
35
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
36
+ from _shared.embed_provider import EmbedClient # noqa: E402
37
+
33
38
 
34
39
  def _serialize_neo4j_value(v: Any) -> Any:
35
40
  """Convert neo4j-specific types to JSON-serialisable equivalents.
@@ -93,10 +98,27 @@ QMD_DB_PATH = _resolve_qmd_db()
93
98
  OLLAMA_URL = os.environ.get("PME_OLLAMA_URL", "http://localhost:11434/api/embeddings")
94
99
  EMBEDDING_MODEL = os.environ.get("PME_EMBED_MODEL", "nomic-embed-text")
95
100
 
96
- # NV-Embed-v2 service (primary, 4096-dim)
97
- NV_EMBED_URL = os.environ.get("PME_NV_EMBED_URL", "http://localhost:8041/v1/embeddings")
101
+ # NV-Embed-v2 service (primary, 4096-dim). URL/auth/path/body/response are
102
+ # managed by the shared EmbedClient; PME_EMBED_PROVIDER (default openai)
103
+ # selects auth scheme (Bearer vs X-API-Key) and request shape.
98
104
  NV_EMBED_ENABLED = os.environ.get("PME_NV_EMBED_ENABLED", "true").lower() == "true"
99
105
 
106
+ _embed: EmbedClient | None = None
107
+
108
+
109
+ def _embed_client() -> EmbedClient:
110
+ """Lazily build the shared EmbedClient for L2."""
111
+ global _embed
112
+ if _embed is None:
113
+ _embed = EmbedClient.from_env(
114
+ prefix="PME_",
115
+ url_var="PME_NV_EMBED_URL",
116
+ key_var="PME_EMBED_API_KEY",
117
+ model_var="PME_NV_EMBED_MODEL",
118
+ default_url="http://localhost:8041/v1/embeddings",
119
+ )
120
+ return _embed
121
+
100
122
  # Sequential processing weights - OPTIMIZED FOR QUALITY
101
123
  GRAPH_PRIORITY_BOOST = 0.5 # Extra score for graph-derived results (↑ for better entity/relationship context)
102
124
  VECTOR_BASE_WEIGHT = 0.5 # Base weight for vector results (↓ balanced for accuracy over speed)
@@ -208,6 +230,12 @@ class ChatCompletionRequest(BaseModel):
208
230
  model: str = "gpt-3.5-turbo"
209
231
  max_tokens: int = 1000
210
232
  temperature: float = 0.1
233
+ # Optional tenant scope. When absent, the L3 graph layer returns no
234
+ # results (rather than walking the global graph) — other layers
235
+ # still respond, so the call succeeds but with reduced L3 context.
236
+ # Existing single-tenant callers (benchmarks, dev) keep working.
237
+ arena: Optional[str] = None
238
+ arenas: Optional[List[str]] = None
211
239
 
212
240
  class EmbeddingRequest(BaseModel):
213
241
  input: Any
@@ -239,9 +267,15 @@ def extract_query_entities(query: str) -> List[str]:
239
267
  log.info(f"Extracted entities: {potential_entities}")
240
268
  return potential_entities
241
269
 
242
- def _hebbian_strengthen(session, node_names: List[str], increment: float = 0.05) -> None:
243
- """Hebbian: strengthen edges between co-accessed nodes during query."""
244
- if len(node_names) < 2:
270
+ def _hebbian_strengthen(session, arenas: List[str], node_names: List[str], increment: float = 0.05) -> None:
271
+ """Hebbian: strengthen edges between co-accessed nodes during query.
272
+
273
+ Scoped by arena so a search inside tenant A can't reinforce edges
274
+ inside tenant B's graph (which would happen via shared entity-name
275
+ nodes pre-arena). When `arenas` is empty (single-tenant local dev,
276
+ benchmarks) we no-op rather than risk a cross-tenant write.
277
+ """
278
+ if len(node_names) < 2 or not arenas:
245
279
  return
246
280
  now = datetime.utcnow().isoformat() + "Z"
247
281
  for i, n1 in enumerate(node_names):
@@ -249,16 +283,28 @@ def _hebbian_strengthen(session, node_names: List[str], increment: float = 0.05)
249
283
  try:
250
284
  session.run(
251
285
  """MATCH (a {name: $n1})-[r]-(b {name: $n2})
286
+ WHERE a.arena IN $arenas AND b.arena IN $arenas
252
287
  SET r.weight = coalesce(r.weight, 1.0) + $inc,
253
288
  r.last_accessed = $now""",
254
- n1=n1, n2=n2, inc=increment, now=now
289
+ n1=n1, n2=n2, arenas=arenas, inc=increment, now=now
255
290
  )
256
291
  except Exception:
257
292
  pass # non-critical
258
293
 
259
294
 
260
- def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) -> Dict:
261
- """Phase 1: Neo4j graph search with spreading activation + Hebbian."""
295
+ def search_neo4j_sequential(query: str, entities: List[str], arenas: List[str], limit: int = 12) -> Dict:
296
+ """Phase 1: Neo4j graph search with spreading activation + Hebbian.
297
+
298
+ `arenas` is the tenant-scope set the caller is authorised for —
299
+ typically [clientId] or [clientId, clientId:userId]. Every Cypher
300
+ clause filters on `n.arena IN $arenas`, so a search from tenant A
301
+ can never traverse into entity nodes belonging to tenant B even
302
+ when their names collide. Empty `arenas` short-circuits to no
303
+ results — that's safer than walking the entire graph in dev/test.
304
+ """
305
+ if not arenas:
306
+ log.warning("search_neo4j_sequential called without arenas — returning empty results")
307
+ return {"results": [], "graph_entities": [], "entity_count": 0}
262
308
  try:
263
309
  driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
264
310
  results = []
@@ -267,19 +313,20 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
267
313
  with driver.session() as session:
268
314
  # Search for specific entities — use weighted spreading activation
269
315
  for entity in entities:
270
- # Direct match first
316
+ # Direct match first — arena-scoped on every node we touch.
271
317
  cypher = """
272
318
  MATCH (n)
273
- WHERE n.name CONTAINS $entity
319
+ WHERE n.name CONTAINS $entity AND n.arena IN $arenas
274
320
  OPTIONAL MATCH (n)-[r]-(connected)
275
- WHERE coalesce(r.weight, 1.0) >= 0.2
321
+ WHERE connected.arena IN $arenas
322
+ AND coalesce(r.weight, 1.0) >= 0.2
276
323
  RETURN n, r, connected, $entity as search_entity,
277
324
  coalesce(r.weight, 1.0) AS edge_weight
278
325
  ORDER BY edge_weight DESC
279
326
  LIMIT $limit
280
327
  """
281
328
 
282
- records = session.run(cypher, entity=entity, limit=8)
329
+ records = session.run(cypher, entity=entity, arenas=arenas, limit=8)
283
330
 
284
331
  for record in records:
285
332
  node = _serialize_neo4j_value(dict(record["n"]))
@@ -314,11 +361,17 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
314
361
  "node_data": node
315
362
  })
316
363
 
317
- # 2-hop spreading activation for high-weight paths
364
+ # 2-hop spreading activation for high-weight paths.
365
+ # Every node along the walk must be in-arena. Without
366
+ # the filter, an activation could walk into another
367
+ # tenant's graph via a name-collision on the start node.
318
368
  if entity:
319
369
  activation_results = session.run("""
320
370
  MATCH (start)-[r1]-(mid)-[r2]-(end)
321
371
  WHERE start.name CONTAINS $entity
372
+ AND start.arena IN $arenas
373
+ AND mid.arena IN $arenas
374
+ AND end.arena IN $arenas
322
375
  AND coalesce(r1.weight, 1.0) >= 0.5
323
376
  AND coalesce(r2.weight, 1.0) >= 0.5
324
377
  AND start <> end
@@ -327,7 +380,7 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
327
380
  mid.name AS via
328
381
  ORDER BY activation DESC
329
382
  LIMIT 5
330
- """, entity=entity)
383
+ """, entity=entity, arenas=arenas)
331
384
 
332
385
  for rec in activation_results:
333
386
  end_node = _serialize_neo4j_value(dict(rec["end"])) if rec["end"] else {}
@@ -343,20 +396,24 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
343
396
  "node_data": end_node
344
397
  })
345
398
 
346
- # General query search if no specific entities found
399
+ # General query search if no specific entities found —
400
+ # arena-gated so the fallback can't walk other tenants'
401
+ # nodes when the heuristic entity extractor returned nothing.
347
402
  if not results:
348
403
  general_words = [w for w in query.split() if len(w) > 3 and w.lower() not in ['what', 'who', 'where', 'when', 'how']]
349
404
 
350
405
  for word in general_words[:2]:
351
406
  cypher = """
352
407
  MATCH (n)
353
- WHERE ANY(prop IN keys(n) WHERE n[prop] IS :: STRING AND n[prop] CONTAINS $term)
408
+ WHERE n.arena IN $arenas
409
+ AND ANY(prop IN keys(n) WHERE n[prop] IS :: STRING AND n[prop] CONTAINS $term)
354
410
  OPTIONAL MATCH (n)-[r]-(connected)
411
+ WHERE connected.arena IN $arenas
355
412
  RETURN n, r, connected
356
413
  LIMIT $limit
357
414
  """
358
415
 
359
- records = session.run(cypher, term=word, limit=4)
416
+ records = session.run(cypher, term=word, arenas=arenas, limit=4)
360
417
 
361
418
  for record in records:
362
419
  node = _serialize_neo4j_value(dict(record["n"]))
@@ -373,7 +430,7 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
373
430
  })
374
431
 
375
432
  # Hebbian: strengthen edges between all accessed entities
376
- _hebbian_strengthen(session, list(graph_entities))
433
+ _hebbian_strengthen(session, arenas, list(graph_entities))
377
434
 
378
435
  driver.close()
379
436
 
@@ -389,12 +446,11 @@ def search_neo4j_sequential(query: str, entities: List[str], limit: int = 12) ->
389
446
 
390
447
  def get_embedding(text: str) -> List[float]:
391
448
  """Get embedding — tries NV-Embed-v2 (4096-dim) first, falls back to Ollama."""
392
- # Try NV-Embed-v2 service first
449
+ # Try NV-Embed-v2 service first via the shared EmbedClient (handles
450
+ # provider selection, auth scheme, path, and 401 auto-detect).
393
451
  if NV_EMBED_ENABLED:
394
452
  try:
395
- r = requests.post(NV_EMBED_URL, json={"input": text}, timeout=30)
396
- r.raise_for_status()
397
- return r.json()["data"][0]["embedding"]
453
+ return _embed_client().embed_one(text)
398
454
  except Exception as e:
399
455
  log.warning(f"NV-Embed-v2 failed, falling back to Ollama: {e}")
400
456
 
@@ -953,9 +1009,12 @@ def sequential_hybridrag_search(query: str, limit: int = 16,
953
1009
  log.info(f"L1 System files: {len(system_results)} results")
954
1010
 
955
1011
  # L2: HybridRAG orchestration
956
- # L3: Graph search (entity extraction + Neo4j)
1012
+ # L3: Graph search (entity extraction + Neo4j) — arena-scoped so a
1013
+ # tenant's search can never traverse another tenant's entity graph
1014
+ # via name collisions on shared :Entity nodes. The post-filter shim
1015
+ # protects chunks; this protects the entity-walking layer too.
957
1016
  entities = extract_query_entities(query)
958
- graph_context = search_neo4j_sequential(query, entities, limit=8)
1017
+ graph_context = search_neo4j_sequential(query, entities, arena_list, limit=8)
959
1018
  log.info(f"L3 Graph search: {len(graph_context['results'])} results, {graph_context['entity_count']} entities")
960
1019
 
961
1020
  # HyDE: expand query for better vector embeddings
@@ -1037,9 +1096,12 @@ async def search_endpoint(request: Request) -> dict:
1037
1096
 
1038
1097
  results = sequential_hybridrag_search(query, limit=limit, arena=arena, arenas=arenas)
1039
1098
 
1040
- # Also return raw graph entities for context enrichment
1099
+ # Also return raw graph entities for context enrichment.
1100
+ # Same arena scope as the cascade search above — without it
1101
+ # the entities returned could include cross-tenant rows.
1102
+ arena_list = list(arenas) if arenas else ([arena] if arena else [])
1041
1103
  entities = extract_query_entities(query)
1042
- graph_context = search_neo4j_sequential(query, entities, limit=8)
1104
+ graph_context = search_neo4j_sequential(query, entities, arena_list, limit=8)
1043
1105
 
1044
1106
  return {
1045
1107
  "results": results,
@@ -1073,17 +1135,23 @@ async def list_models() -> dict:
1073
1135
  @app.post("/v1/embeddings")
1074
1136
  async def create_embeddings(request: EmbeddingRequest) -> dict:
1075
1137
  """Pass-through to NV-Embed-v2 (4096-dim). Batch-native — forwards the full
1076
- input list in a single HTTP call instead of looping one-at-a-time."""
1138
+ input list in a single HTTP call instead of looping one-at-a-time.
1139
+
1140
+ Returns OpenAI-shaped response regardless of upstream provider, so
1141
+ callers (including L4 search and external clients) get a consistent
1142
+ contract from this proxy."""
1077
1143
  try:
1078
- import httpx
1079
1144
  inputs = [request.input] if isinstance(request.input, str) else request.input
1080
- async with httpx.AsyncClient(timeout=60) as client:
1081
- resp = await client.post(
1082
- NV_EMBED_URL,
1083
- json={"input": inputs, "model": request.model or "nv-embed-v2"}
1084
- )
1085
- resp.raise_for_status()
1086
- return resp.json()
1145
+ embeddings = await _embed_client().embed_batch_async(inputs)
1146
+ return {
1147
+ "object": "list",
1148
+ "model": request.model or "nv-embed-v2",
1149
+ "data": [
1150
+ {"object": "embedding", "embedding": e, "index": i}
1151
+ for i, e in enumerate(embeddings)
1152
+ ],
1153
+ "usage": {"prompt_tokens": 0, "total_tokens": 0},
1154
+ }
1087
1155
  except Exception as e:
1088
1156
  raise HTTPException(status_code=500, detail=str(e))
1089
1157
 
@@ -1098,9 +1166,15 @@ async def chat_completions(request: ChatCompletionRequest) -> dict:
1098
1166
 
1099
1167
  query = user_messages[-1].content
1100
1168
 
1101
- # Perform sequential HybridRAG search
1169
+ # Perform sequential HybridRAG search — pass through tenant
1170
+ # scope from the request so L3 graph traversal stays inside the
1171
+ # caller's arena. The search function short-circuits L3 to
1172
+ # empty when no arenas are supplied; callers that need L3 must
1173
+ # pass `arena` or `arenas` on the request body.
1102
1174
  start_time = time.time()
1103
- results = sequential_hybridrag_search(query, limit=16)
1175
+ results = sequential_hybridrag_search(
1176
+ query, limit=16, arena=request.arena, arenas=request.arenas,
1177
+ )
1104
1178
  search_time = time.time() - start_time
1105
1179
 
1106
1180
  # Format results with correct layer structure
@@ -1156,38 +1230,57 @@ async def chat_completions(request: ChatCompletionRequest) -> dict:
1156
1230
  raise HTTPException(status_code=500, detail=str(e))
1157
1231
 
1158
1232
  @app.get("/contradictions/{node_name}")
1159
- async def check_contradictions(node_name: str) -> dict:
1160
- """Detect contradictions around a named node."""
1233
+ async def check_contradictions(node_name: str, arena: Optional[str] = None) -> dict:
1234
+ """Detect contradictions around a named node.
1235
+
1236
+ `arena` is required to scope the lookup to one tenant's graph. The
1237
+ endpoint returns a 400 when called without it — silently spanning
1238
+ the entire graph here would leak entity names across tenants via
1239
+ the `node_name` lookup.
1240
+ """
1241
+ if not arena:
1242
+ raise HTTPException(
1243
+ status_code=400,
1244
+ detail="arena query parameter is required to scope contradiction lookup",
1245
+ )
1161
1246
  try:
1162
1247
  driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
1163
1248
  contradictions = []
1164
1249
  with driver.session() as session:
1165
- # Find the node
1250
+ # Find the node — must be in the caller's arena.
1166
1251
  node = session.run(
1167
- "MATCH (n) WHERE toLower(n.name) = toLower($name) RETURN elementId(n) AS id", name=node_name
1252
+ """MATCH (n) WHERE toLower(n.name) = toLower($name) AND n.arena = $arena
1253
+ RETURN elementId(n) AS id""",
1254
+ name=node_name, arena=arena,
1168
1255
  ).single()
1169
1256
  if not node:
1170
1257
  return {"node": node_name, "contradictions": [], "error": "Node not found"}
1171
1258
  nid = node["id"]
1172
1259
 
1173
- # Explicit CONTRADICTS
1260
+ # Explicit CONTRADICTS — both endpoints must be in the same arena.
1174
1261
  for rec in session.run(
1175
- """MATCH (a)-[r:CONTRADICTS]-(b) WHERE elementId(a) = $nid
1176
- RETURN a.name AS a, b.name AS b, r.reason AS reason""", nid=nid
1262
+ """MATCH (a)-[r:CONTRADICTS]-(b)
1263
+ WHERE elementId(a) = $nid AND b.arena = $arena
1264
+ RETURN a.name AS a, b.name AS b, r.reason AS reason""",
1265
+ nid=nid, arena=arena,
1177
1266
  ):
1178
1267
  contradictions.append({"type": "explicit", "a": rec["a"], "b": rec["b"], "reason": rec["reason"]})
1179
1268
 
1180
- # Property conflicts via shared neighbour
1269
+ # Property conflicts via shared neighbour — every node along
1270
+ # the (a)--(shared)--(b) path filtered by arena so a shared
1271
+ # neighbour from another tenant can't trigger a false-positive
1272
+ # conflict in this tenant's view.
1181
1273
  for rec in session.run(
1182
1274
  """MATCH (a)--(shared)--(b)
1183
1275
  WHERE elementId(a) = $nid AND a <> b
1276
+ AND shared.arena = $arena AND b.arena = $arena
1184
1277
  WITH a, b, shared, properties(a) AS pa, properties(b) AS pb
1185
1278
  WITH a, b, shared,
1186
1279
  [k IN keys(pa) WHERE k IN keys(pb) AND pa[k] <> pb[k]
1187
1280
  AND NOT k IN ['last_accessed','embedding','created_at','updated_at','id','weight']] AS ck
1188
1281
  WHERE size(ck) > 0
1189
1282
  RETURN a.name AS a, b.name AS b, shared.name AS via, ck
1190
- LIMIT 10""", nid=nid
1283
+ LIMIT 10""", nid=nid, arena=arena,
1191
1284
  ):
1192
1285
  contradictions.append({
1193
1286
  "type": "property_conflict", "a": rec["a"], "b": rec["b"],
@@ -1319,17 +1412,11 @@ def _extract_entities_for_kg(text: str, max_entities: int = 32) -> List[str]:
1319
1412
 
1320
1413
 
1321
1414
  def _embed_batch_local(texts: List[str]) -> List[List[float]]:
1322
- """Batch embed via NV-Embed. Returns vectors in input order."""
1415
+ """Batch embed via the shared EmbedClient. Returns vectors in input order."""
1323
1416
  if not texts:
1324
1417
  return []
1325
1418
  try:
1326
- r = requests.post(NV_EMBED_URL,
1327
- json={"input": texts, "model": "nv-embed-v2"},
1328
- timeout=120)
1329
- r.raise_for_status()
1330
- data = r.json().get("data", [])
1331
- # NV-Embed returns [{embedding: [...]}, ...]
1332
- return [d["embedding"] for d in data]
1419
+ return _embed_client().embed_batch(texts)
1333
1420
  except Exception as e:
1334
1421
  log.warning(f"NV-Embed batch failed: {e}; trying singletons")
1335
1422
  return [get_embedding(t) for t in texts]
@@ -1451,22 +1538,70 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
1451
1538
  log.error(f"L4 QMD write failed: {e}")
1452
1539
 
1453
1540
  # ---- L3 Neo4j KG ----------------------------------------------------
1541
+ # Every node and edge written here is arena-scoped. Two paths:
1542
+ #
1543
+ # 1. Heuristic Concept extraction — title-case + bigrams over the
1544
+ # chunk body, same as before. Concepts MERGE on (arena, name)
1545
+ # so two tenants can independently mint a "Pricing" concept
1546
+ # without colliding.
1547
+ #
1548
+ # 2. Metadata-driven Person extraction — when the chunk's metadata
1549
+ # carries contact_email / contact_name (Pip emits these from
1550
+ # its ingest pipeline; other clients can do the same), we MERGE
1551
+ # a typed (:Entity:Person) node and connect it to the chunk via
1552
+ # a (:COMMUNICATED) edge that carries channel + direction. This
1553
+ # is the path the relationships UI reads from — it's reliable
1554
+ # because the writer knows exactly who the person is, no NLP
1555
+ # guessing required.
1556
+ #
1557
+ # The compound (arena, name) MERGE guarantees no cross-tenant entity
1558
+ # collapse. Pre-existing unscoped entities (arena IS NULL) are left
1559
+ # alone; the wipe-legacy migration script handles them out of band.
1454
1560
  l3_entities = 0
1455
1561
  l3_chunks = 0
1456
1562
  try:
1457
1563
  driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
1458
1564
  with driver.session() as session:
1459
- # Index for fast lookup (idempotent)
1565
+ # Indexes are idempotent. The compound (arena, name) is the
1566
+ # right shape now that entities are arena-scoped; the legacy
1567
+ # entity_name index stays for the wipe-migration to work
1568
+ # against pre-arena rows, then can be dropped in a follow-up.
1460
1569
  try:
1461
- session.run("CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)")
1570
+ session.run("CREATE INDEX entity_arena_name IF NOT EXISTS FOR (n:Entity) ON (n.arena, n.name)")
1571
+ session.run("CREATE INDEX person_arena_email IF NOT EXISTS FOR (n:Person) ON (n.arena, n.email)")
1572
+ session.run("CREATE INDEX chunk_arena IF NOT EXISTS FOR (c:Chunk) ON (c.arena)")
1462
1573
  session.run("CREATE INDEX chunk_id IF NOT EXISTS FOR (c:Chunk) ON (c.id)")
1574
+ # ChannelStat is the denormalised aggregate read by
1575
+ # /aggregate on the fast path. Compound index covers
1576
+ # the (arena, person_email) lookup that the reader
1577
+ # uses; the per-channel rows are returned in one
1578
+ # range scan.
1579
+ session.run("CREATE INDEX channelstat_arena_email IF NOT EXISTS FOR (s:ChannelStat) ON (s.arena, s.person_email)")
1580
+ # UNIQUE constraint on the writer's MERGE key. Without
1581
+ # this, two concurrent index-internal-batch transactions
1582
+ # can both decide a ChannelStat doesn't exist and create
1583
+ # rival nodes — the index doesn't lock, the constraint
1584
+ # does. The constraint also implies an index on the
1585
+ # full key so the MERGE locks efficiently.
1586
+ session.run("CREATE CONSTRAINT channelstat_unique IF NOT EXISTS FOR (s:ChannelStat) REQUIRE (s.arena, s.person_email, s.channel) IS UNIQUE")
1463
1587
  except Exception:
1464
1588
  pass
1465
1589
  for n in norm:
1466
- entities = _extract_entities_for_kg(n["content"])
1467
- if not entities:
1590
+ heuristic_entities = _extract_entities_for_kg(n["content"])
1591
+ meta = n.get("metadata") or {}
1592
+ contact_email = meta.get("contact_email")
1593
+ contact_name = meta.get("contact_name")
1594
+ channel = meta.get("channel")
1595
+ direction = meta.get("direction")
1596
+ occurred_at = meta.get("timestamp") or meta.get("occurred_at") or now_iso
1597
+ # Skip the chunk only when there is genuinely nothing to
1598
+ # connect — heuristic entities AND no person metadata.
1599
+ if not heuristic_entities and not contact_email and not contact_name:
1468
1600
  continue
1469
- # Create the chunk node
1601
+ # Create the chunk node — arena property is the
1602
+ # tenant-isolation anchor. Every read traverses through
1603
+ # this node, so getting the arena right here is the
1604
+ # single most important invariant of this whole block.
1470
1605
  session.run(
1471
1606
  """
1472
1607
  MERGE (c:Chunk {id: $cid})
@@ -1480,38 +1615,150 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
1480
1615
  arena=arena, now=now_iso,
1481
1616
  )
1482
1617
  l3_chunks += 1
1483
- # Create/MERGE entities and MENTIONS edge
1484
- for ent in entities:
1618
+
1619
+ # Concept entities — heuristic, arena-scoped.
1620
+ for ent in heuristic_entities:
1485
1621
  session.run(
1486
1622
  """
1487
- MERGE (e:Entity {name: $name})
1623
+ MERGE (e:Entity:Concept {arena: $arena, name: $name})
1488
1624
  ON CREATE SET e.type = 'Concept',
1489
1625
  e.created_at = $now,
1490
1626
  e.weight = 1.0
1491
1627
  WITH e
1492
- MATCH (c:Chunk {id: $cid})
1628
+ MATCH (c:Chunk {arena: $arena, id: $cid})
1493
1629
  MERGE (e)-[r:MENTIONS]->(c)
1494
1630
  ON CREATE SET r.weight = 1.0, r.created_at = $now
1495
1631
  ON MATCH SET r.weight = coalesce(r.weight, 1.0) + 0.1
1496
1632
  """,
1497
- name=ent, cid=n["id"], now=now_iso,
1633
+ arena=arena, name=ent, cid=n["id"], now=now_iso,
1498
1634
  )
1499
1635
  l3_entities += 1
1500
- # Create entity-entity co-occurrence edges (within this chunk)
1501
- # so spreading activation has structure to walk.
1502
- if len(entities) >= 2:
1503
- for i in range(len(entities)):
1504
- for j in range(i + 1, len(entities)):
1636
+ # Concept-concept co-occurrence same arena on both
1637
+ # ends so cross-tenant CO_OCCURS edges can't form even
1638
+ # if two tenants happen to extract the same concept name.
1639
+ if len(heuristic_entities) >= 2:
1640
+ for i in range(len(heuristic_entities)):
1641
+ for j in range(i + 1, len(heuristic_entities)):
1505
1642
  session.run(
1506
1643
  """
1507
- MATCH (a:Entity {name: $a})
1508
- MATCH (b:Entity {name: $b})
1644
+ MATCH (a:Entity:Concept {arena: $arena, name: $a})
1645
+ MATCH (b:Entity:Concept {arena: $arena, name: $b})
1509
1646
  MERGE (a)-[r:CO_OCCURS]->(b)
1510
1647
  ON CREATE SET r.weight = 0.5, r.created_at = $now
1511
1648
  ON MATCH SET r.weight = coalesce(r.weight, 0.5) + 0.05
1512
1649
  """,
1513
- a=entities[i], b=entities[j], now=now_iso,
1650
+ arena=arena, a=heuristic_entities[i],
1651
+ b=heuristic_entities[j], now=now_iso,
1514
1652
  )
1653
+
1654
+ # Person entities — typed via writer-supplied metadata.
1655
+ # Email gets its own node (canonical id for a person);
1656
+ # name gets its own node (display surface). When both
1657
+ # are present they're linked via KNOWN_AS so a query
1658
+ # against either resolves the same person.
1659
+ person_email_node = None
1660
+ if isinstance(contact_email, str) and contact_email.strip():
1661
+ norm_email = contact_email.strip().lower()
1662
+ # Two-phase write: MERGE the Person + COMMUNICATED
1663
+ # edge, then update the ChannelStat aggregate IFF
1664
+ # the edge was just created. The `r._counted` flag
1665
+ # is the idempotency rail — set false on CREATE and
1666
+ # flipped to true after the stat update, so replays
1667
+ # of the same eventId never double-count even when
1668
+ # the chunk already exists.
1669
+ session.run(
1670
+ """
1671
+ MERGE (p:Entity:Person {arena: $arena, email: $email})
1672
+ ON CREATE SET p.created_at = $now,
1673
+ p.first_seen = $occurred_at,
1674
+ p.last_seen = $occurred_at
1675
+ ON MATCH SET p.last_seen = CASE
1676
+ WHEN $occurred_at > coalesce(p.last_seen, '')
1677
+ THEN $occurred_at
1678
+ ELSE p.last_seen END
1679
+ WITH p
1680
+ MATCH (c:Chunk {arena: $arena, id: $cid})
1681
+ MERGE (p)-[r:COMMUNICATED]->(c)
1682
+ ON CREATE SET r.channel = $channel,
1683
+ r.direction = $direction,
1684
+ r.occurred_at = $occurred_at,
1685
+ r.weight = 1.0,
1686
+ r._counted = false
1687
+ WITH p, r
1688
+ // ChannelStat denormalises Person-COMMUNICATED
1689
+ // edge counts so /aggregate becomes a property
1690
+ // read instead of a per-query Cypher walk over
1691
+ // every edge. Read path falls back to the edge
1692
+ // walk for older tenants whose stats haven't
1693
+ // been backfilled, so this is a forward-only
1694
+ // optimisation — no migration needed for stats
1695
+ // to start materialising.
1696
+ FOREACH (_ IN CASE WHEN r._counted = false THEN [1] ELSE [] END |
1697
+ MERGE (s:ChannelStat {arena: $arena, person_email: $email, channel: $channel})
1698
+ ON CREATE SET s.count = 0,
1699
+ s.inbound = 0,
1700
+ s.outbound = 0,
1701
+ s.first_seen = $occurred_at,
1702
+ s.last_seen = $occurred_at,
1703
+ s.created_at = $now
1704
+ SET s.count = s.count + 1,
1705
+ s.inbound = s.inbound + (CASE WHEN $direction = 'inbound' THEN 1 ELSE 0 END),
1706
+ s.outbound = s.outbound + (CASE WHEN $direction = 'outbound' THEN 1 ELSE 0 END),
1707
+ s.first_seen = CASE
1708
+ WHEN $occurred_at < coalesce(s.first_seen, $occurred_at)
1709
+ THEN $occurred_at
1710
+ ELSE s.first_seen END,
1711
+ s.last_seen = CASE
1712
+ WHEN $occurred_at > coalesce(s.last_seen, '')
1713
+ THEN $occurred_at
1714
+ ELSE s.last_seen END,
1715
+ s.updated_at = $now
1716
+ MERGE (p)-[:HAS_STAT]->(s)
1717
+ SET r._counted = true
1718
+ )
1719
+ """,
1720
+ arena=arena, email=norm_email, cid=n["id"],
1721
+ channel=channel, direction=direction,
1722
+ occurred_at=occurred_at, now=now_iso,
1723
+ )
1724
+ person_email_node = norm_email
1725
+ l3_entities += 1
1726
+ if isinstance(contact_name, str) and contact_name.strip():
1727
+ cname = contact_name.strip()
1728
+ session.run(
1729
+ """
1730
+ MERGE (p:Entity:Person {arena: $arena, name: $name})
1731
+ ON CREATE SET p.created_at = $now,
1732
+ p.first_seen = $occurred_at,
1733
+ p.last_seen = $occurred_at
1734
+ ON MATCH SET p.last_seen = CASE
1735
+ WHEN $occurred_at > coalesce(p.last_seen, '')
1736
+ THEN $occurred_at
1737
+ ELSE p.last_seen END
1738
+ WITH p
1739
+ MATCH (c:Chunk {arena: $arena, id: $cid})
1740
+ MERGE (p)-[r:COMMUNICATED]->(c)
1741
+ ON CREATE SET r.channel = $channel,
1742
+ r.direction = $direction,
1743
+ r.occurred_at = $occurred_at,
1744
+ r.weight = 1.0
1745
+ """,
1746
+ arena=arena, name=cname, cid=n["id"],
1747
+ channel=channel, direction=direction,
1748
+ occurred_at=occurred_at, now=now_iso,
1749
+ )
1750
+ l3_entities += 1
1751
+ # Link name→email node so the relationships query
1752
+ # can resolve either alias to the same person.
1753
+ if person_email_node:
1754
+ session.run(
1755
+ """
1756
+ MATCH (n:Person {arena: $arena, name: $name})
1757
+ MATCH (e:Person {arena: $arena, email: $email})
1758
+ MERGE (n)-[:KNOWN_AS]->(e)
1759
+ """,
1760
+ arena=arena, name=cname, email=person_email_node,
1761
+ )
1515
1762
  driver.close()
1516
1763
  except Exception as e:
1517
1764
  log.error(f"L3 KG write failed: {e}")
@@ -1530,16 +1777,43 @@ async def index_internal_batch(req: IndexInternalBatchRequest) -> dict:
1530
1777
 
1531
1778
  @app.post("/forget-internal")
1532
1779
  async def forget_internal(request: Request) -> dict:
1533
- """Wipe L0 + L4-qmd + L3. Used by bench harness to reset between runs."""
1780
+ """Wipe L0 + L4-qmd + L3.
1781
+
1782
+ Two modes:
1783
+ - Tenant-scoped (default, safe): pass `{"arena": "<tenant>"}` and
1784
+ only that tenant's rows are deleted. Used by tenant offboarding
1785
+ and by tests.
1786
+ - Global (unsafe): the bench harness needs to wipe everything
1787
+ between runs. Require an explicit `{"confirm": "GLOBAL_WIPE"}`
1788
+ flag — without it we refuse rather than nuke shared infra.
1789
+
1790
+ Pre-fix this endpoint silently ignored the arena param and always
1791
+ deleted globally. That meant a tenant offboarding script — or any
1792
+ caller that read the param-name and trusted it — would erase every
1793
+ other tenant's L3 graph and wipe the shared sqlite stores. Hence
1794
+ the explicit confirm gate now.
1795
+ """
1534
1796
  try:
1535
1797
  body = await request.json()
1536
1798
  except Exception:
1537
1799
  body = {}
1538
- arena = body.get("arena") # optional scoping
1800
+ arena = body.get("arena")
1801
+ confirm = body.get("confirm")
1802
+ if not arena and confirm != "GLOBAL_WIPE":
1803
+ raise HTTPException(
1804
+ status_code=400,
1805
+ detail="forget-internal requires either 'arena' (tenant-scoped) "
1806
+ "or 'confirm: GLOBAL_WIPE' (unsafe, deletes everything).",
1807
+ )
1539
1808
  deleted = {"l0": 0, "l4_qmd": 0, "l3_entities": 0, "l3_chunks": 0}
1809
+
1810
+ # ---- L0 BM25 (sqlite) ----------------------------------------------
1811
+ # The L0 chunks table doesn't carry an arena column today, so we
1812
+ # only support GLOBAL_WIPE here. Tenant-scoped L0 deletes are a
1813
+ # follow-up (needs schema migration to add `arena` to L0 rows).
1540
1814
  try:
1541
1815
  l0_db = Path(os.environ.get("PME_MEMORY_DB", str(L0_MEMORY_DB)))
1542
- if l0_db.exists():
1816
+ if l0_db.exists() and confirm == "GLOBAL_WIPE":
1543
1817
  conn = sqlite3.connect(str(l0_db), timeout=5)
1544
1818
  cur = conn.execute("DELETE FROM chunks")
1545
1819
  deleted["l0"] = cur.rowcount
@@ -1550,25 +1824,310 @@ async def forget_internal(request: Request) -> dict:
1550
1824
  conn.commit(); conn.close()
1551
1825
  except Exception as e:
1552
1826
  log.error(f"L0 forget failed: {e}")
1827
+
1828
+ # ---- L4 sqlite-vec --------------------------------------------------
1829
+ # Same situation as L0 — no per-arena column on chunks. Global only
1830
+ # for now; tenant-scoped delete is a follow-up.
1553
1831
  try:
1554
- if Path(QMD_DB_PATH).exists():
1832
+ if Path(QMD_DB_PATH).exists() and confirm == "GLOBAL_WIPE":
1555
1833
  conn = sqlite3.connect(QMD_DB_PATH, timeout=5)
1556
1834
  cur = conn.execute("DELETE FROM chunks")
1557
1835
  deleted["l4_qmd"] = cur.rowcount
1558
1836
  conn.commit(); conn.close()
1559
1837
  except Exception as e:
1560
1838
  log.error(f"L4 QMD forget failed: {e}")
1839
+
1840
+ # ---- L3 Neo4j -------------------------------------------------------
1841
+ # Neo4j chunks AND entities both carry arena now, so tenant-scoped
1842
+ # delete works correctly here even if L0/L4 still need a migration.
1561
1843
  try:
1562
1844
  driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
1563
1845
  with driver.session() as session:
1564
- r1 = session.run("MATCH (c:Chunk) DETACH DELETE c RETURN count(c) AS n")
1565
- deleted["l3_chunks"] = r1.single()["n"]
1566
- r2 = session.run("MATCH (e:Entity) DETACH DELETE e RETURN count(e) AS n")
1567
- deleted["l3_entities"] = r2.single()["n"]
1846
+ if arena:
1847
+ r1 = session.run(
1848
+ "MATCH (c:Chunk {arena: $arena}) DETACH DELETE c RETURN count(c) AS n",
1849
+ arena=arena,
1850
+ )
1851
+ deleted["l3_chunks"] = r1.single()["n"]
1852
+ r2 = session.run(
1853
+ "MATCH (e:Entity {arena: $arena}) DETACH DELETE e RETURN count(e) AS n",
1854
+ arena=arena,
1855
+ )
1856
+ deleted["l3_entities"] = r2.single()["n"]
1857
+ else: # confirm == "GLOBAL_WIPE", validated above
1858
+ r1 = session.run("MATCH (c:Chunk) DETACH DELETE c RETURN count(c) AS n")
1859
+ deleted["l3_chunks"] = r1.single()["n"]
1860
+ r2 = session.run("MATCH (e:Entity) DETACH DELETE e RETURN count(e) AS n")
1861
+ deleted["l3_entities"] = r2.single()["n"]
1568
1862
  driver.close()
1569
1863
  except Exception as e:
1570
1864
  log.error(f"L3 forget failed: {e}")
1571
- return {"status": "ok", "deleted": deleted, "arena": arena}
1865
+ return {"status": "ok", "deleted": deleted, "arena": arena, "global_wipe": confirm == "GLOBAL_WIPE"}
1866
+
1867
+
1868
class AggregateInternalRequest(BaseModel):
    """Aggregate (:Person)-[:COMMUNICATED]->(:Chunk) edges by group_by keys.

    The relationships UI pre-#28 went through a metadata-filtered
    /search and grouped client-side, capped at the engine over-fetch
    ceiling. With typed-Person nodes in L3 we can run a single Cypher
    aggregate that scales to any volume — no over-fetch, no cap.

    Required: arena (the tenant scope) plus enough metadata to identify
    the Person node we're rolling up. Today that means contact_email
    (the canonical Person key), but the shape leaves room for future
    Person identifiers (e.g. slack_user_id, hubspot_contact_id) without
    a wire change.
    """

    # Tenant scope; mandatory — the handler 400s on a blank arena.
    arena: str
    # Canonical Person key (lower-cased by the handler before matching).
    contact_email: Optional[str] = None
    # Alias key for name-only contacts; at least one of email/name is required.
    contact_name: Optional[str] = None
    # Group by these properties on the COMMUNICATED edge. Only the
    # relationship-page-supported keys are honoured; unknown keys are
    # silently dropped (no useful aggregate shape for them).
    # NOTE: a mutable default is safe on a pydantic model — pydantic
    # copies field defaults per instance, unlike plain Python defaults.
    group_by: List[str] = ["channel"]
1890
+
1891
+
1892
class AggregateBucket(BaseModel):
    """One aggregate row: the group_by key values plus rolled-up counts."""

    # Maps each requested group_by key to its value for this bucket;
    # empty dict when the caller asked for no grouping (single bucket).
    keys: Dict[str, Optional[str]]
    # Total COMMUNICATED edges in this bucket.
    count: int
    # Split of `count` by edge direction.
    inbound: int
    outbound: int
    # ISO-ish timestamp strings (compared lexically by the handler);
    # None when the underlying edges carry no occurred_at.
    last_seen: Optional[str] = None
    first_seen: Optional[str] = None
1899
+
1900
+
1901
class AggregateInternalResponse(BaseModel):
    """Response envelope for /aggregate-internal."""

    # Echo of the request's tenant scope.
    arena: str
    # Sum of bucket counts across all buckets.
    total: int
    # Max last_seen over all buckets (string comparison), if any.
    last_seen: Optional[str] = None
    buckets: List[AggregateBucket]
1906
+
1907
+
1908
# Whitelist of group_by keys we know how to project. Cypher
# parameter-substitution doesn't work on property names, so we
# template the keys into the query — this whitelist is the safety
# rail that keeps the templating from accepting arbitrary input.
# Keys name properties on the COMMUNICATED edge (set at index time).
_AGGREGATE_GROUP_BY_KEYS: Set[str] = {"channel", "direction"}
1913
+
1914
+
1915
@app.post("/aggregate-internal", response_model=AggregateInternalResponse)
async def aggregate_internal(req: AggregateInternalRequest) -> AggregateInternalResponse:
    """Aggregate Person→Chunk COMMUNICATED edges by edge properties.

    Returns one bucket per (group_by key combination) with count,
    inbound/outbound split, and time bounds. The Person match is
    arena-scoped (mandatory) and additionally filtered by whatever
    Person identifier the caller supplies.

    No fallback to chunk scanning — if the typed-Person nodes don't
    exist for this contact, the response is `total: 0` with no
    buckets, and the caller falls back to whatever it had before.
    That's intentional: the over-fetch path is in TES (#273); this
    endpoint is the scaling answer that doesn't have one.

    Raises:
        HTTPException 400: missing arena, or neither contact_email
            nor contact_name supplied.
        HTTPException 500: Neo4j connect failure or query failure.
    """
    arena = (req.arena or "").strip()
    if not arena:
        raise HTTPException(status_code=400, detail="arena is required")
    contact_email = (req.contact_email or "").strip().lower()
    contact_name = (req.contact_name or "").strip()
    if not contact_email and not contact_name:
        raise HTTPException(
            status_code=400,
            detail="provide contact_email and/or contact_name to identify the Person",
        )

    # Filter group_by to the supported keys; preserve order so a caller
    # asking for ["direction", "channel"] gets buckets keyed in that
    # order on the response.
    seen: set[str] = set()
    safe_group_by: List[str] = []
    for k in req.group_by or []:
        if k in _AGGREGATE_GROUP_BY_KEYS and k not in seen:
            seen.add(k)
            safe_group_by.append(k)

    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"neo4j connect: {e}")

    try:
        with driver.session() as session:
            # Fast path: read from the ChannelStat denormalisation
            # whenever the caller has an email and is grouping by
            # channel. ChannelStats are written by /index-internal-batch
            # on every store with contact_email metadata, so any tenant
            # with new ingest gets O(channels) reads instead of an
            # edge walk over every COMMUNICATED relationship.
            #
            # Conditions for the fast path:
            #   - contact_email set (stats are email-keyed; name-only
            #     contacts fall through to the edge walk).
            #   - group_by is exactly ["channel"] OR no group_by (single
            #     bucket). Other group_by combinations (e.g. with
            #     direction) need the edge granularity the stats
            #     don't carry.
            fast_path_eligible = bool(contact_email) and (
                not safe_group_by or safe_group_by == ["channel"]
            )
            if fast_path_eligible:
                stats_rows = list(session.run(
                    "MATCH (s:ChannelStat {arena: $arena, person_email: $email})\n"
                    "RETURN s.channel AS channel,\n"
                    "       s.count AS count,\n"
                    "       s.inbound AS inbound,\n"
                    "       s.outbound AS outbound,\n"
                    "       s.last_seen AS last_seen,\n"
                    "       s.first_seen AS first_seen\n"
                    "ORDER BY s.count DESC\n",
                    arena=arena, email=contact_email,
                ))
                if stats_rows:
                    # Build buckets directly. When group_by=[] we
                    # collapse to a single overall bucket; otherwise
                    # one bucket per channel.
                    if safe_group_by == ["channel"]:
                        buckets = [
                            AggregateBucket(
                                keys={"channel": rec["channel"]},
                                count=int(rec["count"] or 0),
                                inbound=int(rec["inbound"] or 0),
                                outbound=int(rec["outbound"] or 0),
                                last_seen=str(rec["last_seen"]) if rec["last_seen"] else None,
                                first_seen=str(rec["first_seen"]) if rec["first_seen"] else None,
                            )
                            for rec in stats_rows
                        ]
                        total = sum(b.count for b in buckets)
                        latest = None
                        for b in buckets:
                            if b.last_seen and (latest is None or b.last_seen > latest):
                                latest = b.last_seen
                    else:
                        # Single global bucket — sum across channels.
                        total = sum(int(rec["count"] or 0) for rec in stats_rows)
                        inbound = sum(int(rec["inbound"] or 0) for rec in stats_rows)
                        outbound = sum(int(rec["outbound"] or 0) for rec in stats_rows)
                        last_seens = [rec["last_seen"] for rec in stats_rows if rec["last_seen"]]
                        first_seens = [rec["first_seen"] for rec in stats_rows if rec["first_seen"]]
                        latest = max((str(x) for x in last_seens), default=None)
                        earliest = min((str(x) for x in first_seens), default=None)
                        buckets = [AggregateBucket(
                            keys={},
                            count=total,
                            inbound=inbound,
                            outbound=outbound,
                            last_seen=latest,
                            first_seen=earliest,
                        )]
                    return AggregateInternalResponse(
                        arena=arena,
                        total=total,
                        last_seen=latest,
                        buckets=buckets,
                    )
                # else: stats absent (older tenant pre-rollup, or this
                # contact has no email-keyed Person yet) → fall through
                # to the edge-walk path.

            # Edge-walk path (original Cypher). Used when:
            #   - caller has only contact_name (no email-keyed stats)
            #   - caller asked for a group_by we don't denormalise (e.g.
            #     direction)
            #   - tenant predates the rollup writer (no stats nodes yet)
            # Both paths return the same response shape, so callers
            # don't need to know which served them.
            #
            # Build the Person match. We want either email-keyed or
            # name-keyed Person nodes; when both are supplied we OR
            # them so a caller can hit either alias. Both branches
            # arena-scope the Person.
            person_clauses: List[str] = []
            params: Dict[str, Any] = {"arena": arena}
            if contact_email:
                person_clauses.append("(p.email = $contact_email)")
                params["contact_email"] = contact_email
            if contact_name:
                person_clauses.append("(p.name = $contact_name)")
                params["contact_name"] = contact_name
            person_filter = " OR ".join(person_clauses)

            # group_by keys go into the WITH clause. Cypher doesn't
            # support property-name parameters, so we template them
            # in — the whitelist above is the safety rail against
            # injection. Built up separately rather than via f-string
            # so the static MATCH clause stays a plain string and the
            # arena-safety lint can parse it cleanly.
            with_keys = ", ".join(f"r.{k} AS {k}" for k in safe_group_by)
            return_keys = ", ".join(safe_group_by)

            # Static base — arena scope on both Person and Chunk so the
            # lint catches any future copy-paste that forgets it.
            base = (
                "MATCH (p:Person {arena: $arena})-[r:COMMUNICATED]->(c:Chunk {arena: $arena})\n"
                "WHERE " + person_filter + "\n"
            )
            agg_select = (
                "count(*) AS count,\n"
                "sum(CASE WHEN _direction = 'inbound' THEN 1 ELSE 0 END) AS inbound,\n"
                "sum(CASE WHEN _direction = 'outbound' THEN 1 ELSE 0 END) AS outbound,\n"
                "max(_occurred_at) AS last_seen,\n"
                "min(_occurred_at) AS first_seen\n"
            )

            if safe_group_by:
                cypher = (
                    base
                    + f"WITH {with_keys}, r.direction AS _direction, r.occurred_at AS _occurred_at\n"
                    + f"RETURN {return_keys},\n"
                    + agg_select
                    + "ORDER BY count DESC\n"
                )
            else:
                # No group_by → one global bucket (just the overall
                # totals for this Person). Useful for "total comms
                # with X" without per-channel breakdown.
                cypher = (
                    base
                    + "WITH r.direction AS _direction, r.occurred_at AS _occurred_at\n"
                    + "RETURN " + agg_select
                )

            buckets: List[AggregateBucket] = []
            total = 0
            latest: Optional[str] = None
            for rec in session.run(cypher, **params):
                count = int(rec["count"] or 0)
                # FIX: an ungrouped Cypher aggregation always yields
                # exactly one row even when the MATCH found nothing
                # (count=0, null bounds). Skip that synthetic row so
                # "no data" really is total=0 with no buckets, as the
                # docstring promises — callers use the empty bucket
                # list to decide whether to fall back.
                if count == 0 and not safe_group_by:
                    continue
                total += count
                last_seen = rec["last_seen"]
                if last_seen and (latest is None or str(last_seen) > latest):
                    latest = str(last_seen)
                bucket_keys: Dict[str, Optional[str]] = (
                    {k: rec[k] for k in safe_group_by} if safe_group_by else {}
                )
                buckets.append(AggregateBucket(
                    keys=bucket_keys,
                    count=count,
                    inbound=int(rec["inbound"] or 0),
                    outbound=int(rec["outbound"] or 0),
                    last_seen=str(last_seen) if last_seen else None,
                    first_seen=str(rec["first_seen"]) if rec["first_seen"] else None,
                ))
            return AggregateInternalResponse(
                arena=arena,
                total=total,
                last_seen=latest,
                buckets=buckets,
            )
    except HTTPException:
        raise
    except Exception as e:
        log.error(f"aggregate-internal failed: {e}")
        raise HTTPException(status_code=500, detail=f"aggregate failed: {e}")
    finally:
        driver.close()
1572
2131
 
1573
2132
 
1574
2133
  @app.get("/index-internal-stats")