npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.10.12 → 0.10.14 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.12 → 0.10.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.cjs CHANGED Viewed

@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.12";
+var VERSION = "0.10.14";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/dist/index.js CHANGED Viewed

@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.12";
+var VERSION = "0.10.14";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.10.12",
+  "version": "0.10.14",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine-v2/fusion_drive/fusion_queue.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""fusion_queue helpers — enqueue (deduped), claim (leased), mark.
+Mirrors the distiller's distillation_queue claim semantics (FOR UPDATE SKIP
+LOCKED + claim_expires_at lease for crash recovery + attempts cap). Producer
+(sweep) calls enqueue_candidate; the consumer pool calls claim_batch then
+mark_done / mark_failed / release.
+dedup_key is the stable identity of a candidate (independent of row content),
+so re-sweeps don't pile duplicate jobs and a candidate already decided ('done')
+isn't re-adjudicated every pass.
+"""
+from __future__ import annotations
+import hashlib
+import json
+from typing import Any
+CLAIM_TTL_SEC_DEFAULT = 900
+MAX_ATTEMPTS_DEFAULT = 3
+def dedup_key(arena: str, kind: str, member_ids: list[str]) -> str:
+    """Stable hash over (arena, kind, sorted member ids). Order-independent so
+    the same candidate hashes identically no matter how it's assembled."""
+    members = "|".join(sorted(member_ids))
+    raw = f"{arena}\x1f{kind}\x1f{members}"
+    return "fq_" + hashlib.sha256(raw.encode()).hexdigest()[:32]
+def _members(kind: str, payload: dict) -> list[str]:
+    if kind == "entity_cooccurrence":
+        return [payload["junk_id"], *payload.get("candidate_ids", [])]
+    if kind == "semantic_fact":
+        return list(payload.get("fact_ids", []))
+    raise ValueError(f"unknown kind: {kind}")
+def enqueue_candidate(cur, arena: str, kind: str, payload: dict) -> bool:
+    """Insert one candidate job iff no live-or-decided job for it exists.
+    Returns True if a row was inserted, False if it was already queued/decided.
+    The partial unique index uq_fusion_queue_dedup_active is the race backstop."""
+    key = dedup_key(arena, kind, _members(kind, payload))
+    cur.execute(
+        """INSERT INTO fusion_queue (arena, kind, payload, dedup_key)
+           SELECT %s, %s, %s::jsonb, %s
+           WHERE NOT EXISTS (
+             SELECT 1 FROM fusion_queue
+             WHERE dedup_key = %s AND status IN ('pending', 'claimed', 'done')
+           )""",
+        (arena, kind, json.dumps(payload), key, key),
+    )
+    return cur.rowcount > 0
+def claim_batch(cur, worker_id: str, limit: int,
+                claim_ttl_sec: int = CLAIM_TTL_SEC_DEFAULT,
+                max_attempts: int = MAX_ATTEMPTS_DEFAULT) -> list[dict]:
+    """Atomically claim up to `limit` jobs (pending or lease-expired), oldest
+    first. FOR UPDATE SKIP LOCKED → concurrent consumers never collide."""
+    cur.execute(
+        """UPDATE fusion_queue SET
+             status = 'claimed',
+             claimed_by = %s,
+             claimed_at = NOW(),
+             claim_expires_at = NOW() + (%s || ' seconds')::interval,
+             attempts = attempts + 1
+           WHERE id IN (
+             SELECT id FROM fusion_queue
+             WHERE (status = 'pending'
+                    OR (status = 'claimed' AND claim_expires_at < NOW()))
+               AND attempts < %s
+             ORDER BY id
+             FOR UPDATE SKIP LOCKED
+             LIMIT %s
+           )
+           RETURNING id, arena, kind, payload, attempts""",
+        (worker_id, claim_ttl_sec, max_attempts, limit),
+    )
+    rows = cur.fetchall()
+    out = []
+    for r in rows:
+        # tolerate dict_row or tuple cursors
+        if isinstance(r, dict):
+            out.append(r)
+        else:
+            out.append({"id": r[0], "arena": r[1], "kind": r[2],
+                        "payload": r[3], "attempts": r[4]})
+    return out
+def mark_done(cur, queue_id: int, result: dict[str, Any] | None = None) -> None:
+    cur.execute(
+        """UPDATE fusion_queue
+           SET status='done', completed_at=NOW(), last_error=NULL, result=%s::jsonb
+           WHERE id=%s""",
+        (json.dumps(result or {}), queue_id),
+    )
+def mark_failed(cur, queue_id: int, error: str) -> None:
+    """Mark a job terminally 'failed', stamping completed_at and keeping at
+    most the first 2000 characters of `error` in last_error."""
+    truncated = error[:2000]
+    cur.execute(
+        "UPDATE fusion_queue SET status='failed', completed_at=NOW(), last_error=%s WHERE id=%s",
+        (truncated, queue_id),
+    )
+def release(cur, queue_id: int, error: str) -> None:
+    """Hand a job back to 'pending' after a retryable error.
+
+    Clears the claim columns and stores the first 2000 characters of `error`.
+    attempts is deliberately left alone: the claim already incremented it, so
+    the attempts cap still bounds retries.
+    """
+    requeue_sql = """UPDATE fusion_queue SET
+             status='pending', claimed_by=NULL, claimed_at=NULL,
+             claim_expires_at=NULL, last_error=%s
+           WHERE id=%s"""
+    cur.execute(requeue_sql, (error[:2000], queue_id))
+def pending_depth(cur) -> int:
+    cur.execute("SELECT count(*) AS n FROM fusion_queue WHERE status='pending'")
+    r = cur.fetchone()
+    return (r["n"] if isinstance(r, dict) else r[0]) or 0

package/packages/memory-engine-v2/fusion_drive/test_fusion_queue.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""fusion_queue helper tests — dedup-key identity + enqueue/claim contracts.
+The SQL itself (FOR UPDATE SKIP LOCKED, lease expiry, partial-unique dedup) is
+exercised against a real DB on the box; here we lock down the pure identity
+logic and the thin-wrapper contracts with a fake cursor."""
+from __future__ import annotations
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+import fusion_queue as fq  # noqa: E402
+# ── dedup_key ────────────────────────────────────────────────────────
+def test_dedup_key_is_order_independent():
+    a = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y", "z"])
+    b = fq.dedup_key("arena", "entity_cooccurrence", ["z", "x", "y"])
+    assert a == b, "member order must not change the key"
+def test_dedup_key_distinguishes_members_kind_arena():
+    base = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y"])
+    assert base != fq.dedup_key("arena", "entity_cooccurrence", ["x", "z"])  # members
+    assert base != fq.dedup_key("arena", "semantic_fact", ["x", "y"])        # kind
+    assert base != fq.dedup_key("other", "entity_cooccurrence", ["x", "y"])  # arena
+def test_members_extraction_per_kind():
+    assert fq._members("entity_cooccurrence", {"junk_id": "j", "candidate_ids": ["a", "b"]}) == ["j", "a", "b"]
+    assert fq._members("semantic_fact", {"fact_ids": ["f1", "f2"]}) == ["f1", "f2"]
+# ── enqueue / claim contracts (fake cursor) ──────────────────────────
+class FakeCursor:
+    """Simulates the WHERE-NOT-EXISTS dedup server-side: tracks active keys."""
+    def __init__(self):
+        self.active: set[str] = set()
+        self.rowcount = 0
+        self._fetch: list = []
+    def execute(self, sql, params=None):
+        s = " ".join(sql.split())
+        if s.startswith("INSERT INTO fusion_queue"):
+            # params = (arena, kind, payload_json, key, key)
+            key = params[3]
+            if key in self.active:
+                self.rowcount = 0
+            else:
+                self.active.add(key)
+                self.rowcount = 1
+        elif s.startswith("UPDATE fusion_queue SET status = 'claimed'"):
+            self._fetch = []  # nothing to claim in this fake by default
+    def fetchall(self):
+        return self._fetch
+    def fetchone(self):
+        return self._fetch[0] if self._fetch else (0,)
+def test_enqueue_is_idempotent_per_candidate():
+    cur = FakeCursor()
+    p = {"junk_id": "j", "candidate_ids": ["a"]}
+    assert fq.enqueue_candidate(cur, "arena", "entity_cooccurrence", p) is True   # first
+    assert fq.enqueue_candidate(cur, "arena", "entity_cooccurrence", p) is False  # dup
+    # different candidate set → new job
+    assert fq.enqueue_candidate(cur, "arena", "entity_cooccurrence",
+                                {"junk_id": "j", "candidate_ids": ["b"]}) is True
+def test_claim_batch_normalizes_tuple_rows():
+    cur = FakeCursor()
+    cur._fetch = [(7, "arena", "semantic_fact", {"fact_ids": ["f"]}, 1)]
+    # monkeypatch execute to not clear _fetch for the claim path
+    cur.execute = lambda *a, **k: None
+    out = fq.claim_batch(cur, "w", 10)
+    assert out == [{"id": 7, "arena": "arena", "kind": "semantic_fact",
+                    "payload": {"fact_ids": ["f"]}, "attempts": 1}]

package/packages/memory-engine-v2/org-model/migrations/007_audit_canonical_no_cascade.sql ADDED Viewed

@@ -0,0 +1,22 @@
+-- Fusion Drive audit integrity: the merge-audit tables must never lose rows
+-- when the row they reference changes.
+--
+-- 002 (entity_merges) and 006 (fact_merges) declared canonical_id as
+--   REFERENCES entities/facts(id) ON DELETE CASCADE
+-- That destroys audit + rollback history on CHAINED merges. When fact/entity A
+-- is merged into canonical K, a receipt is written with canonical_id = K. If K
+-- is itself later merged away (deleted), the ON DELETE CASCADE deletes A's
+-- receipt too — A stays deleted with no record of why, and no rollback payload.
+-- Observed 2026-06-15: a single arena --apply left 4 facts deleted with no
+-- fact_merges row (their canonical was absorbed into a longer "standing by"
+-- statement, cascading the receipts away).
+--
+-- An append-only audit log must outlive its referents. deprecated_id was always
+-- a plain TEXT column (no FK — the row it names is deleted by definition);
+-- canonical_id should be treated the same. Drop the cascading FK and keep
+-- canonical_id as a plain TEXT column. (We deliberately do NOT re-add a
+-- SET NULL FK: canonical_id must stay populated for forensics even after the
+-- canonical row is gone.)
+ALTER TABLE entity_merges DROP CONSTRAINT IF EXISTS entity_merges_canonical_id_fkey;
+ALTER TABLE fact_merges   DROP CONSTRAINT IF EXISTS fact_merges_canonical_id_fkey;

package/packages/memory-engine-v2/org-model/migrations/008_fusion_queue.sql ADDED Viewed

@@ -0,0 +1,64 @@
+-- Fusion Drive work queue — async, GPU-shared LLM-tier fusion.
+--
+-- The fusion sweep used to call the in-VPC distiller INLINE, serially, per
+-- arena. That meant a single big arena (usr_252 ~4k candidates, cf037 ~9k)
+-- blocked the whole sweep for 1-2h each, competed uncoordinated with live
+-- distillation for the GPU, and risked overlapping sweeps (no lock). The
+-- deterministic tiers (exact-name entity merges, exact-statement fact dedup)
+-- stay inline — they're fast and GPU-free. Only the LLM-adjudicated tier
+-- (co-occurrence entity pairs, semantic-fact groups) is decoupled here.
+--
+-- This mirrors distillation_queue (001_init.sql): same 4-state lifecycle,
+-- claim lease for crash recovery, attempts cap. The PRODUCER (sweep) detects
+-- candidates and enqueues; a CONSUMER pool drains via the shared vLLM and
+-- applies merges (re-validating each candidate is still live first, since
+-- detect and apply are now decoupled in time). The autoscaler scales the
+-- distiller fleet on COMBINED distillation_queue + fusion_queue depth, with
+-- distillation prioritised (ingest/latency path) and fusion as background
+-- that fills the troughs.
+CREATE TABLE IF NOT EXISTS fusion_queue (
+  id                BIGSERIAL PRIMARY KEY,
+  arena             TEXT NOT NULL,
+  -- entity_cooccurrence: a junk-leaning node + its candidate masters, to
+  --   adjudicate (junk vs each candidate, merge on first affirmative "same").
+  -- semantic_fact: a (subject, predicate) group of facts with differing
+  --   statements, to adjudicate pairwise and fuse the genuine same-assertions.
+  kind              TEXT NOT NULL CHECK (kind IN ('entity_cooccurrence', 'semantic_fact')),
+  -- Identifying ids only (NOT content snapshots): the consumer re-fetches the
+  -- live rows at process time and re-validates before mutating, because the
+  -- graph may have changed between enqueue and claim.
+  --   entity_cooccurrence: {"junk_id": "...", "candidate_ids": ["...", ...]}
+  --   semantic_fact:       {"fact_ids": ["...", ...]}
+  payload           JSONB NOT NULL,
+  -- Stable identity of the candidate (sha over arena+kind+sorted member ids),
+  -- so re-sweeps don't pile duplicate jobs and already-decided candidates are
+  -- not re-adjudicated every pass (the inline version re-ground the same ~179k
+  -- candidates forever).
+  dedup_key         TEXT NOT NULL,
+  status            TEXT NOT NULL DEFAULT 'pending'
+                      CHECK (status IN ('pending', 'claimed', 'done', 'failed')),
+  -- Lease bookkeeping, written by claim_batch and cleared by release:
+  --   claimed_by       = id of the worker holding the lease
+  --   claimed_at       = when the claim was taken
+  --   claim_expires_at = lease deadline; a 'claimed' row past it is claimable
+  --                      again while attempts is below the cap.
+  claimed_by        TEXT,
+  claimed_at        TIMESTAMPTZ,
+  claim_expires_at  TIMESTAMPTZ,
+  -- Incremented on every claim (not on release), so it bounds total retries.
+  attempts          INT NOT NULL DEFAULT 0,
+  -- Most recent error text (helpers store at most 2000 chars); set by
+  -- mark_failed / release, reset to NULL by mark_done.
+  last_error        TEXT,
+  -- What the consumer decided (verdict, merged ids) — observability + audit.
+  result            JSONB,
+  enqueued_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  completed_at      TIMESTAMPTZ
+);
+-- Claim scan: pending OR lease-expired claimed, oldest first.
+CREATE INDEX IF NOT EXISTS idx_fusion_queue_status ON fusion_queue(status);
+CREATE INDEX IF NOT EXISTS idx_fusion_queue_claim_expires
+  ON fusion_queue(claim_expires_at) WHERE status = 'claimed';
+CREATE INDEX IF NOT EXISTS idx_fusion_queue_arena ON fusion_queue(arena);
+-- Enqueue dedup: at most one live-or-decided job per candidate identity.
+-- (Partial: 'failed' rows are NOT covered, so a candidate can be retried by a
+-- later sweep after a terminal failure.)
+CREATE UNIQUE INDEX IF NOT EXISTS uq_fusion_queue_dedup_active
+  ON fusion_queue(dedup_key)
+  WHERE status IN ('pending', 'claimed', 'done');

package/packages/memory-engine-v2/scripts/fusion_drive_consumer.py ADDED Viewed

@@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+"""Fusion queue consumer — drains fusion_queue via the shared in-VPC distiller.
+The producer (fusion_drive_fuse.py --enqueue) applies the deterministic tier
+inline and enqueues only the LLM-tier candidates. This consumer claims those
+jobs (leased, crash-safe — mirrors the distiller worker), adjudicates them on
+the same vLLM the distiller uses, and applies the merge transactionally.
+Because detect and apply are decoupled in time, every job is RE-VALIDATED
+against the live graph before mutating: rows must still exist and still be
+mergeable, else the job is a no-op (the graph moved on). All merges reuse the
+exact same apply path as the inline fuse (audit receipts + the
+deleted==receipts invariant), so correctness is identical — only the execution
+model changed.
+Run as a long-lived worker (systemd / container):
+  PG_DSN=... PME_V2_LLM_ENDPOINT=http://<distiller>:8005/v1/chat/completions \
+  LLM_MODEL=qwen3.6-27b-fp8 python fusion_drive_consumer.py
+Env: PG_DSN, PME_V2_LLM_ENDPOINT, LLM_MODEL, FUSION_POLL_INTERVAL_SEC (10),
+FUSION_BATCH_SIZE (16), FUSION_CLAIM_TTL_SEC (900), FUSION_MAX_ATTEMPTS (3),
+FUSION_ONCE (set to drain-and-exit instead of looping — for tests/cron).
+"""
+from __future__ import annotations
+import os
+import socket
+import sys
+import time
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
+import psycopg  # noqa: E402
+from psycopg.rows import dict_row  # noqa: E402
+import fusion_queue as fq  # noqa: E402
+import canonical as C  # noqa: E402
+from merge import build_entity_merge_plan, build_fact_merge_plan  # noqa: E402
+from adjudicate import adjudicate_entities, adjudicate_facts  # noqa: E402
+# Reuse the inline fuse's apply + detection helpers verbatim (single source of
+# truth for the merge mutation + audit + invariant).
+import fusion_drive_fuse as F  # noqa: E402
+WORKER_ID = f"{socket.gethostname()}:{os.getpid()}"
+POLL = int(os.environ.get("FUSION_POLL_INTERVAL_SEC", "10"))
+BATCH = int(os.environ.get("FUSION_BATCH_SIZE", "16"))
+TTL = int(os.environ.get("FUSION_CLAIM_TTL_SEC", str(fq.CLAIM_TTL_SEC_DEFAULT)))
+MAX_ATT = int(os.environ.get("FUSION_MAX_ATTEMPTS", str(fq.MAX_ATTEMPTS_DEFAULT)))
+def _fetch_entities(cur, arena, ids):
+    if not ids:
+        return {}
+    cur.execute(
+        """SELECT id, entity_type, canonical_name, aliases, provenance_event_ids, disclosure_class
+           FROM entities WHERE arena=%s AND id = ANY(%s)""",
+        (arena, list(ids)),
+    )
+    return {r["id"]: r for r in cur.fetchall()}
+def _apply_entity_merge(cur, arena, master, losers, model):
+    """Same apply path as the inline fuse do_merge: authority-scored master
+    pick, plan, execute, post-merge fact dedup.
+
+    `master` and `losers` only form the pool of entities to fuse: the
+    surviving master is whichever member C.pick_master selects, which may
+    differ from the `master` argument.
+
+    Returns a (loser_count, master_canonical_name) tuple, or (0, None) when
+    pick_master leaves no losers to absorb.
+    """
+    group = [master, *losers]
+    sig = F._authority_signals(cur, arena, [e["id"] for e in group], model)
+    master_c, losers_c = C.pick_master(F._candidates(group, sig))
+    loser_ids = [l.entity_id for l in losers_c]
+    if not loser_ids:
+        return 0, None
+    # Map the picked candidates back to the full entity rows they came from.
+    by_id = {e["id"]: e for e in group}
+    m = by_id[master_c.entity_id]
+    ls = [by_id[i] for i in loser_ids]
+    # Facts and relationships touching the losers feed the merge plan.
+    facts, rels = F._touching(cur, arena, loser_ids)
+    plan = build_entity_merge_plan(arena=arena, master=m, losers=ls, facts=facts, relationships=rels)
+    F._execute_entity_plan(cur, plan)
+    F._dedup_master_facts(cur, arena, m["id"])
+    return len(loser_ids), m["canonical_name"]
+def _process_entity_cooccurrence(cur, conn, arena, payload, post_fn, model):
+    """Re-validate the junk node + candidates, adjudicate, merge on first
+    affirmative 'same'. Returns a result dict for the queue receipt."""
+    junk_id = payload["junk_id"]
+    cand_ids = payload.get("candidate_ids", [])
+    rows = _fetch_entities(cur, arena, [junk_id, *cand_ids])
+    junk = rows.get(junk_id)
+    if junk is None:
+        return {"merged": False, "reason": "stale: junk node gone"}
+    if not C.looks_like_id(junk["canonical_name"]):
+        return {"merged": False, "reason": "stale: node no longer junk-leaning"}
+    jctx = F._entity_context(cur, arena, junk_id)
+    for cid in cand_ids:
+        cand = rows.get(cid)
+        if cand is None:
+            continue  # candidate merged away since enqueue
+        v = adjudicate_entities({**junk, "context": jctx},
+                                {**cand, "context": F._entity_context(cur, arena, cid)},
+                                post_fn)
+        if v.get("same"):
+            with conn.transaction():
+                # re-fetch inside the txn to be certain both still exist
+                live = _fetch_entities(cur, arena, [junk_id, cid])
+                if junk_id not in live or cid not in live:
+                    return {"merged": False, "reason": "stale at apply"}
+                n, master_name = _apply_entity_merge(cur, arena, live[cid], [live[junk_id]], model)
+            return {"merged": bool(n), "master": master_name, "absorbed": junk["canonical_name"],
+                    "reason": v.get("reason", "")[:160]}
+    return {"merged": False, "reason": "no candidate adjudicated same"}
+def _process_semantic_fact(cur, conn, arena, payload, post_fn):
+    """Adjudicate a group of facts and fuse those judged the same assertion.
+
+    The live facts named in the payload are re-fetched; with fewer than two
+    left the job is stale. The highest-ranked fact (confidence, then id, both
+    descending) is the reference, and every other fact is compared against it
+    only, not against each other. Facts judged 'same' are merged in one
+    transaction block: master provenance updated, one fact_merges receipt per
+    audit row, then the deprecated facts deleted.
+
+    Returns a result dict: "merged" (count of facts absorbed) plus either
+    "reason" (nothing merged) or "kept" (first 80 chars of the reference
+    statement).
+    """
+    fact_ids = payload.get("fact_ids", [])
+    cur.execute(
+        """SELECT id, subject_entity_id, predicate, object_entity_id, statement,
+                  confidence, provenance_event_ids
+           FROM facts WHERE arena=%s AND id = ANY(%s)""",
+        (arena, list(fact_ids)),
+    )
+    facts = cur.fetchall()
+    if len(facts) < 2:
+        return {"merged": 0, "reason": "stale: <2 facts remain"}
+    # Missing/NULL confidence sorts as 0; id breaks ties deterministically.
+    ranked = sorted(facts, key=lambda f: (f.get("confidence", 0) or 0, f["id"]), reverse=True)
+    keep, same = ranked[0], [ranked[0]]
+    for other in ranked[1:]:
+        if adjudicate_facts(keep["statement"], other["statement"], post_fn).get("same"):
+            same.append(other)
+    if len(same) <= 1:
+        return {"merged": 0, "reason": "no semantic match"}
+    # NOTE(review): the plan's master_id is chosen by build_fact_merge_plan;
+    # "kept" below reports ranked[0] — confirm the two always coincide.
+    plan = build_fact_merge_plan(arena=arena, dup_facts=same)
+    if not plan:
+        return {"merged": 0, "reason": "no plan"}
+    with conn.transaction():
+        # re-validate every fact still exists before mutating
+        cur.execute("SELECT id FROM facts WHERE arena=%s AND id = ANY(%s)",
+                    (arena, [f["id"] for f in same]))
+        live = {r["id"] for r in cur.fetchall()}
+        if not all(f["id"] in live for f in same):
+            return {"merged": 0, "reason": "stale at apply"}
+        cur.execute("UPDATE facts SET provenance_event_ids=%s WHERE id=%s",
+                    (plan["master_provenance"], plan["master_id"]))
+        import json as _json
+        import uuid as _uuid
+        # One audit receipt per deprecated fact, written before the delete so
+        # every deletion in this block has its rollback payload recorded.
+        for a in plan["audit_rows"]:
+            cur.execute(
+                """INSERT INTO fact_merges (id, arena, canonical_id, deprecated_id,
+                     deprecated_statement, merge_signal, provenance_unioned, rollback_payload)
+                   VALUES (%s,%s,%s,%s,%s,'llm_adjudication',%s,%s::jsonb)""",
+                ("fm_" + _uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
+                 a["deprecated_statement"], a["provenance_unioned"],
+                 _json.dumps(a["rollback_payload"], default=str)))
+        cur.execute("DELETE FROM facts WHERE id = ANY(%s)", (plan["deprecated_ids"],))
+    return {"merged": len(same) - 1, "kept": keep["statement"][:80]}
+def process_job(conn, cur, job, post_fn, model) -> dict:
+    if job["kind"] == "entity_cooccurrence":
+        return _process_entity_cooccurrence(cur, conn, job["arena"], job["payload"], post_fn, model)
+    if job["kind"] == "semantic_fact":
+        return _process_semantic_fact(cur, conn, job["arena"], job["payload"], post_fn)
+    raise ValueError(f"unknown kind {job['kind']}")
+def drain(conn, post_fn, model) -> int:
+    """Claim + process one batch. Returns number of jobs handled.
+
+    The claim is committed before any job runs. Each job is then processed
+    and committed on its own, so one failing job does not undo the others.
+    A job that raises is rolled back and either marked 'failed' (attempts
+    have reached MAX_ATT) or released back to 'pending' for a retry.
+    """
+    with conn.cursor() as cur:
+        # NOTE(review): claim_batch sits outside the try below, so an error
+        # raised here propagates to the caller without a rollback.
+        jobs = fq.claim_batch(cur, WORKER_ID, BATCH, TTL, MAX_ATT)
+        conn.commit()
+        for job in jobs:
+            try:
+                result = process_job(conn, cur, job, post_fn, model)
+                fq.mark_done(cur, job["id"], result)
+                conn.commit()
+                print(f"[fusion-consumer] done id={job['id']} {job['kind']} {result}")
+            except Exception as exc:  # noqa: BLE001
+                # Discard whatever the failed job wrote before recording its fate.
+                conn.rollback()
+                msg = f"{type(exc).__name__}: {exc}"
+                # job["attempts"] already includes this claim's increment.
+                if job["attempts"] >= MAX_ATT:
+                    fq.mark_failed(cur, job["id"], msg)
+                    print(f"[fusion-consumer] FAILED id={job['id']} (attempts exhausted): {msg}", file=sys.stderr)
+                else:
+                    fq.release(cur, job["id"], msg)
+                    print(f"[fusion-consumer] release id={job['id']} for retry: {msg}", file=sys.stderr)
+                conn.commit()
+        return len(jobs)
+def main() -> int:
+    dsn = os.environ.get("PG_DSN")
+    endpoint = os.environ.get("PME_V2_LLM_ENDPOINT")
+    model = os.environ.get("LLM_MODEL", F.DEFAULT_MODEL)
+    if not dsn:
+        print("PG_DSN required", file=sys.stderr); return 2
+    if not endpoint:
+        print("PME_V2_LLM_ENDPOINT required (the in-VPC distiller)", file=sys.stderr); return 2
+    post_fn = F._distiller_post_fn(endpoint, model)
+    once = bool(os.environ.get("FUSION_ONCE"))
+    print(f"[fusion-consumer] worker={WORKER_ID} endpoint={endpoint} model={model} "
+          f"batch={BATCH} poll={POLL}s once={once}")
+    with psycopg.connect(dsn, row_factory=dict_row) as conn:
+        if once:
+            total = 0
+            while True:
+                n = drain(conn, post_fn, model)
+                total += n
+                if n == 0:
+                    break
+            print(f"[fusion-consumer] drained {total} job(s), exiting (FUSION_ONCE)")
+            return 0
+        while True:
+            try:
+                n = drain(conn, post_fn, model)
+                if n == 0:
+                    time.sleep(POLL)
+            except Exception as exc:  # noqa: BLE001
+                print(f"[fusion-consumer] loop error: {exc}", file=sys.stderr)
+                time.sleep(POLL * 2)
+if __name__ == "__main__":
+    raise SystemExit(main())

package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py CHANGED Viewed

@@ -38,6 +38,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive")
 import canonical as C  # noqa: E402
 from merge import build_entity_merge_plan, build_fact_merge_plan  # noqa: E402
 from adjudicate import adjudicate_entities, adjudicate_facts  # noqa: E402
+import fusion_queue as fq  # noqa: E402  (producer enqueue helpers)
 try:
     import httpx
@@ -355,6 +356,12 @@ def main() -> int:
                     help="in-VPC distiller /v1/chat/completions for adjudication "
                          "(no egress). Omit to skip the LLM tier (deterministic only).")
     ap.add_argument("--model", default=DEFAULT_MODEL)
+    ap.add_argument("--enqueue", action="store_true",
+                    help="producer mode: apply the deterministic tier inline, but "
+                         "ENQUEUE the LLM-tier candidates (co-occurrence + semantic "
+                         "fact) into fusion_queue for the consumer pool to adjudicate "
+                         "async, instead of calling the distiller inline. The scalable "
+                         "shape — keeps the sweep fast and off the GPU.")
     args = ap.parse_args()
     if not args.pg_dsn:
         print("PG_DSN required", file=sys.stderr)
@@ -386,15 +393,46 @@ def main() -> int:
                 merged += len(loser_ids)
                 return len(loser_ids)
+            # Audit invariant baseline: every entity/fact deletion MUST leave a
+            # rollback receipt. Capture pre-counts; cross-check after the run.
+            def _counts():
+                return {t: cur.execute(
+                            f"SELECT count(*) AS n FROM {t} WHERE arena=%s",
+                            (args.arena,)).fetchone()["n"]
+                        for t in ("entities", "facts", "entity_merges", "fact_merges")}
+            pre_counts = _counts()
             # Tier 1 — deterministic: exact normalized-name dupes only
             # (case/whitespace variants). Co-occurrence is NOT auto-merged.
             for group in _entity_dup_sets(cur, args.arena):
                 do_merge(group)
-            # Tier 2 — LLM adjudication via the in-VPC distiller (no egress).
-            # ALL co-occurrence merges live here now — single- and multi-
-            # candidate alike — because co-occurrence never proves identity.
-            if post_fn:
+            # Tier 2 — LLM-adjudicated fusion.
+            #
+            # PRODUCER MODE (--enqueue, the scalable shape): detect the LLM-tier
+            # candidates and enqueue them into fusion_queue; a consumer pool
+            # drains them async via the shared distiller. The sweep stays fast
+            # and GPU-free. Deterministic Tier 1 above already applied inline.
+            enqueued = 0
+            if args.enqueue:
+                for amb in _cooccurrence_candidates(cur, args.arena):
+                    if fq.enqueue_candidate(cur, args.arena, "entity_cooccurrence",
+                                            {"junk_id": amb["junk"]["id"],
+                                             "candidate_ids": [c["id"] for c in amb["candidates"]]}):
+                        enqueued += 1
+                for fg in _semantic_fact_groups(cur, args.arena):
+                    if fq.enqueue_candidate(cur, args.arena, "semantic_fact",
+                                            {"fact_ids": [f["id"] for f in fg]}):
+                        enqueued += 1
+                if args.apply:
+                    conn.commit()
+                print(f"  [enqueue] {enqueued} LLM-tier candidate job(s) -> fusion_queue")
+            # INLINE MODE (--llm-endpoint, legacy / manual / single-arena review):
+            # adjudicate + merge synchronously. ALL co-occurrence merges live
+            # here — single- and multi-candidate alike — because co-occurrence
+            # never proves identity.
+            elif post_fn:
                 # 2a. co-occurrence: does the junk node match a real entity?
                 for amb in _cooccurrence_candidates(cur, args.arena):
                     j = amb["junk"]
@@ -434,10 +472,27 @@ def main() -> int:
                             conn.commit()
                         llm_fact_merges += len(same) - 1
+            # Audit invariant: deletions must equal receipts written this run.
+            # A mismatch means some rows were deleted with no rollback receipt —
+            # e.g. a chained-merge cascade eating audit rows (fixed in migration
+            # 007, guarded here so it can never recur silently). Detective, not
+            # preventive (merges commit per-group), but it turns a silent leak
+            # into a loud, recorded failure. Holds trivially for dry-run (0==0).
+            post_counts = _counts()
+            ent_deleted = pre_counts["entities"] - post_counts["entities"]
+            fact_deleted = pre_counts["facts"] - post_counts["facts"]
+            ent_audited = post_counts["entity_merges"] - pre_counts["entity_merges"]
+            fact_audited = post_counts["fact_merges"] - pre_counts["fact_merges"]
+            audit_ok = (ent_deleted == ent_audited) and (fact_deleted == fact_audited)
             run_id = "fdr_" + uuid.uuid4().hex[:20]
             detail = {"proposals": proposals, "merged": merged,
                       "llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
-                      "llm_tier": bool(post_fn)}
+                      "llm_tier": "enqueue" if args.enqueue else bool(post_fn),
+                      "enqueued": enqueued,
+                      "audit": {"ok": audit_ok,
+                                "entities_deleted": ent_deleted, "entities_audited": ent_audited,
+                                "facts_deleted": fact_deleted, "facts_audited": fact_audited}}
             cur.execute(
                 """INSERT INTO fusion_drive_runs (id, arena, pass_kind, mode, scanned, changed, detail, finished_at)
                    VALUES (%s,%s,'fusion',%s,%s,%s,%s::jsonb,NOW())""",
@@ -450,6 +505,14 @@ def main() -> int:
           f"{merged} entities merged ({llm_entity_merges} via llm), {llm_fact_merges} facts merged via llm. "
           f"LLM tier: {'on (distiller)' if post_fn else 'off'}")
     print(f"  ledger: {run_id}")
+    if not audit_ok:
+        print(f"[fusion-drive:fuse] AUDIT INVARIANT VIOLATED arena={args.arena}: "
+              f"entities deleted={ent_deleted} audited={ent_audited}; "
+              f"facts deleted={fact_deleted} audited={fact_audited}. "
+              f"Deletions without a rollback receipt — do NOT trust audit-based "
+              f"rollback for this run; restore from backup if needed.",
+              file=sys.stderr)
+        return 3
     return 0