npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.10.13 → 0.10.15 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.13 → 0.10.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/index.cjs CHANGED Viewed

@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.13";
+var VERSION = "0.10.15";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/dist/index.js CHANGED Viewed

@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.13";
+var VERSION = "0.10.15";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.10.13",
+  "version": "0.10.15",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine-v2/extractor-async/extraction_schema.py CHANGED Viewed

@@ -35,9 +35,26 @@ from typing import Any
 # Allowed-value enums. Moved here from worker.py (which now imports
 # them) so the schema pins to the SAME constants the KV prompt and
 # downstream normalisation use — change them in one place only.
+#
+# 2026-06-16 — ONTOLOGY ALIGNMENT (entity-ontology-the-spine.md). This enum is
+# specifically the set of types the LLM extracts FROM PROSE as named entities.
+# Removed the NLP byproducts that polluted ~28% of the graph and are not
+# business entities:
+#   - `place`, `date` → ATTRIBUTES of real entities (a meeting's location/time),
+#     never standalone entities. The guided enum no longer admits them, so the
+#     model stops minting bare place/date nodes (the info still lands in facts).
+#   - `concept`     → folds into `topic` (the model now emits `topic`).
+# NOT added here (deliberately): meeting / document / thread / task / decision.
+# Those are NOT LLM-prose entities — they are created by structured-event paths
+# (meetings/actions/thread module projections; the sync extractor already emits
+# `document`) or modelled as facts (`decision` category). Forcing the LLM to
+# mint them from prose would create spurious nodes. They join the ontology via
+# their own paths, not this enum.
+# Forward-only: existing place/date/concept rows are untouched and demoted at
+# READ time by the ontology ENGINE_TYPE_MAP (concept→topic, place/date→attribute,
+# other→unresolved). No re-distill required for this change to take effect.
 ALLOWED_ENT_TYPES = {
-    "person", "org", "product", "place", "project",
-    "concept", "topic", "date", "other",
+    "person", "org", "product", "project", "topic", "other",
 }
 ALLOWED_FCT_CATEGORIES = {
     "decision", "commitment", "state", "mention",

package/packages/memory-engine-v2/extractor-async/test_guided_json_parser.py CHANGED Viewed

@@ -61,6 +61,20 @@ def test_schema_enums_pin_to_shared_constants() -> None:
     assert fct_enum == sorted(fct_enum)
+def test_entity_type_enum_is_ontology_aligned() -> None:
+    """Pin the LLM-extracted entity enum to the ontology-aligned set.
+    The enum must be exactly the six named-entity work types, and the former
+    NLP byproducts (place / date / concept) must stay out so a later edit
+    cannot silently re-admit them. meeting / document / thread / task /
+    decision are intentionally absent: they arrive via structured-event paths
+    or are modelled as facts, not extracted from prose."""
+    expected_types = {"person", "org", "product", "project", "topic", "other"}
+    assert xs.ALLOWED_ENT_TYPES == expected_types
+    removed_byproducts = ("place", "date", "concept")
+    for byproduct in removed_byproducts:
+        assert byproduct not in xs.ALLOWED_ENT_TYPES
 def test_schema_caps_mirror_prompt_hard_caps() -> None:
     """8 ENT / 6 FCT / 6 REL per event, statement <= 140 — what
     BATCH_SYSTEM_PROMPT requests, the schema enforces."""

package/packages/memory-engine-v2/extractor-async/worker.py CHANGED Viewed

@@ -222,7 +222,10 @@ RULES:
 matching the input index). NEVER skip an event — if an event has \
 nothing to extract, emit ONLY the header.
 - ENT lines have 3 or 4 fields: literal `ENT`, type, name, [email].
-  type ∈ {person, org, product, place, project, concept, topic, date, other}
+  type ∈ {person, org, product, project, topic, other}
+  Do NOT emit a bare date or place as an entity — those are attributes of
+  other entities (a meeting's time/location), not entities themselves. An
+  abstract idea or theme is a `topic`. Use `other` only when nothing fits.
   email (OPTIONAL, person only): when the event body or attributes
   show an email address that unambiguously identifies the person,
   append it as the 4th field. This pairs the name+email forms so a
@@ -277,8 +280,10 @@ Each per-event object has:
 RULES:
 - NEVER skip an event — if an event has nothing to extract, emit its \
 object with "index" set and empty arrays.
-- entities: type ∈ {person, org, product, place, project, concept, \
-topic, date, other}.
+- entities: type ∈ {person, org, product, project, topic, other}. \
+Do NOT emit a bare date or place as an entity (those are attributes of other \
+entities, not entities); an abstract idea or theme is a `topic`; use `other` \
+only when nothing else fits.
   email (OPTIONAL, person only): when the event body or attributes
   show an email address that unambiguously identifies the person,
   include it. This pairs the name+email forms so a later event seeing

package/packages/memory-engine-v2/fusion_drive/fusion_queue.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""fusion_queue helpers — enqueue (deduped), claim (leased), mark.
+Mirrors the distiller's distillation_queue claim semantics (FOR UPDATE SKIP
+LOCKED + claim_expires_at lease for crash recovery + attempts cap). Producer
+(sweep) calls enqueue_candidate; the consumer pool calls claim_batch then
+mark_done / mark_failed / release.
+dedup_key is the stable identity of a candidate (independent of row content),
+so re-sweeps don't pile duplicate jobs and a candidate already decided ('done')
+isn't re-adjudicated every pass.
+"""
+from __future__ import annotations
+import hashlib
+import json
+from typing import Any
+# Default claim lease in seconds; claim_batch adds it to NOW() to set claim_expires_at.
+CLAIM_TTL_SEC_DEFAULT = 900
+# Default attempts cap; claim_batch only selects rows whose attempts is below it.
+MAX_ATTEMPTS_DEFAULT = 3
+def dedup_key(arena: str, kind: str, member_ids: list[str]) -> str:
+    """Derive the queue identity of a candidate from its arena, kind and members.
+    Member ids are sorted before hashing, so the order in which a candidate was
+    assembled never changes the key. Returns ``fq_`` followed by the first 32
+    hex characters of the SHA-256 digest."""
+    joined_ids = "|".join(sorted(member_ids))
+    digest = hashlib.sha256(f"{arena}\x1f{kind}\x1f{joined_ids}".encode()).hexdigest()
+    return f"fq_{digest[:32]}"
+def _members(kind: str, payload: dict) -> list[str]:
+    """Return the member ids that identify a candidate of the given kind.
+    entity_cooccurrence yields the junk id followed by any candidate ids;
+    semantic_fact yields the fact ids. Any other kind raises ValueError."""
+    if kind == "semantic_fact":
+        return list(payload.get("fact_ids", []))
+    if kind != "entity_cooccurrence":
+        raise ValueError(f"unknown kind: {kind}")
+    return [payload["junk_id"], *payload.get("candidate_ids", [])]
+def enqueue_candidate(cur, arena: str, kind: str, payload: dict) -> bool:
+    """Insert one candidate job iff no live-or-decided job for it exists.
+    Args:
+        cur: DB cursor exposing ``execute`` and ``rowcount``.
+        arena: arena the candidate belongs to (stored and hashed into the key).
+        kind: ``entity_cooccurrence`` or ``semantic_fact``.
+        payload: identifying ids for the candidate; stored as JSONB.
+    Returns:
+        True if a row was inserted, False if the candidate was already
+        queued or decided (status pending / claimed / done).
+    Raises:
+        ValueError: if ``kind`` is not a known candidate kind.
+    The WHERE NOT EXISTS guard handles the common case. Two producers racing
+    on the same candidate can both pass that guard; ON CONFLICT DO NOTHING
+    lets the partial unique index uq_fusion_queue_dedup_active act as a quiet
+    backstop (rowcount 0 -> False) instead of raising a unique violation that
+    would abort the caller's transaction."""
+    key = dedup_key(arena, kind, _members(kind, payload))
+    cur.execute(
+        """INSERT INTO fusion_queue (arena, kind, payload, dedup_key)
+           SELECT %s, %s, %s::jsonb, %s
+           WHERE NOT EXISTS (
+             SELECT 1 FROM fusion_queue
+             WHERE dedup_key = %s AND status IN ('pending', 'claimed', 'done')
+           )
+           ON CONFLICT DO NOTHING""",
+        (arena, kind, json.dumps(payload), key, key),
+    )
+    return cur.rowcount > 0
+def claim_batch(cur, worker_id: str, limit: int,
+                claim_ttl_sec: int = CLAIM_TTL_SEC_DEFAULT,
+                max_attempts: int = MAX_ATTEMPTS_DEFAULT) -> list[dict]:
+    """Lease up to `limit` jobs to `worker_id` in one statement and return them.
+    Eligible rows are pending ones plus claimed ones whose lease has expired,
+    provided attempts is still below `max_attempts`; they are taken in id
+    order. FOR UPDATE SKIP LOCKED keeps concurrent consumers off rows another
+    consumer is claiming. Each claim bumps attempts and sets a lease of
+    `claim_ttl_sec` seconds. Every returned job is a dict with the keys id,
+    arena, kind, payload and attempts."""
+    params = (worker_id, claim_ttl_sec, max_attempts, limit)
+    cur.execute(
+        """UPDATE fusion_queue SET
+             status = 'claimed',
+             claimed_by = %s,
+             claimed_at = NOW(),
+             claim_expires_at = NOW() + (%s || ' seconds')::interval,
+             attempts = attempts + 1
+           WHERE id IN (
+             SELECT id FROM fusion_queue
+             WHERE (status = 'pending'
+                    OR (status = 'claimed' AND claim_expires_at < NOW()))
+               AND attempts < %s
+             ORDER BY id
+             FOR UPDATE SKIP LOCKED
+             LIMIT %s
+           )
+           RETURNING id, arena, kind, payload, attempts""",
+        params,
+    )
+    columns = ("id", "arena", "kind", "payload", "attempts")
+    # dict_row cursors already yield mappings; tuple rows are keyed by the
+    # RETURNING column order.
+    return [
+        row if isinstance(row, dict)
+        else {name: row[pos] for pos, name in enumerate(columns)}
+        for row in cur.fetchall()
+    ]
+def mark_done(cur, queue_id: int, result: dict[str, Any] | None = None) -> None:
+    """Mark a job 'done': stamp completed_at, clear last_error and store the
+    consumer's result as JSONB (an empty object when no result is given)."""
+    result_json = json.dumps(result if result else {})
+    statement = """UPDATE fusion_queue
+           SET status='done', completed_at=NOW(), last_error=NULL, result=%s::jsonb
+           WHERE id=%s"""
+    cur.execute(statement, (result_json, queue_id))
+def mark_failed(cur, queue_id: int, error: str) -> None:
+    """Mark a job 'failed', stamping completed_at and recording the error text
+    truncated to 2000 characters."""
+    truncated = error[:2000]
+    statement = "UPDATE fusion_queue SET status='failed', completed_at=NOW(), last_error=%s WHERE id=%s"
+    cur.execute(statement, (truncated, queue_id))
+def release(cur, queue_id: int, error: str) -> None:
+    """Hand a job back to 'pending' after a retryable error.
+    The claim fields are cleared and the error text (truncated to 2000
+    characters) is kept in last_error. attempts is left as the claim bumped
+    it, so the attempts cap still limits how often the job is retried."""
+    truncated = error[:2000]
+    statement = """UPDATE fusion_queue SET
+             status='pending', claimed_by=NULL, claimed_at=NULL,
+             claim_expires_at=NULL, last_error=%s
+           WHERE id=%s"""
+    cur.execute(statement, (truncated, queue_id))
+def pending_depth(cur) -> int:
+    """Count the rows currently in status 'pending'.
+    Accepts dict-style or tuple-style cursor rows; a NULL/zero count is
+    returned as 0."""
+    cur.execute("SELECT count(*) AS n FROM fusion_queue WHERE status='pending'")
+    row = cur.fetchone()
+    count = row["n"] if isinstance(row, dict) else row[0]
+    return count or 0

package/packages/memory-engine-v2/fusion_drive/test_fusion_queue.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""fusion_queue helper tests — dedup-key identity + enqueue/claim contracts.
+The SQL itself (FOR UPDATE SKIP LOCKED, lease expiry, partial-unique dedup) is
+exercised against a real DB on the box; here we lock down the pure identity
+logic and the thin-wrapper contracts with a fake cursor."""
+from __future__ import annotations
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+import fusion_queue as fq  # noqa: E402
+# ── dedup_key ────────────────────────────────────────────────────────
+def test_dedup_key_is_order_independent():
+    """Permuting the member ids must yield the same dedup key."""
+    forward = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y", "z"])
+    shuffled = fq.dedup_key("arena", "entity_cooccurrence", ["z", "x", "y"])
+    assert forward == shuffled, "member order must not change the key"
+def test_dedup_key_distinguishes_members_kind_arena():
+    """Changing any one of members, kind or arena must change the key."""
+    base = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y"])
+    variants = (
+        ("arena", "entity_cooccurrence", ["x", "z"]),  # members
+        ("arena", "semantic_fact", ["x", "y"]),        # kind
+        ("other", "entity_cooccurrence", ["x", "y"]),  # arena
+    )
+    for arena, kind, members in variants:
+        assert base != fq.dedup_key(arena, kind, members)
+def test_members_extraction_per_kind():
+    """Each kind pulls its member ids from its own payload keys."""
+    cooccurrence_payload = {"junk_id": "j", "candidate_ids": ["a", "b"]}
+    fact_payload = {"fact_ids": ["f1", "f2"]}
+    assert fq._members("entity_cooccurrence", cooccurrence_payload) == ["j", "a", "b"]
+    assert fq._members("semantic_fact", fact_payload) == ["f1", "f2"]
+# ── enqueue / claim contracts (fake cursor) ──────────────────────────
+class FakeCursor:
+    """In-memory stand-in for a DB cursor.
+    Emulates the server-side WHERE-NOT-EXISTS dedup by remembering which
+    dedup keys have been inserted, and lets tests preload `_fetch` with the
+    rows that fetchall / fetchone should hand back."""
+    def __init__(self):
+        self._fetch: list = []
+        self.rowcount = 0
+        self.active: set[str] = set()
+    def execute(self, sql, params=None):
+        normalized = " ".join(sql.split())
+        if normalized.startswith("UPDATE fusion_queue SET status = 'claimed'"):
+            self._fetch = []  # nothing to claim in this fake by default
+            return
+        if not normalized.startswith("INSERT INTO fusion_queue"):
+            return
+        # params = (arena, kind, payload_json, key, key)
+        key = params[3]
+        is_new = key not in self.active
+        self.active.add(key)
+        self.rowcount = 1 if is_new else 0
+    def fetchall(self):
+        return self._fetch
+    def fetchone(self):
+        if self._fetch:
+            return self._fetch[0]
+        return (0,)
+def test_enqueue_is_idempotent_per_candidate():
+    """The same candidate enqueues once; a different candidate set enqueues anew."""
+    cur = FakeCursor()
+    first_candidate = {"junk_id": "j", "candidate_ids": ["a"]}
+    other_candidate = {"junk_id": "j", "candidate_ids": ["b"]}
+    kind = "entity_cooccurrence"
+    assert fq.enqueue_candidate(cur, "arena", kind, first_candidate) is True   # first
+    assert fq.enqueue_candidate(cur, "arena", kind, first_candidate) is False  # dup
+    # different candidate set → new job
+    assert fq.enqueue_candidate(cur, "arena", kind, other_candidate) is True
+def test_claim_batch_normalizes_tuple_rows():
+    """A tuple row from the cursor comes back as a dict keyed by column name."""
+    cur = FakeCursor()
+    payload = {"fact_ids": ["f"]}
+    cur._fetch = [(7, "arena", "semantic_fact", payload, 1)]
+    # replace execute with a no-op so the claim path does not clear _fetch
+    cur.execute = lambda *a, **k: None
+    claimed = fq.claim_batch(cur, "w", 10)
+    expected_job = {"id": 7, "arena": "arena", "kind": "semantic_fact",
+                    "payload": {"fact_ids": ["f"]}, "attempts": 1}
+    assert claimed == [expected_job]

package/packages/memory-engine-v2/org-model/migrations/008_fusion_queue.sql ADDED Viewed

@@ -0,0 +1,64 @@
+-- Fusion Drive work queue — async, GPU-shared LLM-tier fusion.
+--
+-- The fusion sweep used to call the in-VPC distiller INLINE, serially, per
+-- arena. That meant a single big arena (usr_252 ~4k candidates, cf037 ~9k)
+-- blocked the whole sweep for 1-2h each, competed uncoordinated with live
+-- distillation for the GPU, and risked overlapping sweeps (no lock). The
+-- deterministic tiers (exact-name entity merges, exact-statement fact dedup)
+-- stay inline — they're fast and GPU-free. Only the LLM-adjudicated tier
+-- (co-occurrence entity pairs, semantic-fact groups) is decoupled here.
+--
+-- This mirrors distillation_queue (001_init.sql): same 4-state lifecycle,
+-- claim lease for crash recovery, attempts cap. The PRODUCER (sweep) detects
+-- candidates and enqueues; a CONSUMER pool drains via the shared vLLM and
+-- applies merges (re-validating each candidate is still live first, since
+-- detect and apply are now decoupled in time). The autoscaler scales the
+-- distiller fleet on COMBINED distillation_queue + fusion_queue depth, with
+-- distillation prioritised (ingest/latency path) and fusion as background
+-- that fills the troughs.
+CREATE TABLE IF NOT EXISTS fusion_queue (
+  -- Monotonic id; the claim query takes eligible rows in id order (oldest first).
+  id                BIGSERIAL PRIMARY KEY,
+  arena             TEXT NOT NULL,
+  -- entity_cooccurrence: a junk-leaning node + its candidate masters, to
+  --   adjudicate (junk vs each candidate, merge on first affirmative "same").
+  -- semantic_fact: a (subject, predicate) group of facts with differing
+  --   statements, to adjudicate pairwise and fuse the genuine same-assertions.
+  kind              TEXT NOT NULL CHECK (kind IN ('entity_cooccurrence', 'semantic_fact')),
+  -- Identifying ids only (NOT content snapshots): the consumer re-fetches the
+  -- live rows at process time and re-validates before mutating, because the
+  -- graph may have changed between enqueue and claim.
+  --   entity_cooccurrence: {"junk_id": "...", "candidate_ids": ["...", ...]}
+  --   semantic_fact:       {"fact_ids": ["...", ...]}
+  payload           JSONB NOT NULL,
+  -- Stable identity of the candidate (sha over arena+kind+sorted member ids),
+  -- so re-sweeps don't pile duplicate jobs and already-decided candidates are
+  -- not re-adjudicated every pass (the inline version re-ground the same ~179k
+  -- candidates forever).
+  dedup_key         TEXT NOT NULL,
+  -- Lifecycle: pending -> claimed -> done | failed; a retryable error returns
+  -- a claimed row to pending.
+  status            TEXT NOT NULL DEFAULT 'pending'
+                      CHECK (status IN ('pending', 'claimed', 'done', 'failed')),
+  -- Lease bookkeeping: set when a worker claims the row, cleared when the row
+  -- is released. A claimed row whose claim_expires_at has passed is claimable
+  -- again (crash recovery).
+  claimed_by        TEXT,
+  claimed_at        TIMESTAMPTZ,
+  claim_expires_at  TIMESTAMPTZ,
+  -- Incremented on every claim; rows that reached the attempts cap are no
+  -- longer selected by the claim query.
+  attempts          INT NOT NULL DEFAULT 0,
+  -- Most recent error text (the helpers truncate it to 2000 characters);
+  -- reset to NULL when the job is marked done.
+  last_error        TEXT,
+  -- What the consumer decided (verdict, merged ids) — observability + audit.
+  result            JSONB,
+  enqueued_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  completed_at      TIMESTAMPTZ
+);
+-- Claim scan: pending OR lease-expired claimed, oldest first.
+-- The status index can also serve the status='pending' depth count.
+CREATE INDEX IF NOT EXISTS idx_fusion_queue_status ON fusion_queue(status);
+CREATE INDEX IF NOT EXISTS idx_fusion_queue_claim_expires
+  ON fusion_queue(claim_expires_at) WHERE status = 'claimed';
+-- NOTE(review): no statement visible in this package filters fusion_queue by
+-- arena alone — confirm which reader this index is meant to serve.
+CREATE INDEX IF NOT EXISTS idx_fusion_queue_arena ON fusion_queue(arena);
+-- Enqueue dedup: at most one live-or-decided job per candidate identity.
+-- (Partial: 'failed' rows are NOT covered, so a candidate can be retried by a
+-- later sweep after a terminal failure.)
+CREATE UNIQUE INDEX IF NOT EXISTS uq_fusion_queue_dedup_active
+  ON fusion_queue(dedup_key)
+  WHERE status IN ('pending', 'claimed', 'done');

package/packages/memory-engine-v2/scripts/fusion_drive_consumer.py ADDED Viewed

@@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+"""Fusion queue consumer — drains fusion_queue via the shared in-VPC distiller.
+The producer (fusion_drive_fuse.py --enqueue) applies the deterministic tier
+inline and enqueues only the LLM-tier candidates. This consumer claims those
+jobs (leased, crash-safe — mirrors the distiller worker), adjudicates them on
+the same vLLM the distiller uses, and applies the merge transactionally.
+Because detect and apply are decoupled in time, every job is RE-VALIDATED
+against the live graph before mutating: rows must still exist and still be
+mergeable, else the job is a no-op (the graph moved on). All merges reuse the
+exact same apply path as the inline fuse (audit receipts + the
+deleted==receipts invariant), so correctness is identical — only the execution
+model changed.
+Run as a long-lived worker (systemd / container):
+  PG_DSN=... PME_V2_LLM_ENDPOINT=http://<distiller>:8005/v1/chat/completions \
+  LLM_MODEL=qwen3.6-27b-fp8 python fusion_drive_consumer.py
+Env: PG_DSN, PME_V2_LLM_ENDPOINT, LLM_MODEL, FUSION_POLL_INTERVAL_SEC (10),
+FUSION_BATCH_SIZE (16), FUSION_CLAIM_TTL_SEC (900), FUSION_MAX_ATTEMPTS (3),
+FUSION_ONCE (set to drain-and-exit instead of looping — for tests/cron).
+"""
+from __future__ import annotations
+import os
+import socket
+import sys
+import time
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
+import psycopg  # noqa: E402
+from psycopg.rows import dict_row  # noqa: E402
+import fusion_queue as fq  # noqa: E402
+import canonical as C  # noqa: E402
+from merge import build_entity_merge_plan, build_fact_merge_plan  # noqa: E402
+from adjudicate import adjudicate_entities, adjudicate_facts  # noqa: E402
+# Reuse the inline fuse's apply + detection helpers verbatim (single source of
+# truth for the merge mutation + audit + invariant).
+import fusion_drive_fuse as F  # noqa: E402
+# Claimant identity passed to claim_batch (stored in claimed_by): "<hostname>:<pid>".
+WORKER_ID = f"{socket.gethostname()}:{os.getpid()}"
+# Tunables below are read from the environment once, at import time.
+# POLL: seconds main() sleeps after a drain that handled no jobs.
+POLL = int(os.environ.get("FUSION_POLL_INTERVAL_SEC", "10"))
+# BATCH: maximum number of jobs claimed per drain() call.
+BATCH = int(os.environ.get("FUSION_BATCH_SIZE", "16"))
+# TTL: claim lease length in seconds, handed to claim_batch.
+TTL = int(os.environ.get("FUSION_CLAIM_TTL_SEC", str(fq.CLAIM_TTL_SEC_DEFAULT)))
+# MAX_ATT: attempts cap; drain() marks a job failed once its attempts reach it.
+MAX_ATT = int(os.environ.get("FUSION_MAX_ATTEMPTS", str(fq.MAX_ATTEMPTS_DEFAULT)))
+def _fetch_entities(cur, arena, ids):
+    """Load the live entity rows for `ids` within `arena`, keyed by entity id.
+    Ids without a matching row are simply absent from the result. A falsy
+    `ids` returns an empty dict without touching the database. Rows are
+    indexed by their "id" key, so the cursor must yield mapping-style rows."""
+    if not ids:
+        return {}
+    query = """SELECT id, entity_type, canonical_name, aliases, provenance_event_ids, disclosure_class
+           FROM entities WHERE arena=%s AND id = ANY(%s)"""
+    cur.execute(query, (arena, list(ids)))
+    by_id = {}
+    for row in cur.fetchall():
+        by_id[row["id"]] = row
+    return by_id
+def _apply_entity_merge(cur, arena, master, losers, model):
+    """Merge a group of entities through the inline fuse's apply helpers.
+    The proposed `master` is only a suggestion: authority signals are scored
+    over the whole group and C.pick_master chooses the actual master. The
+    merge plan is then built, executed, and the master's facts are deduped.
+    Returns a 2-tuple ``(absorbed_count, master_canonical_name)``, or
+    ``(0, None)`` when pick_master leaves nothing to absorb."""
+    group = [master, *losers]
+    group_ids = [entity["id"] for entity in group]
+    signals = F._authority_signals(cur, arena, group_ids, model)
+    chosen, absorbed = C.pick_master(F._candidates(group, signals))
+    absorbed_ids = [candidate.entity_id for candidate in absorbed]
+    if not absorbed_ids:
+        return 0, None
+    rows_by_id = {entity["id"]: entity for entity in group}
+    master_row = rows_by_id[chosen.entity_id]
+    loser_rows = [rows_by_id[entity_id] for entity_id in absorbed_ids]
+    facts, rels = F._touching(cur, arena, absorbed_ids)
+    plan = build_entity_merge_plan(
+        arena=arena,
+        master=master_row,
+        losers=loser_rows,
+        facts=facts,
+        relationships=rels,
+    )
+    F._execute_entity_plan(cur, plan)
+    F._dedup_master_facts(cur, arena, master_row["id"])
+    return len(absorbed_ids), master_row["canonical_name"]
+def _process_entity_cooccurrence(cur, conn, arena, payload, post_fn, model):
+    """Re-validate the junk node + candidates, adjudicate, merge on first
+    affirmative 'same'. Returns a result dict for the queue receipt.
+    The result always carries "merged" and "reason"; on an affirmative
+    verdict it also carries "master" and "absorbed". Candidates are tried in
+    payload order and processing stops at the first affirmative verdict."""
+    junk_id = payload["junk_id"]
+    cand_ids = payload.get("candidate_ids", [])
+    # One fetch for the junk node and all candidates; ids that no longer
+    # exist are simply missing from `rows`.
+    rows = _fetch_entities(cur, arena, [junk_id, *cand_ids])
+    junk = rows.get(junk_id)
+    if junk is None:
+        return {"merged": False, "reason": "stale: junk node gone"}
+    # The node may have been renamed since enqueue; skip if its name no
+    # longer passes the junk-leaning check.
+    if not C.looks_like_id(junk["canonical_name"]):
+        return {"merged": False, "reason": "stale: node no longer junk-leaning"}
+    # Junk context is loaded once and reused for every candidate comparison.
+    jctx = F._entity_context(cur, arena, junk_id)
+    for cid in cand_ids:
+        cand = rows.get(cid)
+        if cand is None:
+            continue  # candidate merged away since enqueue
+        v = adjudicate_entities({**junk, "context": jctx},
+                                {**cand, "context": F._entity_context(cur, arena, cid)},
+                                post_fn)
+        if v.get("same"):
+            with conn.transaction():
+                # re-fetch inside the txn to be certain both still exist
+                live = _fetch_entities(cur, arena, [junk_id, cid])
+                if junk_id not in live or cid not in live:
+                    return {"merged": False, "reason": "stale at apply"}
+                # The candidate is proposed as master and the junk node as loser.
+                # NOTE(review): _apply_entity_merge re-picks the master via
+                # C.pick_master, while "absorbed" below always reports the junk
+                # node's name — confirm the junk node can never win.
+                n, master_name = _apply_entity_merge(cur, arena, live[cid], [live[junk_id]], model)
+            return {"merged": bool(n), "master": master_name, "absorbed": junk["canonical_name"],
+                    "reason": v.get("reason", "")[:160]}
+    return {"merged": False, "reason": "no candidate adjudicated same"}
+def _process_semantic_fact(cur, conn, arena, payload, post_fn):
+    """Adjudicate a group of facts and fuse the ones judged the same assertion.
+    Re-fetches the facts named in the payload, compares every other fact
+    against the top-ranked one, and, inside a transaction, writes the
+    master's provenance, inserts one fact_merges audit row per plan audit
+    entry and deletes the deprecated facts. Returns a result dict for the
+    queue receipt: "merged" is the number of facts fused away, with either a
+    "reason" (nothing merged) or "kept" (statement of the top-ranked fact)."""
+    fact_ids = payload.get("fact_ids", [])
+    cur.execute(
+        """SELECT id, subject_entity_id, predicate, object_entity_id, statement,
+                  confidence, provenance_event_ids
+           FROM facts WHERE arena=%s AND id = ANY(%s)""",
+        (arena, list(fact_ids)),
+    )
+    facts = cur.fetchall()
+    if len(facts) < 2:
+        return {"merged": 0, "reason": "stale: <2 facts remain"}
+    # Highest confidence first (missing/NULL confidence counts as 0); ties are
+    # broken by id, also descending.
+    ranked = sorted(facts, key=lambda f: (f.get("confidence", 0) or 0, f["id"]), reverse=True)
+    keep, same = ranked[0], [ranked[0]]
+    # Each remaining fact is compared against `keep` only, not against the
+    # other facts in the group.
+    for other in ranked[1:]:
+        if adjudicate_facts(keep["statement"], other["statement"], post_fn).get("same"):
+            same.append(other)
+    if len(same) <= 1:
+        return {"merged": 0, "reason": "no semantic match"}
+    # NOTE(review): the plan chooses its own master_id; presumably it matches
+    # `keep`, whose statement is reported as "kept" below — confirm.
+    plan = build_fact_merge_plan(arena=arena, dup_facts=same)
+    if not plan:
+        return {"merged": 0, "reason": "no plan"}
+    with conn.transaction():
+        # re-validate every fact still exists before mutating
+        cur.execute("SELECT id FROM facts WHERE arena=%s AND id = ANY(%s)",
+                    (arena, [f["id"] for f in same]))
+        live = {r["id"] for r in cur.fetchall()}
+        if not all(f["id"] in live for f in same):
+            return {"merged": 0, "reason": "stale at apply"}
+        cur.execute("UPDATE facts SET provenance_event_ids=%s WHERE id=%s",
+                    (plan["master_provenance"], plan["master_id"]))
+        import json as _json
+        import uuid as _uuid
+        # One audit row per deprecated fact, written before the delete so the
+        # rollback payload is recorded in the same transaction.
+        for a in plan["audit_rows"]:
+            cur.execute(
+                """INSERT INTO fact_merges (id, arena, canonical_id, deprecated_id,
+                     deprecated_statement, merge_signal, provenance_unioned, rollback_payload)
+                   VALUES (%s,%s,%s,%s,%s,'llm_adjudication',%s,%s::jsonb)""",
+                ("fm_" + _uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
+                 a["deprecated_statement"], a["provenance_unioned"],
+                 _json.dumps(a["rollback_payload"], default=str)))
+        cur.execute("DELETE FROM facts WHERE id = ANY(%s)", (plan["deprecated_ids"],))
+    return {"merged": len(same) - 1, "kept": keep["statement"][:80]}
+def process_job(conn, cur, job, post_fn, model) -> dict:
+    """Route a claimed job to the handler for its kind and return the
+    handler's result dict. Raises ValueError for an unrecognised kind."""
+    kind = job["kind"]
+    if kind == "semantic_fact":
+        return _process_semantic_fact(cur, conn, job["arena"], job["payload"], post_fn)
+    if kind != "entity_cooccurrence":
+        raise ValueError(f"unknown kind {job['kind']}")
+    return _process_entity_cooccurrence(cur, conn, job["arena"], job["payload"], post_fn, model)
+def drain(conn, post_fn, model) -> int:
+    """Claim one batch of jobs and work through it; return the batch size.
+    The claim is committed before any job is processed. Each job is then
+    committed on its own: success marks it done, while an exception rolls the
+    job's work back and either releases it for retry or, once its attempts
+    have reached MAX_ATT, marks it failed."""
+    with conn.cursor() as cur:
+        claimed = fq.claim_batch(cur, WORKER_ID, BATCH, TTL, MAX_ATT)
+        conn.commit()
+        for job in claimed:
+            try:
+                result = process_job(conn, cur, job, post_fn, model)
+                fq.mark_done(cur, job["id"], result)
+                conn.commit()
+                print(f"[fusion-consumer] done id={job['id']} {job['kind']} {result}")
+            except Exception as exc:  # noqa: BLE001
+                conn.rollback()
+                msg = f"{type(exc).__name__}: {exc}"
+                retryable = not (job["attempts"] >= MAX_ATT)
+                if retryable:
+                    fq.release(cur, job["id"], msg)
+                    print(f"[fusion-consumer] release id={job['id']} for retry: {msg}", file=sys.stderr)
+                else:
+                    fq.mark_failed(cur, job["id"], msg)
+                    print(f"[fusion-consumer] FAILED id={job['id']} (attempts exhausted): {msg}", file=sys.stderr)
+                conn.commit()
+        return len(claimed)
+def main() -> int:
+    """Entry point: validate the environment, then drain fusion_queue.
+    With FUSION_ONCE set, batches are drained until one comes back empty and
+    the function returns 0. Otherwise it loops forever, sleeping POLL seconds
+    after an empty batch and POLL * 2 seconds after a loop-level error.
+    Returns:
+        2 if PG_DSN or PME_V2_LLM_ENDPOINT is missing, 0 after a FUSION_ONCE
+        drain, 1 if the connection can no longer be rolled back after a
+        loop-level error (so a supervisor can restart the worker)."""
+    dsn = os.environ.get("PG_DSN")
+    endpoint = os.environ.get("PME_V2_LLM_ENDPOINT")
+    model = os.environ.get("LLM_MODEL", F.DEFAULT_MODEL)
+    if not dsn:
+        print("PG_DSN required", file=sys.stderr)
+        return 2
+    if not endpoint:
+        print("PME_V2_LLM_ENDPOINT required (the in-VPC distiller)", file=sys.stderr)
+        return 2
+    post_fn = F._distiller_post_fn(endpoint, model)
+    once = bool(os.environ.get("FUSION_ONCE"))
+    print(f"[fusion-consumer] worker={WORKER_ID} endpoint={endpoint} model={model} "
+          f"batch={BATCH} poll={POLL}s once={once}")
+    with psycopg.connect(dsn, row_factory=dict_row) as conn:
+        if once:
+            total = 0
+            while True:
+                n = drain(conn, post_fn, model)
+                total += n
+                if n == 0:
+                    break
+            print(f"[fusion-consumer] drained {total} job(s), exiting (FUSION_ONCE)")
+            return 0
+        while True:
+            try:
+                n = drain(conn, post_fn, model)
+                if n == 0:
+                    time.sleep(POLL)
+            except Exception as exc:  # noqa: BLE001
+                print(f"[fusion-consumer] loop error: {exc}", file=sys.stderr)
+                # An error that escapes drain() (e.g. a failed claim) leaves the
+                # transaction aborted; without a rollback every later statement
+                # on this connection would fail as well.
+                try:
+                    conn.rollback()
+                except Exception as rollback_exc:  # noqa: BLE001
+                    # The connection itself is unusable; looping on it cannot
+                    # recover, so exit and let the supervisor restart us.
+                    print(f"[fusion-consumer] connection unusable, exiting: {rollback_exc}",
+                          file=sys.stderr)
+                    return 1
+                time.sleep(POLL * 2)
+if __name__ == "__main__":
+    raise SystemExit(main())

package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py CHANGED Viewed

@@ -38,6 +38,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive")
 import canonical as C  # noqa: E402
 from merge import build_entity_merge_plan, build_fact_merge_plan  # noqa: E402
 from adjudicate import adjudicate_entities, adjudicate_facts  # noqa: E402
+import fusion_queue as fq  # noqa: E402  (producer enqueue helpers)
 try:
     import httpx
@@ -355,6 +356,12 @@ def main() -> int:
                     help="in-VPC distiller /v1/chat/completions for adjudication "
                          "(no egress). Omit to skip the LLM tier (deterministic only).")
     ap.add_argument("--model", default=DEFAULT_MODEL)
+    ap.add_argument("--enqueue", action="store_true",
+                    help="producer mode: apply the deterministic tier inline, but "
+                         "ENQUEUE the LLM-tier candidates (co-occurrence + semantic "
+                         "fact) into fusion_queue for the consumer pool to adjudicate "
+                         "async, instead of calling the distiller inline. The scalable "
+                         "shape — keeps the sweep fast and off the GPU.")
     args = ap.parse_args()
     if not args.pg_dsn:
         print("PG_DSN required", file=sys.stderr)
@@ -400,10 +407,32 @@ def main() -> int:
             for group in _entity_dup_sets(cur, args.arena):
                 do_merge(group)
-            # Tier 2 — LLM adjudication via the in-VPC distiller (no egress).
-            # ALL co-occurrence merges live here now — single- and multi-
-            # candidate alike — because co-occurrence never proves identity.
-            if post_fn:
+            # Tier 2 — LLM-adjudicated fusion.
+            #
+            # PRODUCER MODE (--enqueue, the scalable shape): detect the LLM-tier
+            # candidates and enqueue them into fusion_queue; a consumer pool
+            # drains them async via the shared distiller. The sweep stays fast
+            # and GPU-free. Deterministic Tier 1 above already applied inline.
+            enqueued = 0
+            if args.enqueue:
+                for amb in _cooccurrence_candidates(cur, args.arena):
+                    if fq.enqueue_candidate(cur, args.arena, "entity_cooccurrence",
+                                            {"junk_id": amb["junk"]["id"],
+                                             "candidate_ids": [c["id"] for c in amb["candidates"]]}):
+                        enqueued += 1
+                for fg in _semantic_fact_groups(cur, args.arena):
+                    if fq.enqueue_candidate(cur, args.arena, "semantic_fact",
+                                            {"fact_ids": [f["id"] for f in fg]}):
+                        enqueued += 1
+                if args.apply:
+                    conn.commit()
+                print(f"  [enqueue] {enqueued} LLM-tier candidate job(s) -> fusion_queue")
+            # INLINE MODE (--llm-endpoint, legacy / manual / single-arena review):
+            # adjudicate + merge synchronously. ALL co-occurrence merges live
+            # here — single- and multi-candidate alike — because co-occurrence
+            # never proves identity.
+            elif post_fn:
                 # 2a. co-occurrence: does the junk node match a real entity?
                 for amb in _cooccurrence_candidates(cur, args.arena):
                     j = amb["junk"]
@@ -459,7 +488,8 @@ def main() -> int:
             run_id = "fdr_" + uuid.uuid4().hex[:20]
             detail = {"proposals": proposals, "merged": merged,
                       "llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
-                      "llm_tier": bool(post_fn),
+                      "llm_tier": "enqueue" if args.enqueue else bool(post_fn),
+                      "enqueued": enqueued,
                       "audit": {"ok": audit_ok,
                                 "entities_deleted": ent_deleted, "entities_audited": ent_audited,
                                 "facts_deleted": fact_deleted, "facts_audited": fact_audited}}