@pentatonic-ai/ai-agent-sdk 0.10.13 → 0.10.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
878
878
  }
879
879
 
880
880
  // src/telemetry.js
881
- var VERSION = "0.10.13";
881
+ var VERSION = "0.10.15";
882
882
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
883
883
  function machineId() {
884
884
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js CHANGED
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
847
847
  }
848
848
 
849
849
  // src/telemetry.js
850
- var VERSION = "0.10.13";
850
+ var VERSION = "0.10.15";
851
851
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
852
852
  function machineId() {
853
853
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.10.13",
3
+ "version": "0.10.15",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -35,9 +35,26 @@ from typing import Any
35
35
  # Allowed-value enums. Moved here from worker.py (which now imports
36
36
  # them) so the schema pins to the SAME constants the KV prompt and
37
37
  # downstream normalisation use — change them in one place only.
38
+ #
39
+ # 2026-06-16 — ONTOLOGY ALIGNMENT (entity-ontology-the-spine.md). This enum is
40
+ # specifically the set of types the LLM extracts FROM PROSE as named entities.
41
+ # Removed the NLP byproducts that polluted ~28% of the graph and are not
42
+ # business entities:
43
+ # - `place`, `date` → ATTRIBUTES of real entities (a meeting's location/time),
44
+ # never standalone entities. The guided enum no longer admits them, so the
45
+ # model stops minting bare place/date nodes (the info still lands in facts).
46
+ # - `concept` → folds into `topic` (the model now emits `topic`).
47
+ # NOT added here (deliberately): meeting / document / thread / task / decision.
48
+ # Those are NOT LLM-prose entities — they are created by structured-event paths
49
+ # (meetings/actions/thread module projections; the sync extractor already emits
50
+ # `document`) or modelled as facts (`decision` category). Forcing the LLM to
51
+ # mint them from prose would create spurious nodes. They join the ontology via
52
+ # their own paths, not this enum.
53
+ # Forward-only: existing place/date/concept rows are untouched and demoted at
54
+ # READ time by the ontology ENGINE_TYPE_MAP (concept→topic, place/date→attribute,
55
+ # other→unresolved). No re-distill required for this change to take effect.
38
56
  ALLOWED_ENT_TYPES = {
39
- "person", "org", "product", "place", "project",
40
- "concept", "topic", "date", "other",
57
+ "person", "org", "product", "project", "topic", "other",
41
58
  }
42
59
  ALLOWED_FCT_CATEGORIES = {
43
60
  "decision", "commitment", "state", "mention",
@@ -61,6 +61,20 @@ def test_schema_enums_pin_to_shared_constants() -> None:
61
61
  assert fct_enum == sorted(fct_enum)
62
62
 
63
63
 
64
+ def test_entity_type_enum_is_ontology_aligned() -> None:
65
+ """Ontology alignment (entity-ontology-the-spine.md): the LLM-extracted
66
+ entity types are the genuine named-entity work types — NOT the NLP
67
+ byproducts. Pins the decision so a future edit can't silently re-admit
68
+ place/date/concept (which polluted ~28% of the graph). meeting/document/
69
+ thread/task/decision are deliberately NOT here — they come from
70
+ structured-event paths / are facts, not LLM prose."""
71
+ assert xs.ALLOWED_ENT_TYPES == {
72
+ "person", "org", "product", "project", "topic", "other",
73
+ }
74
+ for byproduct in ("place", "date", "concept"):
75
+ assert byproduct not in xs.ALLOWED_ENT_TYPES
76
+
77
+
64
78
  def test_schema_caps_mirror_prompt_hard_caps() -> None:
65
79
  """8 ENT / 6 FCT / 6 REL per event, statement <= 140 — what
66
80
  BATCH_SYSTEM_PROMPT requests, the schema enforces."""
@@ -222,7 +222,10 @@ RULES:
222
222
  matching the input index). NEVER skip an event — if an event has \
223
223
  nothing to extract, emit ONLY the header.
224
224
  - ENT lines have 3 or 4 fields: literal `ENT`, type, name, [email].
225
- type ∈ {person, org, product, place, project, concept, topic, date, other}
225
+ type ∈ {person, org, product, project, topic, other}
226
+ Do NOT emit a bare date or place as an entity — those are attributes of
227
+ other entities (a meeting's time/location), not entities themselves. An
228
+ abstract idea or theme is a `topic`. Use `other` only when nothing fits.
226
229
  email (OPTIONAL, person only): when the event body or attributes
227
230
  show an email address that unambiguously identifies the person,
228
231
  append it as the 4th field. This pairs the name+email forms so a
@@ -277,8 +280,10 @@ Each per-event object has:
277
280
  RULES:
278
281
  - NEVER skip an event — if an event has nothing to extract, emit its \
279
282
  object with "index" set and empty arrays.
280
- - entities: type ∈ {person, org, product, place, project, concept, \
281
- topic, date, other}.
283
+ - entities: type ∈ {person, org, product, project, topic, other}. \
284
+ Do NOT emit a bare date or place as an entity (those are attributes of other \
285
+ entities, not entities); an abstract idea or theme is a `topic`; use `other` \
286
+ only when nothing else fits.
282
287
  email (OPTIONAL, person only): when the event body or attributes
283
288
  show an email address that unambiguously identifies the person,
284
289
  include it. This pairs the name+email forms so a later event seeing
@@ -0,0 +1,123 @@
1
+ """fusion_queue helpers — enqueue (deduped), claim (leased), mark.
2
+
3
+ Mirrors the distiller's distillation_queue claim semantics (FOR UPDATE SKIP
4
+ LOCKED + claim_expires_at lease for crash recovery + attempts cap). Producer
5
+ (sweep) calls enqueue_candidate; the consumer pool calls claim_batch then
6
+ mark_done / mark_failed / release.
7
+
8
+ dedup_key is the stable identity of a candidate (independent of row content),
9
+ so re-sweeps don't pile duplicate jobs and a candidate already decided ('done')
10
+ isn't re-adjudicated every pass.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import hashlib
16
+ import json
17
+ from typing import Any
18
+
19
+ CLAIM_TTL_SEC_DEFAULT = 900
20
+ MAX_ATTEMPTS_DEFAULT = 3
21
+
22
+
23
+ def dedup_key(arena: str, kind: str, member_ids: list[str]) -> str:
24
+ """Stable hash over (arena, kind, sorted member ids). Order-independent so
25
+ the same candidate hashes identically no matter how it's assembled."""
26
+ members = "|".join(sorted(member_ids))
27
+ raw = f"{arena}\x1f{kind}\x1f{members}"
28
+ return "fq_" + hashlib.sha256(raw.encode()).hexdigest()[:32]
29
+
30
+
31
+ def _members(kind: str, payload: dict) -> list[str]:
32
+ if kind == "entity_cooccurrence":
33
+ return [payload["junk_id"], *payload.get("candidate_ids", [])]
34
+ if kind == "semantic_fact":
35
+ return list(payload.get("fact_ids", []))
36
+ raise ValueError(f"unknown kind: {kind}")
37
+
38
+
39
+ def enqueue_candidate(cur, arena: str, kind: str, payload: dict) -> bool:
40
+ """Insert one candidate job iff no live-or-decided job for it exists.
41
+ Returns True if a row was inserted, False if it was already queued/decided.
42
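+ The partial unique index uq_fusion_queue_dedup_active is the race backstop (the INSERT has no ON CONFLICT clause, so a racing duplicate raises a unique violation instead of returning False)."""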
43
+ key = dedup_key(arena, kind, _members(kind, payload))
44
+ cur.execute(
45
+ """INSERT INTO fusion_queue (arena, kind, payload, dedup_key)
46
+ SELECT %s, %s, %s::jsonb, %s
47
+ WHERE NOT EXISTS (
48
+ SELECT 1 FROM fusion_queue
49
+ WHERE dedup_key = %s AND status IN ('pending', 'claimed', 'done')
50
+ )""",
51
+ (arena, kind, json.dumps(payload), key, key),
52
+ )
53
+ return cur.rowcount > 0
54
+
55
+
56
+ def claim_batch(cur, worker_id: str, limit: int,
57
+ claim_ttl_sec: int = CLAIM_TTL_SEC_DEFAULT,
58
+ max_attempts: int = MAX_ATTEMPTS_DEFAULT) -> list[dict]:
59
+ """Atomically claim up to `limit` jobs (pending or lease-expired), oldest
60
+ first (ORDER BY id); jobs whose attempts have reached max_attempts are never claimed. FOR UPDATE SKIP LOCKED → concurrent consumers never collide."""
61
+ cur.execute(
62
+ """UPDATE fusion_queue SET
63
+ status = 'claimed',
64
+ claimed_by = %s,
65
+ claimed_at = NOW(),
66
+ claim_expires_at = NOW() + (%s || ' seconds')::interval,
67
+ attempts = attempts + 1
68
+ WHERE id IN (
69
+ SELECT id FROM fusion_queue
70
+ WHERE (status = 'pending'
71
+ OR (status = 'claimed' AND claim_expires_at < NOW()))
72
+ AND attempts < %s
73
+ ORDER BY id
74
+ FOR UPDATE SKIP LOCKED
75
+ LIMIT %s
76
+ )
77
+ RETURNING id, arena, kind, payload, attempts""",
78
+ (worker_id, claim_ttl_sec, max_attempts, limit),
79
+ )
80
+ rows = cur.fetchall()
81
+ out = []
82
+ for r in rows:
83
+ # tolerate dict_row or tuple cursors
84
+ if isinstance(r, dict):
85
+ out.append(r)
86
+ else:
87
+ out.append({"id": r[0], "arena": r[1], "kind": r[2],
88
+ "payload": r[3], "attempts": r[4]})
89
+ return out
90
+
91
+
92
+ def mark_done(cur, queue_id: int, result: dict[str, Any] | None = None) -> None:
93
+ cur.execute(
94
+ """UPDATE fusion_queue
95
+ SET status='done', completed_at=NOW(), last_error=NULL, result=%s::jsonb
96
+ WHERE id=%s""",
97
+ (json.dumps(result or {}), queue_id),
98
+ )
99
+
100
+
101
+ def mark_failed(cur, queue_id: int, error: str) -> None:
102
+ cur.execute(
103
+ "UPDATE fusion_queue SET status='failed', completed_at=NOW(), last_error=%s WHERE id=%s",
104
+ (error[:2000], queue_id),
105
+ )
106
+
107
+
108
+ def release(cur, queue_id: int, error: str) -> None:
109
+ """Return a job to 'pending' (retryable error). attempts is already bumped
110
+ by the claim, so the attempts cap still bounds retries."""
111
+ cur.execute(
112
+ """UPDATE fusion_queue SET
113
+ status='pending', claimed_by=NULL, claimed_at=NULL,
114
+ claim_expires_at=NULL, last_error=%s
115
+ WHERE id=%s""",
116
+ (error[:2000], queue_id),
117
+ )
118
+
119
+
120
+ def pending_depth(cur) -> int:
121
+ cur.execute("SELECT count(*) AS n FROM fusion_queue WHERE status='pending'")
122
+ r = cur.fetchone()
123
+ return (r["n"] if isinstance(r, dict) else r[0]) or 0
@@ -0,0 +1,81 @@
1
+ """fusion_queue helper tests — dedup-key identity + enqueue/claim contracts.
2
+
3
+ The SQL itself (FOR UPDATE SKIP LOCKED, lease expiry, partial-unique dedup) is
4
+ exercised against a real DB on the box; here we lock down the pure identity
5
+ logic and the thin-wrapper contracts with a fake cursor."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import sys
11
+
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
+ import fusion_queue as fq # noqa: E402
14
+
15
+
16
+ # ── dedup_key ────────────────────────────────────────────────────────
17
+ def test_dedup_key_is_order_independent():
18
+ a = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y", "z"])
19
+ b = fq.dedup_key("arena", "entity_cooccurrence", ["z", "x", "y"])
20
+ assert a == b, "member order must not change the key"
21
+
22
+
23
+ def test_dedup_key_distinguishes_members_kind_arena():
24
+ base = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y"])
25
+ assert base != fq.dedup_key("arena", "entity_cooccurrence", ["x", "z"]) # members
26
+ assert base != fq.dedup_key("arena", "semantic_fact", ["x", "y"]) # kind
27
+ assert base != fq.dedup_key("other", "entity_cooccurrence", ["x", "y"]) # arena
28
+
29
+
30
+ def test_members_extraction_per_kind():
31
+ assert fq._members("entity_cooccurrence", {"junk_id": "j", "candidate_ids": ["a", "b"]}) == ["j", "a", "b"]
32
+ assert fq._members("semantic_fact", {"fact_ids": ["f1", "f2"]}) == ["f1", "f2"]
33
+
34
+
35
+ # ── enqueue / claim contracts (fake cursor) ──────────────────────────
36
+ class FakeCursor:
37
+ """Simulates the WHERE-NOT-EXISTS dedup server-side: tracks active keys."""
38
+
39
+ def __init__(self):
40
+ self.active: set[str] = set()
41
+ self.rowcount = 0
42
+ self._fetch: list = []
43
+
44
+ def execute(self, sql, params=None):
45
+ s = " ".join(sql.split())
46
+ if s.startswith("INSERT INTO fusion_queue"):
47
+ # params = (arena, kind, payload_json, key, key)
48
+ key = params[3]
49
+ if key in self.active:
50
+ self.rowcount = 0
51
+ else:
52
+ self.active.add(key)
53
+ self.rowcount = 1
54
+ elif s.startswith("UPDATE fusion_queue SET status = 'claimed'"):
55
+ self._fetch = [] # nothing to claim in this fake by default
56
+
57
+ def fetchall(self):
58
+ return self._fetch
59
+
60
+ def fetchone(self):
61
+ return self._fetch[0] if self._fetch else (0,)
62
+
63
+
64
+ def test_enqueue_is_idempotent_per_candidate():
65
+ cur = FakeCursor()
66
+ p = {"junk_id": "j", "candidate_ids": ["a"]}
67
+ assert fq.enqueue_candidate(cur, "arena", "entity_cooccurrence", p) is True # first
68
+ assert fq.enqueue_candidate(cur, "arena", "entity_cooccurrence", p) is False # dup
69
+ # different candidate set → new job
70
+ assert fq.enqueue_candidate(cur, "arena", "entity_cooccurrence",
71
+ {"junk_id": "j", "candidate_ids": ["b"]}) is True
72
+
73
+
74
+ def test_claim_batch_normalizes_tuple_rows():
75
+ cur = FakeCursor()
76
+ cur._fetch = [(7, "arena", "semantic_fact", {"fact_ids": ["f"]}, 1)]
77
+ # monkeypatch execute to not clear _fetch for the claim path
78
+ cur.execute = lambda *a, **k: None
79
+ out = fq.claim_batch(cur, "w", 10)
80
+ assert out == [{"id": 7, "arena": "arena", "kind": "semantic_fact",
81
+ "payload": {"fact_ids": ["f"]}, "attempts": 1}]
@@ -0,0 +1,64 @@
1
+ -- Fusion Drive work queue — async, GPU-shared LLM-tier fusion.
2
+ --
3
+ -- The fusion sweep used to call the in-VPC distiller INLINE, serially, per
4
+ -- arena. That meant a single big arena (usr_252 ~4k candidates, cf037 ~9k)
5
+ -- blocked the whole sweep for 1-2h each, competed uncoordinated with live
6
+ -- distillation for the GPU, and risked overlapping sweeps (no lock). The
7
+ -- deterministic tiers (exact-name entity merges, exact-statement fact dedup)
8
+ -- stay inline — they're fast and GPU-free. Only the LLM-adjudicated tier
9
+ -- (co-occurrence entity pairs, semantic-fact groups) is decoupled here.
10
+ --
11
+ -- This mirrors distillation_queue (001_init.sql): same 4-state lifecycle,
12
+ -- claim lease for crash recovery, attempts cap. The PRODUCER (sweep) detects
13
+ -- candidates and enqueues; a CONSUMER pool drains via the shared vLLM and
14
+ -- applies merges (re-validating each candidate is still live first, since
15
+ -- detect and apply are now decoupled in time). The autoscaler scales the
16
+ -- distiller fleet on COMBINED distillation_queue + fusion_queue depth, with
17
+ -- distillation prioritised (ingest/latency path) and fusion as background
18
+ -- that fills the troughs.
19
+
20
+ CREATE TABLE IF NOT EXISTS fusion_queue (
21
+ id BIGSERIAL PRIMARY KEY,
22
+ arena TEXT NOT NULL,
23
+ -- entity_cooccurrence: a junk-leaning node + its candidate masters, to
24
+ -- adjudicate (junk vs each candidate, merge on first affirmative "same").
25
+ -- semantic_fact: a (subject, predicate) group of facts with differing
26
+ -- statements, to adjudicate pairwise and fuse the genuine same-assertions.
27
+ kind TEXT NOT NULL CHECK (kind IN ('entity_cooccurrence', 'semantic_fact')),
28
+ -- Identifying ids only (NOT content snapshots): the consumer re-fetches the
29
+ -- live rows at process time and re-validates before mutating, because the
30
+ -- graph may have changed between enqueue and claim.
31
+ -- entity_cooccurrence: {"junk_id": "...", "candidate_ids": ["...", ...]}
32
+ -- semantic_fact: {"fact_ids": ["...", ...]}
33
+ payload JSONB NOT NULL,
34
+ -- Stable identity of the candidate (sha over arena+kind+sorted member ids),
35
+ -- so re-sweeps don't pile duplicate jobs and already-decided candidates are
36
+ -- not re-adjudicated every pass (the inline version re-ground the same ~179k
37
+ -- candidates forever).
38
+ dedup_key TEXT NOT NULL,
39
+
40
+ status TEXT NOT NULL DEFAULT 'pending'
41
+ CHECK (status IN ('pending', 'claimed', 'done', 'failed')),
42
+ claimed_by TEXT,
43
+ claimed_at TIMESTAMPTZ,
44
+ claim_expires_at TIMESTAMPTZ,
45
+ attempts INT NOT NULL DEFAULT 0,
46
+ last_error TEXT,
47
+ -- What the consumer decided (verdict, merged ids) — observability + audit.
48
+ result JSONB,
49
+ enqueued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
50
+ completed_at TIMESTAMPTZ
51
+ );
52
+
53
+ -- Claim scan: pending OR lease-expired claimed, oldest first.
54
+ CREATE INDEX IF NOT EXISTS idx_fusion_queue_status ON fusion_queue(status);
55
+ CREATE INDEX IF NOT EXISTS idx_fusion_queue_claim_expires
56
+ ON fusion_queue(claim_expires_at) WHERE status = 'claimed';
57
+ CREATE INDEX IF NOT EXISTS idx_fusion_queue_arena ON fusion_queue(arena);
58
+
59
+ -- Enqueue dedup: at most one live-or-decided job per candidate identity.
60
+ -- (Partial: 'failed' rows are NOT covered, so a candidate can be retried by a
61
+ -- later sweep after a terminal failure.)
62
+ CREATE UNIQUE INDEX IF NOT EXISTS uq_fusion_queue_dedup_active
63
+ ON fusion_queue(dedup_key)
64
+ WHERE status IN ('pending', 'claimed', 'done');
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env python3
2
+ """Fusion queue consumer — drains fusion_queue via the shared in-VPC distiller.
3
+
4
+ The producer (fusion_drive_fuse.py --enqueue) applies the deterministic tier
5
+ inline and enqueues only the LLM-tier candidates. This consumer claims those
6
+ jobs (leased, crash-safe — mirrors the distiller worker), adjudicates them on
7
+ the same vLLM the distiller uses, and applies the merge transactionally.
8
+
9
+ Because detect and apply are decoupled in time, every job is RE-VALIDATED
10
+ against the live graph before mutating: rows must still exist and still be
11
+ mergeable, else the job is a no-op (the graph moved on). All merges reuse the
12
+ exact same apply path as the inline fuse (audit receipts + the
13
+ deleted==receipts invariant), so correctness is identical — only the execution
14
+ model changed.
15
+
16
+ Run as a long-lived worker (systemd / container):
17
+ PG_DSN=... PME_V2_LLM_ENDPOINT=http://<distiller>:8005/v1/chat/completions \
18
+ LLM_MODEL=qwen3.6-27b-fp8 python fusion_drive_consumer.py
19
+
20
+ Env: PG_DSN, PME_V2_LLM_ENDPOINT, LLM_MODEL, FUSION_POLL_INTERVAL_SEC (10),
21
+ FUSION_BATCH_SIZE (16), FUSION_CLAIM_TTL_SEC (900), FUSION_MAX_ATTEMPTS (3),
22
+ FUSION_ONCE (set to ANY non-empty value — even "0" or "false" — to drain-and-exit instead of looping; for tests/cron).
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import os
28
+ import socket
29
+ import sys
30
+ import time
31
+
32
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
33
+
34
+ import psycopg # noqa: E402
35
+ from psycopg.rows import dict_row # noqa: E402
36
+
37
+ import fusion_queue as fq # noqa: E402
38
+ import canonical as C # noqa: E402
39
+ from merge import build_entity_merge_plan, build_fact_merge_plan # noqa: E402
40
+ from adjudicate import adjudicate_entities, adjudicate_facts # noqa: E402
41
+
42
+ # Reuse the inline fuse's apply + detection helpers verbatim (single source of
43
+ # truth for the merge mutation + audit + invariant).
44
+ import fusion_drive_fuse as F # noqa: E402
45
+
46
+ WORKER_ID = f"{socket.gethostname()}:{os.getpid()}"
47
+ POLL = int(os.environ.get("FUSION_POLL_INTERVAL_SEC", "10"))
48
+ BATCH = int(os.environ.get("FUSION_BATCH_SIZE", "16"))
49
+ TTL = int(os.environ.get("FUSION_CLAIM_TTL_SEC", str(fq.CLAIM_TTL_SEC_DEFAULT)))
50
+ MAX_ATT = int(os.environ.get("FUSION_MAX_ATTEMPTS", str(fq.MAX_ATTEMPTS_DEFAULT)))
51
+
52
+
53
+ def _fetch_entities(cur, arena, ids):
54
+ if not ids:
55
+ return {}
56
+ cur.execute(
57
+ """SELECT id, entity_type, canonical_name, aliases, provenance_event_ids, disclosure_class
58
+ FROM entities WHERE arena=%s AND id = ANY(%s)""",
59
+ (arena, list(ids)),
60
+ )
61
+ return {r["id"]: r for r in cur.fetchall()}
62
+
63
+
64
+ def _apply_entity_merge(cur, arena, master, losers, model):
65
+ """Same apply path as the inline fuse do_merge: authority-scored master
66
+ pick, plan, execute, post-merge fact dedup. Returns loser count."""
67
+ group = [master, *losers]
68
+ sig = F._authority_signals(cur, arena, [e["id"] for e in group], model)
69
+ master_c, losers_c = C.pick_master(F._candidates(group, sig))
70
+ loser_ids = [l.entity_id for l in losers_c]
71
+ if not loser_ids:
72
+ return 0, None
73
+ by_id = {e["id"]: e for e in group}
74
+ m = by_id[master_c.entity_id]
75
+ ls = [by_id[i] for i in loser_ids]
76
+ facts, rels = F._touching(cur, arena, loser_ids)
77
+ plan = build_entity_merge_plan(arena=arena, master=m, losers=ls, facts=facts, relationships=rels)
78
+ F._execute_entity_plan(cur, plan)
79
+ F._dedup_master_facts(cur, arena, m["id"])
80
+ return len(loser_ids), m["canonical_name"]
81
+
82
+
83
+ def _process_entity_cooccurrence(cur, conn, arena, payload, post_fn, model):
84
+ """Re-validate the junk node + candidates, adjudicate, merge on first
85
+ affirmative 'same'. Returns a result dict for the queue receipt."""
86
+ junk_id = payload["junk_id"]
87
+ cand_ids = payload.get("candidate_ids", [])
88
+ rows = _fetch_entities(cur, arena, [junk_id, *cand_ids])
89
+ junk = rows.get(junk_id)
90
+ if junk is None:
91
+ return {"merged": False, "reason": "stale: junk node gone"}
92
+ if not C.looks_like_id(junk["canonical_name"]):
93
+ return {"merged": False, "reason": "stale: node no longer junk-leaning"}
94
+ jctx = F._entity_context(cur, arena, junk_id)
95
+ for cid in cand_ids:
96
+ cand = rows.get(cid)
97
+ if cand is None:
98
+ continue # candidate merged away since enqueue
99
+ v = adjudicate_entities({**junk, "context": jctx},
100
+ {**cand, "context": F._entity_context(cur, arena, cid)},
101
+ post_fn)
102
+ if v.get("same"):
103
+ with conn.transaction():
104
+ # re-fetch inside the txn to be certain both still exist
105
+ live = _fetch_entities(cur, arena, [junk_id, cid])
106
+ if junk_id not in live or cid not in live:
107
+ return {"merged": False, "reason": "stale at apply"}
108
+ n, master_name = _apply_entity_merge(cur, arena, live[cid], [live[junk_id]], model)
109
+ return {"merged": bool(n), "master": master_name, "absorbed": junk["canonical_name"],
110
+ "reason": v.get("reason", "")[:160]}
111
+ return {"merged": False, "reason": "no candidate adjudicated same"}
112
+
113
+
114
+ def _process_semantic_fact(cur, conn, arena, payload, post_fn):
115
+ fact_ids = payload.get("fact_ids", [])
116
+ cur.execute(
117
+ """SELECT id, subject_entity_id, predicate, object_entity_id, statement,
118
+ confidence, provenance_event_ids
119
+ FROM facts WHERE arena=%s AND id = ANY(%s)""",
120
+ (arena, list(fact_ids)),
121
+ )
122
+ facts = cur.fetchall()
123
+ if len(facts) < 2:
124
+ return {"merged": 0, "reason": "stale: <2 facts remain"}
125
+ ranked = sorted(facts, key=lambda f: (f.get("confidence", 0) or 0, f["id"]), reverse=True)
126
+ keep, same = ranked[0], [ranked[0]]
127
+ for other in ranked[1:]:
128
+ if adjudicate_facts(keep["statement"], other["statement"], post_fn).get("same"):
129
+ same.append(other)
130
+ if len(same) <= 1:
131
+ return {"merged": 0, "reason": "no semantic match"}
132
+ plan = build_fact_merge_plan(arena=arena, dup_facts=same)
133
+ if not plan:
134
+ return {"merged": 0, "reason": "no plan"}
135
+ with conn.transaction():
136
+ # re-validate every fact still exists before mutating
137
+ cur.execute("SELECT id FROM facts WHERE arena=%s AND id = ANY(%s)",
138
+ (arena, [f["id"] for f in same]))
139
+ live = {r["id"] for r in cur.fetchall()}
140
+ if not all(f["id"] in live for f in same):
141
+ return {"merged": 0, "reason": "stale at apply"}
142
+ cur.execute("UPDATE facts SET provenance_event_ids=%s WHERE id=%s",
143
+ (plan["master_provenance"], plan["master_id"]))
144
+ import json as _json
145
+ import uuid as _uuid
146
+ for a in plan["audit_rows"]:
147
+ cur.execute(
148
+ """INSERT INTO fact_merges (id, arena, canonical_id, deprecated_id,
149
+ deprecated_statement, merge_signal, provenance_unioned, rollback_payload)
150
+ VALUES (%s,%s,%s,%s,%s,'llm_adjudication',%s,%s::jsonb)""",
151
+ ("fm_" + _uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
152
+ a["deprecated_statement"], a["provenance_unioned"],
153
+ _json.dumps(a["rollback_payload"], default=str)))
154
+ cur.execute("DELETE FROM facts WHERE id = ANY(%s)", (plan["deprecated_ids"],))
155
+ return {"merged": len(same) - 1, "kept": keep["statement"][:80]}
156
+
157
+
158
+ def process_job(conn, cur, job, post_fn, model) -> dict:
159
+ if job["kind"] == "entity_cooccurrence":
160
+ return _process_entity_cooccurrence(cur, conn, job["arena"], job["payload"], post_fn, model)
161
+ if job["kind"] == "semantic_fact":
162
+ return _process_semantic_fact(cur, conn, job["arena"], job["payload"], post_fn)
163
+ raise ValueError(f"unknown kind {job['kind']}")
164
+
165
+
166
+ def drain(conn, post_fn, model) -> int:
167
+ """Claim + process one batch. Returns number of jobs handled."""
168
+ with conn.cursor() as cur:
169
+ jobs = fq.claim_batch(cur, WORKER_ID, BATCH, TTL, MAX_ATT)
170
+ conn.commit()
171
+ for job in jobs:
172
+ try:
173
+ result = process_job(conn, cur, job, post_fn, model)
174
+ fq.mark_done(cur, job["id"], result)
175
+ conn.commit()
176
+ print(f"[fusion-consumer] done id={job['id']} {job['kind']} {result}")
177
+ except Exception as exc: # noqa: BLE001
178
+ conn.rollback()
179
+ msg = f"{type(exc).__name__}: {exc}"
180
+ if job["attempts"] >= MAX_ATT:
181
+ fq.mark_failed(cur, job["id"], msg)
182
+ print(f"[fusion-consumer] FAILED id={job['id']} (attempts exhausted): {msg}", file=sys.stderr)
183
+ else:
184
+ fq.release(cur, job["id"], msg)
185
+ print(f"[fusion-consumer] release id={job['id']} for retry: {msg}", file=sys.stderr)
186
+ conn.commit()
187
+ return len(jobs)
188
+
189
+
190
+ def main() -> int:
191
+ dsn = os.environ.get("PG_DSN")
192
+ endpoint = os.environ.get("PME_V2_LLM_ENDPOINT")
193
+ model = os.environ.get("LLM_MODEL", F.DEFAULT_MODEL)
194
+ if not dsn:
195
+ print("PG_DSN required", file=sys.stderr); return 2
196
+ if not endpoint:
197
+ print("PME_V2_LLM_ENDPOINT required (the in-VPC distiller)", file=sys.stderr); return 2
198
+ post_fn = F._distiller_post_fn(endpoint, model)
199
+ once = bool(os.environ.get("FUSION_ONCE"))
200
+ print(f"[fusion-consumer] worker={WORKER_ID} endpoint={endpoint} model={model} "
201
+ f"batch={BATCH} poll={POLL}s once={once}")
202
+ with psycopg.connect(dsn, row_factory=dict_row) as conn:
203
+ if once:
204
+ total = 0
205
+ while True:
206
+ n = drain(conn, post_fn, model)
207
+ total += n
208
+ if n == 0:
209
+ break
210
+ print(f"[fusion-consumer] drained {total} job(s), exiting (FUSION_ONCE)")
211
+ return 0
212
+ while True:
213
+ try:
214
+ n = drain(conn, post_fn, model)
215
+ if n == 0:
216
+ time.sleep(POLL)
217
+ except Exception as exc: # noqa: BLE001
218
+ print(f"[fusion-consumer] loop error: {exc}", file=sys.stderr)
219
+ time.sleep(POLL * 2)
220
+
221
+
222
+ if __name__ == "__main__":
223
+ raise SystemExit(main())
@@ -38,6 +38,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive")
38
38
  import canonical as C # noqa: E402
39
39
  from merge import build_entity_merge_plan, build_fact_merge_plan # noqa: E402
40
40
  from adjudicate import adjudicate_entities, adjudicate_facts # noqa: E402
41
+ import fusion_queue as fq # noqa: E402 (producer enqueue helpers)
41
42
 
42
43
  try:
43
44
  import httpx
@@ -355,6 +356,12 @@ def main() -> int:
355
356
  help="in-VPC distiller /v1/chat/completions for adjudication "
356
357
  "(no egress). Omit to skip the LLM tier (deterministic only).")
357
358
  ap.add_argument("--model", default=DEFAULT_MODEL)
359
+ ap.add_argument("--enqueue", action="store_true",
360
+ help="producer mode: apply the deterministic tier inline, but "
361
+ "ENQUEUE the LLM-tier candidates (co-occurrence + semantic "
362
+ "fact) into fusion_queue for the consumer pool to adjudicate "
363
+ "async, instead of calling the distiller inline. The scalable "
364
+ "shape — keeps the sweep fast and off the GPU.")
358
365
  args = ap.parse_args()
359
366
  if not args.pg_dsn:
360
367
  print("PG_DSN required", file=sys.stderr)
@@ -400,10 +407,32 @@ def main() -> int:
400
407
  for group in _entity_dup_sets(cur, args.arena):
401
408
  do_merge(group)
402
409
 
403
- # Tier 2 — LLM adjudication via the in-VPC distiller (no egress).
404
- # ALL co-occurrence merges live here now — single- and multi-
405
- # candidate alike because co-occurrence never proves identity.
406
- if post_fn:
410
+ # Tier 2 — LLM-adjudicated fusion.
411
+ #
412
+ # PRODUCER MODE (--enqueue, the scalable shape): detect the LLM-tier
413
+ # candidates and enqueue them into fusion_queue; a consumer pool
414
+ # drains them async via the shared distiller. The sweep stays fast
415
+ # and GPU-free. Deterministic Tier 1 above already applied inline.
416
+ enqueued = 0
417
+ if args.enqueue:
418
+ for amb in _cooccurrence_candidates(cur, args.arena):
419
+ if fq.enqueue_candidate(cur, args.arena, "entity_cooccurrence",
420
+ {"junk_id": amb["junk"]["id"],
421
+ "candidate_ids": [c["id"] for c in amb["candidates"]]}):
422
+ enqueued += 1
423
+ for fg in _semantic_fact_groups(cur, args.arena):
424
+ if fq.enqueue_candidate(cur, args.arena, "semantic_fact",
425
+ {"fact_ids": [f["id"] for f in fg]}):
426
+ enqueued += 1
427
+ if args.apply:
428
+ conn.commit()
429
+ print(f" [enqueue] {enqueued} LLM-tier candidate job(s) -> fusion_queue")
430
+
431
+ # INLINE MODE (--llm-endpoint, legacy / manual / single-arena review):
432
+ # adjudicate + merge synchronously. ALL co-occurrence merges live
433
+ # here — single- and multi-candidate alike — because co-occurrence
434
+ # never proves identity.
435
+ elif post_fn:
407
436
  # 2a. co-occurrence: does the junk node match a real entity?
408
437
  for amb in _cooccurrence_candidates(cur, args.arena):
409
438
  j = amb["junk"]
@@ -459,7 +488,8 @@ def main() -> int:
459
488
  run_id = "fdr_" + uuid.uuid4().hex[:20]
460
489
  detail = {"proposals": proposals, "merged": merged,
461
490
  "llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
462
- "llm_tier": bool(post_fn),
491
+ "llm_tier": "enqueue" if args.enqueue else bool(post_fn),
492
+ "enqueued": enqueued,
463
493
  "audit": {"ok": audit_ok,
464
494
  "entities_deleted": ent_deleted, "entities_audited": ent_audited,
465
495
  "facts_deleted": fact_deleted, "facts_audited": fact_audited}}