@pentatonic-ai/ai-agent-sdk 0.10.13 → 0.10.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory-engine-v2/extractor-async/extraction_schema.py +19 -2
- package/packages/memory-engine-v2/extractor-async/test_guided_json_parser.py +14 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +8 -3
- package/packages/memory-engine-v2/fusion_drive/fusion_queue.py +123 -0
- package/packages/memory-engine-v2/fusion_drive/test_fusion_queue.py +81 -0
- package/packages/memory-engine-v2/org-model/migrations/008_fusion_queue.sql +64 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_consumer.py +223 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +35 -5
package/dist/index.cjs
CHANGED
|
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
878
878
|
}
|
|
879
879
|
|
|
880
880
|
// src/telemetry.js
|
|
881
|
-
var VERSION = "0.10.13";
|
|
881
|
+
var VERSION = "0.10.15";
|
|
882
882
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
883
883
|
function machineId() {
|
|
884
884
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
847
847
|
}
|
|
848
848
|
|
|
849
849
|
// src/telemetry.js
|
|
850
|
-
var VERSION = "0.10.13";
|
|
850
|
+
var VERSION = "0.10.15";
|
|
851
851
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
852
852
|
function machineId() {
|
|
853
853
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.10.13",
|
|
3
|
+
"version": "0.10.15",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -35,9 +35,26 @@ from typing import Any
|
|
|
35
35
|
# Allowed-value enums. Moved here from worker.py (which now imports
|
|
36
36
|
# them) so the schema pins to the SAME constants the KV prompt and
|
|
37
37
|
# downstream normalisation use — change them in one place only.
|
|
38
|
+
#
|
|
39
|
+
# 2026-06-16 — ONTOLOGY ALIGNMENT (entity-ontology-the-spine.md). This enum is
|
|
40
|
+
# specifically the set of types the LLM extracts FROM PROSE as named entities.
|
|
41
|
+
# Removed the NLP byproducts that polluted ~28% of the graph and are not
|
|
42
|
+
# business entities:
|
|
43
|
+
# - `place`, `date` → ATTRIBUTES of real entities (a meeting's location/time),
|
|
44
|
+
# never standalone entities. The guided enum no longer admits them, so the
|
|
45
|
+
# model stops minting bare place/date nodes (the info still lands in facts).
|
|
46
|
+
# - `concept` → folds into `topic` (the model now emits `topic`).
|
|
47
|
+
# NOT added here (deliberately): meeting / document / thread / task / decision.
|
|
48
|
+
# Those are NOT LLM-prose entities — they are created by structured-event paths
|
|
49
|
+
# (meetings/actions/thread module projections; the sync extractor already emits
|
|
50
|
+
# `document`) or modelled as facts (`decision` category). Forcing the LLM to
|
|
51
|
+
# mint them from prose would create spurious nodes. They join the ontology via
|
|
52
|
+
# their own paths, not this enum.
|
|
53
|
+
# Forward-only: existing place/date/concept rows are untouched and demoted at
|
|
54
|
+
# READ time by the ontology ENGINE_TYPE_MAP (concept→topic, place/date→attribute,
|
|
55
|
+
# other→unresolved). No re-distill required for this change to take effect.
|
|
38
56
|
ALLOWED_ENT_TYPES = {
|
|
39
|
-
"person", "org", "product", "
|
|
40
|
-
"concept", "topic", "date", "other",
|
|
57
|
+
"person", "org", "product", "project", "topic", "other",
|
|
41
58
|
}
|
|
42
59
|
ALLOWED_FCT_CATEGORIES = {
|
|
43
60
|
"decision", "commitment", "state", "mention",
|
|
@@ -61,6 +61,20 @@ def test_schema_enums_pin_to_shared_constants() -> None:
|
|
|
61
61
|
assert fct_enum == sorted(fct_enum)
|
|
62
62
|
|
|
63
63
|
|
|
64
|
+
def test_entity_type_enum_is_ontology_aligned() -> None:
    """Pin the LLM-extracted entity types to the ontology-aligned set.

    Ontology alignment (entity-ontology-the-spine.md): the enum holds only the
    genuine named-entity types the LLM extracts from prose. The NLP byproducts
    place/date/concept (which polluted ~28% of the graph) must not be silently
    re-admitted by a future edit. meeting/document/thread/task/decision are
    deliberately absent too: they come from structured-event paths or are
    modelled as facts, not LLM prose.
    """
    expected = {"person", "org", "product", "project", "topic", "other"}
    assert xs.ALLOWED_ENT_TYPES == expected
    # Implied by the equality above; kept so the regression is named explicitly.
    byproducts = {"place", "date", "concept"}
    assert byproducts.isdisjoint(xs.ALLOWED_ENT_TYPES)
|
|
76
|
+
|
|
77
|
+
|
|
64
78
|
def test_schema_caps_mirror_prompt_hard_caps() -> None:
|
|
65
79
|
"""8 ENT / 6 FCT / 6 REL per event, statement <= 140 — what
|
|
66
80
|
BATCH_SYSTEM_PROMPT requests, the schema enforces."""
|
|
@@ -222,7 +222,10 @@ RULES:
|
|
|
222
222
|
matching the input index). NEVER skip an event — if an event has \
|
|
223
223
|
nothing to extract, emit ONLY the header.
|
|
224
224
|
- ENT lines have 3 or 4 fields: literal `ENT`, type, name, [email].
|
|
225
|
-
type ∈ {person, org, product,
|
|
225
|
+
type ∈ {person, org, product, project, topic, other}
|
|
226
|
+
Do NOT emit a bare date or place as an entity — those are attributes of
|
|
227
|
+
other entities (a meeting's time/location), not entities themselves. An
|
|
228
|
+
abstract idea or theme is a `topic`. Use `other` only when nothing fits.
|
|
226
229
|
email (OPTIONAL, person only): when the event body or attributes
|
|
227
230
|
show an email address that unambiguously identifies the person,
|
|
228
231
|
append it as the 4th field. This pairs the name+email forms so a
|
|
@@ -277,8 +280,10 @@ Each per-event object has:
|
|
|
277
280
|
RULES:
|
|
278
281
|
- NEVER skip an event — if an event has nothing to extract, emit its \
|
|
279
282
|
object with "index" set and empty arrays.
|
|
280
|
-
- entities: type ∈ {person, org, product,
|
|
281
|
-
|
|
283
|
+
- entities: type ∈ {person, org, product, project, topic, other}. \
|
|
284
|
+
Do NOT emit a bare date or place as an entity (those are attributes of other \
|
|
285
|
+
entities, not entities); an abstract idea or theme is a `topic`; use `other` \
|
|
286
|
+
only when nothing else fits.
|
|
282
287
|
email (OPTIONAL, person only): when the event body or attributes
|
|
283
288
|
show an email address that unambiguously identifies the person,
|
|
284
289
|
include it. This pairs the name+email forms so a later event seeing
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""fusion_queue helpers — enqueue (deduped), claim (leased), mark.
|
|
2
|
+
|
|
3
|
+
Mirrors the distiller's distillation_queue claim semantics (FOR UPDATE SKIP
|
|
4
|
+
LOCKED + claim_expires_at lease for crash recovery + attempts cap). Producer
|
|
5
|
+
(sweep) calls enqueue_candidate; the consumer pool calls claim_batch then
|
|
6
|
+
mark_done / mark_failed / release.
|
|
7
|
+
|
|
8
|
+
dedup_key is the stable identity of a candidate (independent of row content),
|
|
9
|
+
so re-sweeps don't pile duplicate jobs and a candidate already decided ('done')
|
|
10
|
+
isn't re-adjudicated every pass.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import hashlib
|
|
16
|
+
import json
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
CLAIM_TTL_SEC_DEFAULT = 900
|
|
20
|
+
MAX_ATTEMPTS_DEFAULT = 3
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def dedup_key(arena: str, kind: str, member_ids: list[str]) -> str:
    """Return the stable queue identity of a candidate.

    The key is ``"fq_"`` followed by the first 32 hex characters of the SHA-256
    of ``arena``, ``kind`` and the member ids. The ids are sorted before
    hashing, so the same candidate yields the same key regardless of the order
    in which its members were collected.
    """
    joined_members = "|".join(sorted(member_ids))
    # \x1f (unit separator) keeps the three fields from running together.
    material = "{}\x1f{}\x1f{}".format(arena, kind, joined_members)
    digest = hashlib.sha256(material.encode()).hexdigest()
    return "fq_" + digest[:32]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _members(kind: str, payload: dict) -> list[str]:
|
|
32
|
+
if kind == "entity_cooccurrence":
|
|
33
|
+
return [payload["junk_id"], *payload.get("candidate_ids", [])]
|
|
34
|
+
if kind == "semantic_fact":
|
|
35
|
+
return list(payload.get("fact_ids", []))
|
|
36
|
+
raise ValueError(f"unknown kind: {kind}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def enqueue_candidate(cur, arena: str, kind: str, payload: dict) -> bool:
    """Insert one candidate job iff no pending, claimed or done job for it exists.

    The candidate's identity is ``dedup_key`` over the member ids that
    ``_members`` extracts from ``payload``.

    The ``NOT EXISTS`` guard covers the common case. ``ON CONFLICT DO NOTHING``
    covers the race where another session inserts the same key between the
    guard and the insert: the partial unique index
    ``uq_fusion_queue_dedup_active`` then turns the insert into a no-op rather
    than raising a unique violation that would abort the caller's transaction.

    Args:
        cur: DB cursor; must expose ``execute`` and ``rowcount``.
        arena: arena the candidate belongs to.
        kind: ``"entity_cooccurrence"`` or ``"semantic_fact"``.
        payload: identifying ids for the candidate (stored as JSONB).

    Returns:
        True if a row was inserted, False if it was already queued or decided.

    Raises:
        ValueError: if ``kind`` is unknown (raised by ``_members``).
    """
    key = dedup_key(arena, kind, _members(kind, payload))
    cur.execute(
        """INSERT INTO fusion_queue (arena, kind, payload, dedup_key)
           SELECT %s, %s, %s::jsonb, %s
           WHERE NOT EXISTS (
               SELECT 1 FROM fusion_queue
               WHERE dedup_key = %s AND status IN ('pending', 'claimed', 'done')
           )
           ON CONFLICT DO NOTHING""",
        (arena, kind, json.dumps(payload), key, key),
    )
    return cur.rowcount > 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def claim_batch(cur, worker_id: str, limit: int,
                claim_ttl_sec: int = CLAIM_TTL_SEC_DEFAULT,
                max_attempts: int = MAX_ATTEMPTS_DEFAULT) -> list[dict]:
    """Claim up to ``limit`` jobs in one statement and return them as dicts.

    Eligible rows are 'pending' ones, or 'claimed' ones whose lease
    (``claim_expires_at``) has passed, with ``attempts`` below
    ``max_attempts``; lowest id first. Each claimed row gets a new lease of
    ``claim_ttl_sec`` seconds and its ``attempts`` incremented. The inner
    SELECT uses FOR UPDATE SKIP LOCKED so concurrent consumers skip rows
    another consumer is already claiming.

    Returns a list of dicts with keys id, arena, kind, payload, attempts,
    whether the cursor yields dict rows or tuples.

    NOTE(review): a lease-expired 'claimed' row whose attempts already equals
    ``max_attempts`` matches nothing here, and no statement in this module
    moves it to 'failed' — confirm something else reaps such rows.
    """
    claim_sql = """UPDATE fusion_queue SET
               status = 'claimed',
               claimed_by = %s,
               claimed_at = NOW(),
               claim_expires_at = NOW() + (%s || ' seconds')::interval,
               attempts = attempts + 1
             WHERE id IN (
               SELECT id FROM fusion_queue
               WHERE (status = 'pending'
                      OR (status = 'claimed' AND claim_expires_at < NOW()))
                 AND attempts < %s
               ORDER BY id
               FOR UPDATE SKIP LOCKED
               LIMIT %s
             )
             RETURNING id, arena, kind, payload, attempts"""
    cur.execute(claim_sql, (worker_id, claim_ttl_sec, max_attempts, limit))
    # Column order mirrors the RETURNING clause; used to name tuple rows.
    columns = ("id", "arena", "kind", "payload", "attempts")
    return [
        row if isinstance(row, dict)
        else {name: row[pos] for pos, name in enumerate(columns)}
        for row in cur.fetchall()
    ]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def mark_done(cur, queue_id: int, result: dict[str, Any] | None = None) -> None:
    """Mark job ``queue_id`` as 'done' and store what the consumer decided.

    Sets ``completed_at`` to now, clears ``last_error`` and writes ``result``
    as JSONB. A missing or empty ``result`` is stored as ``{}``.
    """
    result_json = json.dumps(result if result else {})
    cur.execute(
        """UPDATE fusion_queue
           SET status='done', completed_at=NOW(), last_error=NULL, result=%s::jsonb
           WHERE id=%s""",
        (result_json, queue_id),
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def mark_failed(cur, queue_id: int, error: str) -> None:
    """Mark job ``queue_id`` as 'failed', recording the error text.

    Sets ``completed_at`` to now. ``error`` is truncated to 2000 characters
    before being stored in ``last_error``.
    """
    stored_error = error[:2000]
    cur.execute(
        "UPDATE fusion_queue SET status='failed', completed_at=NOW(), last_error=%s WHERE id=%s",
        (stored_error, queue_id),
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def release(cur, queue_id: int, error: str) -> None:
    """Put job ``queue_id`` back to 'pending' after a retryable error.

    Clears the claim columns (``claimed_by``, ``claimed_at``,
    ``claim_expires_at``) and stores ``error`` (truncated to 2000 characters)
    in ``last_error``. ``attempts`` is left untouched: the claim already
    incremented it, so the attempts cap still bounds the number of retries.
    """
    stored_error = error[:2000]
    cur.execute(
        """UPDATE fusion_queue SET
             status='pending', claimed_by=NULL, claimed_at=NULL,
             claim_expires_at=NULL, last_error=%s
           WHERE id=%s""",
        (stored_error, queue_id),
    )
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def pending_depth(cur) -> int:
    """Return the number of 'pending' rows in fusion_queue.

    Works with both dict-row cursors (column ``n``) and tuple cursors; a
    falsy count is reported as 0.
    """
    cur.execute("SELECT count(*) AS n FROM fusion_queue WHERE status='pending'")
    row = cur.fetchone()
    if isinstance(row, dict):
        depth = row["n"]
    else:
        depth = row[0]
    return depth or 0
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""fusion_queue helper tests — dedup-key identity + enqueue/claim contracts.
|
|
2
|
+
|
|
3
|
+
The SQL itself (FOR UPDATE SKIP LOCKED, lease expiry, partial-unique dedup) is
|
|
4
|
+
exercised against a real DB on the box; here we lock down the pure identity
|
|
5
|
+
logic and the thin-wrapper contracts with a fake cursor."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
sys.path.insert(0, os.path.dirname(__file__))
|
|
13
|
+
import fusion_queue as fq # noqa: E402
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ── dedup_key ────────────────────────────────────────────────────────
|
|
17
|
+
def test_dedup_key_is_order_independent():
    """The same member ids in a different order must hash to the same key."""
    in_order = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y", "z"])
    shuffled = fq.dedup_key("arena", "entity_cooccurrence", ["z", "x", "y"])
    assert in_order == shuffled, "member order must not change the key"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_dedup_key_distinguishes_members_kind_arena():
    """Changing any one of members, kind or arena must change the key."""
    reference = fq.dedup_key("arena", "entity_cooccurrence", ["x", "y"])
    different_members = fq.dedup_key("arena", "entity_cooccurrence", ["x", "z"])
    different_kind = fq.dedup_key("arena", "semantic_fact", ["x", "y"])
    different_arena = fq.dedup_key("other", "entity_cooccurrence", ["x", "y"])
    assert reference != different_members
    assert reference != different_kind
    assert reference != different_arena
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_members_extraction_per_kind():
    """Each kind reads its member ids from its own payload fields."""
    cooccurrence_payload = {"junk_id": "j", "candidate_ids": ["a", "b"]}
    assert fq._members("entity_cooccurrence", cooccurrence_payload) == ["j", "a", "b"]

    fact_payload = {"fact_ids": ["f1", "f2"]}
    assert fq._members("semantic_fact", fact_payload) == ["f1", "f2"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ── enqueue / claim contracts (fake cursor) ──────────────────────────
|
|
36
|
+
class FakeCursor:
    """In-memory stand-in for a DB cursor.

    Emulates the server-side WHERE-NOT-EXISTS dedup of the enqueue INSERT by
    remembering which dedup keys have been inserted, and serves whatever rows
    are placed in ``_fetch``.
    """

    def __init__(self):
        self.active: set[str] = set()
        self.rowcount = 0
        self._fetch: list = []

    def execute(self, sql, params=None):
        # Collapse whitespace so multi-line SQL can be matched by prefix.
        statement = " ".join(sql.split())
        if statement.startswith("INSERT INTO fusion_queue"):
            # params = (arena, kind, payload_json, key, key)
            key = params[3]
            already_queued = key in self.active
            self.active.add(key)
            self.rowcount = 0 if already_queued else 1
            return
        if statement.startswith("UPDATE fusion_queue SET status = 'claimed'"):
            # This fake has nothing to claim by default.
            self._fetch = []

    def fetchall(self):
        return self._fetch

    def fetchone(self):
        if self._fetch:
            return self._fetch[0]
        return (0,)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_enqueue_is_idempotent_per_candidate():
    """Enqueuing the same candidate twice inserts once; a new candidate inserts."""
    cur = FakeCursor()
    kind = "entity_cooccurrence"
    candidate = {"junk_id": "j", "candidate_ids": ["a"]}

    first = fq.enqueue_candidate(cur, "arena", kind, candidate)
    assert first is True  # first

    repeat = fq.enqueue_candidate(cur, "arena", kind, candidate)
    assert repeat is False  # dup

    # A different candidate set is a different identity, hence a new job.
    other = {"junk_id": "j", "candidate_ids": ["b"]}
    assert fq.enqueue_candidate(cur, "arena", kind, other) is True
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_claim_batch_normalizes_tuple_rows():
    """Tuple rows from the cursor come back from claim_batch as keyed dicts."""
    cur = FakeCursor()
    cur._fetch = [(7, "arena", "semantic_fact", {"fact_ids": ["f"]}, 1)]
    # Replace execute with a no-op so the claim path does not clear _fetch.
    cur.execute = lambda *a, **k: None

    claimed = fq.claim_batch(cur, "w", 10)

    expected_job = {
        "id": 7,
        "arena": "arena",
        "kind": "semantic_fact",
        "payload": {"fact_ids": ["f"]},
        "attempts": 1,
    }
    assert claimed == [expected_job]
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
-- Fusion Drive work queue — async, GPU-shared LLM-tier fusion.
--
-- The fusion sweep used to call the in-VPC distiller INLINE, serially, per
-- arena. That meant a single big arena (usr_252 ~4k candidates, cf037 ~9k)
-- blocked the whole sweep for 1-2h each, competed uncoordinated with live
-- distillation for the GPU, and risked overlapping sweeps (no lock). The
-- deterministic tiers (exact-name entity merges, exact-statement fact dedup)
-- stay inline — they're fast and GPU-free. Only the LLM-adjudicated tier
-- (co-occurrence entity pairs, semantic-fact groups) is decoupled here.
--
-- This mirrors distillation_queue (001_init.sql): same 4-state lifecycle,
-- claim lease for crash recovery, attempts cap. The PRODUCER (sweep) detects
-- candidates and enqueues; a CONSUMER pool drains via the shared vLLM and
-- applies merges (re-validating each candidate is still live first, since
-- detect and apply are now decoupled in time). The autoscaler scales the
-- distiller fleet on COMBINED distillation_queue + fusion_queue depth, with
-- distillation prioritised (ingest/latency path) and fusion as background
-- that fills the troughs.

CREATE TABLE IF NOT EXISTS fusion_queue (
    id                BIGSERIAL PRIMARY KEY,
    arena             TEXT NOT NULL,
    -- entity_cooccurrence: a junk-leaning node + its candidate masters, to
    --   adjudicate (junk vs each candidate, merge on first affirmative "same").
    -- semantic_fact: a (subject, predicate) group of facts with differing
    --   statements, to adjudicate pairwise and fuse the genuine same-assertions.
    kind              TEXT NOT NULL CHECK (kind IN ('entity_cooccurrence', 'semantic_fact')),
    -- Identifying ids only (NOT content snapshots): the consumer re-fetches the
    -- live rows at process time and re-validates before mutating, because the
    -- graph may have changed between enqueue and claim.
    --   entity_cooccurrence: {"junk_id": "...", "candidate_ids": ["...", ...]}
    --   semantic_fact:       {"fact_ids": ["...", ...]}
    payload           JSONB NOT NULL,
    -- Stable identity of the candidate (sha over arena+kind+sorted member ids),
    -- so re-sweeps don't pile duplicate jobs and already-decided candidates are
    -- not re-adjudicated every pass (the inline version re-ground the same ~179k
    -- candidates forever).
    dedup_key         TEXT NOT NULL,

    -- Lifecycle state; new rows start as 'pending'.
    status            TEXT NOT NULL DEFAULT 'pending'
                      CHECK (status IN ('pending', 'claimed', 'done', 'failed')),
    -- Claim lease columns: who holds the job, since when, and until when.
    claimed_by        TEXT,
    claimed_at        TIMESTAMPTZ,
    claim_expires_at  TIMESTAMPTZ,
    attempts          INT NOT NULL DEFAULT 0,
    last_error        TEXT,
    -- What the consumer decided (verdict, merged ids) — observability + audit.
    result            JSONB,
    enqueued_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    completed_at      TIMESTAMPTZ
);

-- Claim scan: pending OR lease-expired claimed, oldest first.
CREATE INDEX IF NOT EXISTS idx_fusion_queue_status ON fusion_queue(status);
CREATE INDEX IF NOT EXISTS idx_fusion_queue_claim_expires
    ON fusion_queue(claim_expires_at) WHERE status = 'claimed';
CREATE INDEX IF NOT EXISTS idx_fusion_queue_arena ON fusion_queue(arena);

-- Enqueue dedup: at most one live-or-decided job per candidate identity.
-- (Partial: 'failed' rows are NOT covered, so a candidate can be retried by a
-- later sweep after a terminal failure.)
CREATE UNIQUE INDEX IF NOT EXISTS uq_fusion_queue_dedup_active
    ON fusion_queue(dedup_key)
    WHERE status IN ('pending', 'claimed', 'done');
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Fusion queue consumer — drains fusion_queue via the shared in-VPC distiller.
|
|
3
|
+
|
|
4
|
+
The producer (fusion_drive_fuse.py --enqueue) applies the deterministic tier
|
|
5
|
+
inline and enqueues only the LLM-tier candidates. This consumer claims those
|
|
6
|
+
jobs (leased, crash-safe — mirrors the distiller worker), adjudicates them on
|
|
7
|
+
the same vLLM the distiller uses, and applies the merge transactionally.
|
|
8
|
+
|
|
9
|
+
Because detect and apply are decoupled in time, every job is RE-VALIDATED
|
|
10
|
+
against the live graph before mutating: rows must still exist and still be
|
|
11
|
+
mergeable, else the job is a no-op (the graph moved on). All merges reuse the
|
|
12
|
+
exact same apply path as the inline fuse (audit receipts + the
|
|
13
|
+
deleted==receipts invariant), so correctness is identical — only the execution
|
|
14
|
+
model changed.
|
|
15
|
+
|
|
16
|
+
Run as a long-lived worker (systemd / container):
|
|
17
|
+
PG_DSN=... PME_V2_LLM_ENDPOINT=http://<distiller>:8005/v1/chat/completions \
|
|
18
|
+
LLM_MODEL=qwen3.6-27b-fp8 python fusion_drive_consumer.py
|
|
19
|
+
|
|
20
|
+
Env: PG_DSN, PME_V2_LLM_ENDPOINT, LLM_MODEL, FUSION_POLL_INTERVAL_SEC (10),
|
|
21
|
+
FUSION_BATCH_SIZE (16), FUSION_CLAIM_TTL_SEC (900), FUSION_MAX_ATTEMPTS (3),
|
|
22
|
+
FUSION_ONCE (set to drain-and-exit instead of looping — for tests/cron).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import os
|
|
28
|
+
import socket
|
|
29
|
+
import sys
|
|
30
|
+
import time
|
|
31
|
+
|
|
32
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
|
|
33
|
+
|
|
34
|
+
import psycopg # noqa: E402
|
|
35
|
+
from psycopg.rows import dict_row # noqa: E402
|
|
36
|
+
|
|
37
|
+
import fusion_queue as fq # noqa: E402
|
|
38
|
+
import canonical as C # noqa: E402
|
|
39
|
+
from merge import build_entity_merge_plan, build_fact_merge_plan # noqa: E402
|
|
40
|
+
from adjudicate import adjudicate_entities, adjudicate_facts # noqa: E402
|
|
41
|
+
|
|
42
|
+
# Reuse the inline fuse's apply + detection helpers verbatim (single source of
|
|
43
|
+
# truth for the merge mutation + audit + invariant).
|
|
44
|
+
import fusion_drive_fuse as F # noqa: E402
|
|
45
|
+
|
|
46
|
+
# Identity this process passes to claim_batch as the claiming worker (host:pid).
WORKER_ID = f"{socket.gethostname()}:{os.getpid()}"
# Seconds to sleep after a drain pass that handled no jobs.
POLL = int(os.environ.get("FUSION_POLL_INTERVAL_SEC", "10"))
# Maximum number of jobs claimed per drain pass.
BATCH = int(os.environ.get("FUSION_BATCH_SIZE", "16"))
# Claim lease length in seconds; defaults to the fusion_queue module default.
TTL = int(os.environ.get("FUSION_CLAIM_TTL_SEC", str(fq.CLAIM_TTL_SEC_DEFAULT)))
# Attempts cap: a job failing on its MAX_ATT-th attempt is marked failed.
MAX_ATT = int(os.environ.get("FUSION_MAX_ATTEMPTS", str(fq.MAX_ATTEMPTS_DEFAULT)))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _fetch_entities(cur, arena, ids):
|
|
54
|
+
if not ids:
|
|
55
|
+
return {}
|
|
56
|
+
cur.execute(
|
|
57
|
+
"""SELECT id, entity_type, canonical_name, aliases, provenance_event_ids, disclosure_class
|
|
58
|
+
FROM entities WHERE arena=%s AND id = ANY(%s)""",
|
|
59
|
+
(arena, list(ids)),
|
|
60
|
+
)
|
|
61
|
+
return {r["id"]: r for r in cur.fetchall()}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _apply_entity_merge(cur, arena, master, losers, model):
    """Merge a group of entity rows using the inline fuse's apply helpers.

    The master is not taken on trust from the caller: ``master`` and ``losers``
    are pooled, scored with ``F._authority_signals`` and re-split by
    ``C.pick_master``. The merge plan is then built and executed, followed by
    a fact dedup on the surviving master.

    Returns:
        ``(loser_count, master_canonical_name)``, or ``(0, None)`` when the
        master pick leaves nothing to absorb.
    """
    group = [master, *losers]
    group_ids = [entity["id"] for entity in group]
    signals = F._authority_signals(cur, arena, group_ids, model)
    chosen, defeated = C.pick_master(F._candidates(group, signals))
    loser_ids = [candidate.entity_id for candidate in defeated]
    if not loser_ids:
        return 0, None

    # Map the scored candidates back to the original entity rows.
    rows_by_id = {entity["id"]: entity for entity in group}
    survivor = rows_by_id[chosen.entity_id]
    absorbed = [rows_by_id[entity_id] for entity_id in loser_ids]

    facts, rels = F._touching(cur, arena, loser_ids)
    plan = build_entity_merge_plan(
        arena=arena, master=survivor, losers=absorbed, facts=facts, relationships=rels
    )
    F._execute_entity_plan(cur, plan)
    F._dedup_master_facts(cur, arena, survivor["id"])
    return len(loser_ids), survivor["canonical_name"]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _process_entity_cooccurrence(cur, conn, arena, payload, post_fn, model):
    """Re-validate a junk node and its candidates, adjudicate, merge on first 'same'.

    Candidates are tried in payload order; the first one the adjudicator calls
    "same" is merged with the junk node inside ``conn.transaction()``, after
    re-fetching both rows to confirm they still exist.

    Args:
        cur: cursor on ``conn``; must yield mapping rows.
        conn: connection providing ``transaction()``.
        arena: arena the job belongs to.
        payload: ``{"junk_id": ..., "candidate_ids": [...]}``.
        post_fn: LLM post function handed to ``adjudicate_entities``.
        model: model name handed through to the authority scoring.

    Returns:
        A result dict for the queue receipt. ``merged`` is False with a
        ``reason`` when the job is stale or no candidate matched.
    """
    junk_id = payload["junk_id"]
    cand_ids = payload.get("candidate_ids", [])
    rows = _fetch_entities(cur, arena, [junk_id, *cand_ids])
    junk = rows.get(junk_id)
    if junk is None:
        return {"merged": False, "reason": "stale: junk node gone"}
    if not C.looks_like_id(junk["canonical_name"]):
        return {"merged": False, "reason": "stale: node no longer junk-leaning"}
    jctx = F._entity_context(cur, arena, junk_id)
    for cid in cand_ids:
        cand = rows.get(cid)
        if cand is None:
            continue  # candidate merged away since enqueue
        v = adjudicate_entities({**junk, "context": jctx},
                                {**cand, "context": F._entity_context(cur, arena, cid)},
                                post_fn)
        if not v.get("same"):
            continue
        # Build the receipt text BEFORE mutating. A null or non-string "reason"
        # in the verdict must not raise after the merge has been applied: the
        # caller would then roll the merge back and retry the job.
        reason = str(v.get("reason") or "")[:160]
        with conn.transaction():
            # re-fetch inside the txn to be certain both still exist
            live = _fetch_entities(cur, arena, [junk_id, cid])
            if junk_id not in live or cid not in live:
                return {"merged": False, "reason": "stale at apply"}
            n, master_name = _apply_entity_merge(cur, arena, live[cid], [live[junk_id]], model)
        # NOTE(review): "absorbed" always reports the junk node's name, while
        # the master is re-picked by authority score inside
        # _apply_entity_merge — confirm the junk node can never win that pick.
        return {"merged": bool(n), "master": master_name, "absorbed": junk["canonical_name"],
                "reason": reason}
    return {"merged": False, "reason": "no candidate adjudicated same"}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _process_semantic_fact(cur, conn, arena, payload, post_fn):
    """Adjudicate a group of facts and fuse those that assert the same thing.

    The highest-ranked fact (by confidence, then id, descending) is the one
    kept; every other fact is compared against it with ``adjudicate_facts``.
    Facts judged "same" are merged via ``build_fact_merge_plan`` inside
    ``conn.transaction()``, after confirming every one of them still exists.
    Each merge writes a ``fact_merges`` audit row.

    Returns a result dict for the queue receipt. ``merged`` is the number of
    facts adjudicated same as the kept one (0 with a ``reason`` when nothing
    was done).
    """
    import json as _json
    import uuid as _uuid

    fact_ids = payload.get("fact_ids", [])
    cur.execute(
        """SELECT id, subject_entity_id, predicate, object_entity_id, statement,
                  confidence, provenance_event_ids
           FROM facts WHERE arena=%s AND id = ANY(%s)""",
        (arena, list(fact_ids)),
    )
    facts = cur.fetchall()
    if len(facts) < 2:
        return {"merged": 0, "reason": "stale: <2 facts remain"}

    def _rank(fact):
        # Missing or null confidence sorts as 0; id breaks ties.
        return (fact.get("confidence", 0) or 0, fact["id"])

    ranked = sorted(facts, key=_rank, reverse=True)
    keep = ranked[0]
    same = [keep]
    for other in ranked[1:]:
        verdict = adjudicate_facts(keep["statement"], other["statement"], post_fn)
        if verdict.get("same"):
            same.append(other)
    if len(same) <= 1:
        return {"merged": 0, "reason": "no semantic match"}

    plan = build_fact_merge_plan(arena=arena, dup_facts=same)
    if not plan:
        return {"merged": 0, "reason": "no plan"}

    same_ids = [fact["id"] for fact in same]
    with conn.transaction():
        # re-validate every fact still exists before mutating
        cur.execute("SELECT id FROM facts WHERE arena=%s AND id = ANY(%s)",
                    (arena, same_ids))
        live = {row["id"] for row in cur.fetchall()}
        if any(fact_id not in live for fact_id in same_ids):
            return {"merged": 0, "reason": "stale at apply"}
        cur.execute("UPDATE facts SET provenance_event_ids=%s WHERE id=%s",
                    (plan["master_provenance"], plan["master_id"]))
        for audit in plan["audit_rows"]:
            cur.execute(
                """INSERT INTO fact_merges (id, arena, canonical_id, deprecated_id,
                     deprecated_statement, merge_signal, provenance_unioned, rollback_payload)
                   VALUES (%s,%s,%s,%s,%s,'llm_adjudication',%s,%s::jsonb)""",
                ("fm_" + _uuid.uuid4().hex[:20], audit["arena"], audit["canonical_id"],
                 audit["deprecated_id"], audit["deprecated_statement"],
                 audit["provenance_unioned"],
                 _json.dumps(audit["rollback_payload"], default=str)))
        cur.execute("DELETE FROM facts WHERE id = ANY(%s)", (plan["deprecated_ids"],))
    return {"merged": len(same) - 1, "kept": keep["statement"][:80]}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def process_job(conn, cur, job, post_fn, model) -> dict:
    """Route a claimed job to the handler for its kind and return its result.

    Raises:
        ValueError: if ``job["kind"]`` is not a known kind.
    """
    kind = job["kind"]
    if kind not in ("entity_cooccurrence", "semantic_fact"):
        raise ValueError(f"unknown kind {kind}")
    if kind == "semantic_fact":
        return _process_semantic_fact(cur, conn, job["arena"], job["payload"], post_fn)
    return _process_entity_cooccurrence(cur, conn, job["arena"], job["payload"], post_fn, model)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def drain(conn, post_fn, model) -> int:
    """Claim one batch of jobs and process each; return how many were claimed.

    The claim is committed first so the lease is visible before any work
    starts. Each job is then processed and committed on its own. When a job
    raises, its work is rolled back and the job is either released for retry
    or, once its attempts have reached MAX_ATT, marked failed.
    """
    with conn.cursor() as cur:
        jobs = fq.claim_batch(cur, WORKER_ID, BATCH, TTL, MAX_ATT)
        conn.commit()
        for job in jobs:
            try:
                outcome = process_job(conn, cur, job, post_fn, model)
                fq.mark_done(cur, job["id"], outcome)
                conn.commit()
                print(f"[fusion-consumer] done id={job['id']} {job['kind']} {outcome}")
            except Exception as exc:  # noqa: BLE001
                # Discard any partial work from this job before updating its row.
                conn.rollback()
                msg = f"{type(exc).__name__}: {exc}"
                if job["attempts"] < MAX_ATT:
                    fq.release(cur, job["id"], msg)
                    print(f"[fusion-consumer] release id={job['id']} for retry: {msg}", file=sys.stderr)
                else:
                    fq.mark_failed(cur, job["id"], msg)
                    print(f"[fusion-consumer] FAILED id={job['id']} (attempts exhausted): {msg}", file=sys.stderr)
                conn.commit()
    return len(jobs)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def main() -> int:
    """Run the fusion queue consumer.

    Reads PG_DSN, PME_V2_LLM_ENDPOINT and LLM_MODEL from the environment.
    With FUSION_ONCE set, drains until a pass claims nothing and exits;
    otherwise polls forever, sleeping POLL seconds after an empty pass.

    Returns:
        0 after a FUSION_ONCE drain, 2 when required configuration is missing,
        1 when the DB connection cannot be recovered after a loop error (so a
        supervisor such as systemd or the container runtime can restart the
        worker with a fresh connection).
    """
    dsn = os.environ.get("PG_DSN")
    endpoint = os.environ.get("PME_V2_LLM_ENDPOINT")
    model = os.environ.get("LLM_MODEL", F.DEFAULT_MODEL)
    if not dsn:
        print("PG_DSN required", file=sys.stderr)
        return 2
    if not endpoint:
        print("PME_V2_LLM_ENDPOINT required (the in-VPC distiller)", file=sys.stderr)
        return 2
    post_fn = F._distiller_post_fn(endpoint, model)
    once = bool(os.environ.get("FUSION_ONCE"))
    print(f"[fusion-consumer] worker={WORKER_ID} endpoint={endpoint} model={model} "
          f"batch={BATCH} poll={POLL}s once={once}")
    with psycopg.connect(dsn, row_factory=dict_row) as conn:
        if once:
            total = 0
            while True:
                n = drain(conn, post_fn, model)
                total += n
                if n == 0:
                    break
            print(f"[fusion-consumer] drained {total} job(s), exiting (FUSION_ONCE)")
            return 0
        while True:
            try:
                n = drain(conn, post_fn, model)
                if n == 0:
                    time.sleep(POLL)
            except Exception as exc:  # noqa: BLE001
                print(f"[fusion-consumer] loop error: {exc}", file=sys.stderr)
                # An error that escapes drain (e.g. during the claim) can leave
                # the session in an aborted transaction; without a rollback
                # every later statement on this connection would fail too.
                try:
                    conn.rollback()
                except Exception as rollback_exc:  # noqa: BLE001
                    # The connection itself is unusable: retrying on it would
                    # loop forever, so exit and let the supervisor restart us.
                    print(f"[fusion-consumer] connection unusable, exiting: {rollback_exc}",
                          file=sys.stderr)
                    return 1
                time.sleep(POLL * 2)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
if __name__ == "__main__":
|
|
223
|
+
raise SystemExit(main())
|
|
@@ -38,6 +38,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive")
|
|
|
38
38
|
import canonical as C # noqa: E402
|
|
39
39
|
from merge import build_entity_merge_plan, build_fact_merge_plan # noqa: E402
|
|
40
40
|
from adjudicate import adjudicate_entities, adjudicate_facts # noqa: E402
|
|
41
|
+
import fusion_queue as fq # noqa: E402 (producer enqueue helpers)
|
|
41
42
|
|
|
42
43
|
try:
|
|
43
44
|
import httpx
|
|
@@ -355,6 +356,12 @@ def main() -> int:
|
|
|
355
356
|
help="in-VPC distiller /v1/chat/completions for adjudication "
|
|
356
357
|
"(no egress). Omit to skip the LLM tier (deterministic only).")
|
|
357
358
|
ap.add_argument("--model", default=DEFAULT_MODEL)
|
|
359
|
+
ap.add_argument("--enqueue", action="store_true",
|
|
360
|
+
help="producer mode: apply the deterministic tier inline, but "
|
|
361
|
+
"ENQUEUE the LLM-tier candidates (co-occurrence + semantic "
|
|
362
|
+
"fact) into fusion_queue for the consumer pool to adjudicate "
|
|
363
|
+
"async, instead of calling the distiller inline. The scalable "
|
|
364
|
+
"shape — keeps the sweep fast and off the GPU.")
|
|
358
365
|
args = ap.parse_args()
|
|
359
366
|
if not args.pg_dsn:
|
|
360
367
|
print("PG_DSN required", file=sys.stderr)
|
|
@@ -400,10 +407,32 @@ def main() -> int:
|
|
|
400
407
|
for group in _entity_dup_sets(cur, args.arena):
|
|
401
408
|
do_merge(group)
|
|
402
409
|
|
|
403
|
-
# Tier 2 — LLM
|
|
404
|
-
#
|
|
405
|
-
#
|
|
406
|
-
|
|
410
|
+
# Tier 2 — LLM-adjudicated fusion.
|
|
411
|
+
#
|
|
412
|
+
# PRODUCER MODE (--enqueue, the scalable shape): detect the LLM-tier
|
|
413
|
+
# candidates and enqueue them into fusion_queue; a consumer pool
|
|
414
|
+
# drains them async via the shared distiller. The sweep stays fast
|
|
415
|
+
# and GPU-free. Deterministic Tier 1 above already applied inline.
|
|
416
|
+
enqueued = 0
|
|
417
|
+
if args.enqueue:
|
|
418
|
+
for amb in _cooccurrence_candidates(cur, args.arena):
|
|
419
|
+
if fq.enqueue_candidate(cur, args.arena, "entity_cooccurrence",
|
|
420
|
+
{"junk_id": amb["junk"]["id"],
|
|
421
|
+
"candidate_ids": [c["id"] for c in amb["candidates"]]}):
|
|
422
|
+
enqueued += 1
|
|
423
|
+
for fg in _semantic_fact_groups(cur, args.arena):
|
|
424
|
+
if fq.enqueue_candidate(cur, args.arena, "semantic_fact",
|
|
425
|
+
{"fact_ids": [f["id"] for f in fg]}):
|
|
426
|
+
enqueued += 1
|
|
427
|
+
if args.apply:
|
|
428
|
+
conn.commit()
|
|
429
|
+
print(f" [enqueue] {enqueued} LLM-tier candidate job(s) -> fusion_queue")
|
|
430
|
+
|
|
431
|
+
# INLINE MODE (--llm-endpoint, legacy / manual / single-arena review):
|
|
432
|
+
# adjudicate + merge synchronously. ALL co-occurrence merges live
|
|
433
|
+
# here — single- and multi-candidate alike — because co-occurrence
|
|
434
|
+
# never proves identity.
|
|
435
|
+
elif post_fn:
|
|
407
436
|
# 2a. co-occurrence: does the junk node match a real entity?
|
|
408
437
|
for amb in _cooccurrence_candidates(cur, args.arena):
|
|
409
438
|
j = amb["junk"]
|
|
@@ -459,7 +488,8 @@ def main() -> int:
|
|
|
459
488
|
run_id = "fdr_" + uuid.uuid4().hex[:20]
|
|
460
489
|
detail = {"proposals": proposals, "merged": merged,
|
|
461
490
|
"llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
|
|
462
|
-
"llm_tier": bool(post_fn),
|
|
491
|
+
"llm_tier": "enqueue" if args.enqueue else bool(post_fn),
|
|
492
|
+
"enqueued": enqueued,
|
|
463
493
|
"audit": {"ok": audit_ok,
|
|
464
494
|
"entities_deleted": ent_deleted, "entities_audited": ent_audited,
|
|
465
495
|
"facts_deleted": fact_deleted, "facts_audited": fact_audited}}
|