@pentatonic-ai/ai-agent-sdk 0.10.7 → 0.10.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/index.cjs +1 -1
  2. package/dist/index.js +1 -1
  3. package/package.json +1 -1
  4. package/packages/memory-engine-v2/RFC-decay-and-fusion.md +185 -0
  5. package/packages/memory-engine-v2/RFC-fusion-drive.md +199 -0
  6. package/packages/memory-engine-v2/extractor-async/confidence.py +37 -0
  7. package/packages/memory-engine-v2/extractor-async/source_time.py +63 -0
  8. package/packages/memory-engine-v2/extractor-async/test_born_salience_parity.py +35 -0
  9. package/packages/memory-engine-v2/extractor-async/test_source_time.py +102 -0
  10. package/packages/memory-engine-v2/extractor-async/worker.py +121 -18
  11. package/packages/memory-engine-v2/extractor-sync/Dockerfile +3 -1
  12. package/packages/memory-engine-v2/extractor-sync/confidence.py +99 -0
  13. package/packages/memory-engine-v2/extractor-sync/server.py +61 -11
  14. package/packages/memory-engine-v2/extractor-sync/source_time.py +63 -0
  15. package/packages/memory-engine-v2/extractor-sync/test_confidence_parity.py +18 -0
  16. package/packages/memory-engine-v2/extractor-sync/test_paired_extraction.py +2 -2
  17. package/packages/memory-engine-v2/fusion_drive/__init__.py +0 -0
  18. package/packages/memory-engine-v2/fusion_drive/adjudicate.py +85 -0
  19. package/packages/memory-engine-v2/fusion_drive/canonical.py +94 -0
  20. package/packages/memory-engine-v2/fusion_drive/conftest.py +8 -0
  21. package/packages/memory-engine-v2/fusion_drive/merge.py +178 -0
  22. package/packages/memory-engine-v2/fusion_drive/salience.py +118 -0
  23. package/packages/memory-engine-v2/fusion_drive/test_adjudicate.py +65 -0
  24. package/packages/memory-engine-v2/fusion_drive/test_canonical.py +76 -0
  25. package/packages/memory-engine-v2/fusion_drive/test_merge.py +112 -0
  26. package/packages/memory-engine-v2/fusion_drive/test_salience.py +93 -0
  27. package/packages/memory-engine-v2/org-model/migrations/006_fusion_drive.sql +80 -0
  28. package/packages/memory-engine-v2/scripts/fusion_drive_born_salience_backfill.py +113 -0
  29. package/packages/memory-engine-v2/scripts/fusion_drive_decay.py +200 -0
  30. package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +434 -0
@@ -0,0 +1,434 @@
1
+ #!/usr/bin/env python3
2
+ """Fusion Drive — fusion pass (detect duplicate nodes, fuse into a master).
3
+
4
+ Detects duplicate/near-duplicate entities (and exact-triple duplicate facts)
5
+ within an arena and merges each set into a single master, repointing facts +
6
+ relationships, unioning aliases/provenance, and writing reversible audit rows
7
+ (entity_merges / fact_merges).
8
+
9
+ Detection here is the TRACTABLE, no-LLM tier:
10
+ - entities: exact normalized-name / alias-overlap dupes, AND cross-run
11
+ shared-provenance dupes (two same-type entities citing the same event
12
+ where one is junk-leaning — the 7B numeric-ID vs new-teacher real-name
13
+ case). Embedding-band + LLM adjudication detection stays in
14
+ entity_resolution_v2.py (#82); this pass consumes its proposals too if
15
+ handed a --proposals file.
16
+ - facts: exact (subject, predicate, object) triples.
17
+ Master selection uses fusion_drive.canonical (directory-anchored scoring),
18
+ NOT richest-row-wins.
19
+
20
+ Safety: arena-scoped (required), DRY-RUN default, --apply to merge, each merge
21
+ its own transaction, every deprecated row recoverable from the audit table,
22
+ restricted disclosure never auto-merged.
23
+
24
+ Usage:
25
+ fusion_drive_fuse.py --arena 'X' # dry-run: list merge proposals
26
+ fusion_drive_fuse.py --arena 'X' --apply # execute merges (reversible)
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import argparse
32
+ import json
33
+ import os
34
+ import sys
35
+ import uuid
36
+
37
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
38
+ import canonical as C # noqa: E402
39
+ from merge import build_entity_merge_plan, build_fact_merge_plan # noqa: E402
40
+ from adjudicate import adjudicate_entities, adjudicate_facts # noqa: E402
41
+
42
+ try:
43
+ import httpx
44
+ import psycopg
45
+ from psycopg.rows import dict_row
46
+ except ModuleNotFoundError:
47
+ print("psycopg (+ httpx for --llm-endpoint) required", file=sys.stderr)
48
+ raise
49
+
50
+ # Current distiller served-model — the in-VPC LLM used for adjudication
51
+ # (no egress; same model that extracted this content). Override with --model.
52
+ DEFAULT_MODEL = "qwen3.6-27b-fp8"
53
+
54
+
55
+ def _distiller_post_fn(endpoint: str, model: str):
56
+ """Build a post_fn(messages)->str hitting the distiller's OpenAI
57
+ /v1/chat/completions (temperature 0, thinking off — same shape the worker
58
+ uses). In-VPC: memory content never leaves the network. Raises on failure
59
+ so the adjudicator fails closed (treats it as 'unsure', never merges)."""
60
+ def post(messages):
61
+ r = httpx.post(endpoint, timeout=60, json={
62
+ "model": model, "messages": messages, "temperature": 0.0,
63
+ "max_tokens": 120, "chat_template_kwargs": {"enable_thinking": False},
64
+ })
65
+ r.raise_for_status()
66
+ return r.json()["choices"][0]["message"]["content"]
67
+ return post
68
+
69
+
70
+ def _norm(s: str) -> str:
71
+ return " ".join(s.lower().split())
72
+
73
+
74
+ def _entity_dup_sets(cur, arena: str) -> list[list[dict]]:
75
+ """Group same-(type) entities that are exact normalized-name dupes OR
76
+ share a provenance event with a junk-leaning twin. Returns groups of >=2."""
77
+ cur.execute(
78
+ """SELECT id, entity_type, canonical_name, aliases, provenance_event_ids, disclosure_class
79
+ FROM entities WHERE arena = %s AND disclosure_class <> 'restricted'""",
80
+ (arena,),
81
+ )
82
+ ents = cur.fetchall()
83
+ groups: dict[tuple, list[dict]] = {}
84
+ # 1. exact normalized-name within (type)
85
+ for e in ents:
86
+ key = (e["entity_type"], _norm(e["canonical_name"]))
87
+ groups.setdefault(key, []).append(e)
88
+ exact = [g for g in groups.values() if len(g) > 1]
89
+
90
+ # 2. cross-run shared-provenance: same type + same event in provenance,
91
+ # where some members are junk-leaning (looks-like-id) — catches
92
+ # name-divergent dupes like "1716801984" vs "Katie Cooper" that never
93
+ # block on name.
94
+ #
95
+ # OVER-MERGE GUARD: a single event can legitimately mention several
96
+ # distinct same-type entities (an email naming Alice, Bob, AND a
97
+ # numeric-ID node). Merging the whole co-occurrence group would
98
+ # conflate Alice and Bob. So the no-LLM tier ONLY proposes when the
99
+ # group has EXACTLY ONE non-junk member: we fold the junk node(s) into
100
+ # that unambiguous real master. Groups with 0 or >=2 non-junk members
101
+ # are ambiguous and deferred to the LLM-adjudicated tier
102
+ # (entity_resolution_v2.py) rather than auto-merged.
103
+ by_event_type: dict[tuple, list[dict]] = {}
104
+ for e in ents:
105
+ for ev in (e["provenance_event_ids"] or []):
106
+ by_event_type.setdefault((e["entity_type"], ev), []).append(e)
107
+ cross = []
108
+ seen_ids: set[str] = set()
109
+ for members in by_event_type.values():
110
+ if len(members) < 2:
111
+ continue
112
+ junk = [m for m in members if C.looks_like_id(m["canonical_name"])]
113
+ non_junk = [m for m in members if not C.looks_like_id(m["canonical_name"])]
114
+ if not junk or len(non_junk) != 1:
115
+ continue # need junk to clean AND exactly one unambiguous master
116
+ group = non_junk + junk
117
+ ids = tuple(sorted(m["id"] for m in group))
118
+ if ids in seen_ids:
119
+ continue
120
+ seen_ids.add(ids)
121
+ cross.append(group)
122
+ return exact + cross
123
+
124
+
125
+ def _authority_signals(cur, arena: str, entity_ids: list[str], current_model: str) -> dict:
126
+ """Batch-resolve the canonical-scoring authority signals (#3) for a set of
127
+ entities, from data that actually exists:
128
+ - grounded: the entity's canonical_name appears verbatim in the content
129
+ of at least one of its provenance events (not a hallucinated name).
130
+ - from_current_teacher: at least one provenance event was distilled by
131
+ the CURRENT teacher (distillation_traces.llm_model = current_model) —
132
+ prefer the newer teacher's rendering over a superseded one.
133
+ (in_directory is left False — there's no authoritative directory/contacts
134
+ table in the schema yet; that's a separate data-source decision, noted in
135
+ the RFC. The scorer already supports it for when one lands.)
136
+ Returns {entity_id: {"grounded": bool, "from_current_teacher": bool}}."""
137
+ out = {eid: {"grounded": False, "from_current_teacher": False} for eid in entity_ids}
138
+ if not entity_ids:
139
+ return out
140
+ cur.execute(
141
+ """SELECT e.id, e.canonical_name,
142
+ EXISTS (SELECT 1 FROM events ev
143
+ WHERE ev.id = ANY(e.provenance_event_ids)
144
+ AND position(e.canonical_name in ev.content) > 0) AS grounded,
145
+ EXISTS (SELECT 1 FROM distillation_traces t
146
+ WHERE t.event_id = ANY(e.provenance_event_ids)
147
+ AND t.llm_model = %s) AS cur_teacher
148
+ FROM entities e WHERE e.arena = %s AND e.id = ANY(%s)""",
149
+ (current_model, arena, entity_ids),
150
+ )
151
+ for eid, name, grounded, cur_teacher in cur.fetchall():
152
+ # A numeric-ID-as-person name (e.g. "1716801984") substring-matches any
153
+ # stray digit-run in content (epochs, order/invoice numbers) → would
154
+ # falsely mark junk "grounded" and BOOST its authority. Never credit a
155
+ # looks-like-id name as grounded. (#96 review §2)
156
+ grounded = bool(grounded) and not C.looks_like_id(name)
157
+ out[eid] = {"grounded": grounded, "from_current_teacher": bool(cur_teacher)}
158
+ return out
159
+
160
+
161
+ def _candidates(group: list[dict], signals: dict | None = None) -> list[C.CanonicalCandidate]:
162
+ signals = signals or {}
163
+ return [
164
+ C.CanonicalCandidate(
165
+ entity_id=e["id"],
166
+ canonical_name=e["canonical_name"],
167
+ n_provenance=len(e["provenance_event_ids"] or []),
168
+ aliases=e["aliases"] or [],
169
+ grounded=signals.get(e["id"], {}).get("grounded", False),
170
+ from_current_teacher=signals.get(e["id"], {}).get("from_current_teacher", False),
171
+ )
172
+ for e in group
173
+ ]
174
+
175
+
176
+ def _touching(cur, arena: str, loser_ids: list[str]) -> tuple[list[dict], list[dict]]:
177
+ cur.execute(
178
+ """SELECT id, subject_entity_id, object_entity_id FROM facts
179
+ WHERE arena = %s AND (subject_entity_id = ANY(%s) OR object_entity_id = ANY(%s))""",
180
+ (arena, loser_ids, loser_ids),
181
+ )
182
+ facts = cur.fetchall()
183
+ cur.execute(
184
+ """SELECT id, from_entity_id, to_entity_id, relationship_type, weight, provenance_event_ids
185
+ FROM relationships WHERE arena = %s
186
+ AND (from_entity_id = ANY(%s) OR to_entity_id = ANY(%s)
187
+ OR from_entity_id = ANY(%s) OR to_entity_id = ANY(%s))""",
188
+ (arena, loser_ids, loser_ids, loser_ids, loser_ids),
189
+ )
190
+ rels = cur.fetchall()
191
+ return facts, rels
192
+
193
+
194
+ def _execute_entity_plan(cur, plan) -> None:
195
+ # master row
196
+ cur.execute("UPDATE entities SET aliases = %s, provenance_event_ids = %s, last_seen = NOW() WHERE id = %s",
197
+ (plan.master_aliases, plan.master_provenance, plan.master_id))
198
+ for fid in plan.fact_subject_repoints:
199
+ cur.execute("UPDATE facts SET subject_entity_id = %s WHERE id = %s", (plan.master_id, fid))
200
+ for fid in plan.fact_object_repoints:
201
+ cur.execute("UPDATE facts SET object_entity_id = %s WHERE id = %s", (plan.master_id, fid))
202
+ for rid in plan.rel_endpoint_repoints:
203
+ cur.execute(
204
+ """UPDATE relationships SET
205
+ from_entity_id = CASE WHEN from_entity_id = ANY(%s) THEN %s ELSE from_entity_id END,
206
+ to_entity_id = CASE WHEN to_entity_id = ANY(%s) THEN %s ELSE to_entity_id END
207
+ WHERE id = %s""",
208
+ (plan.deprecated_entity_ids, plan.master_id,
209
+ plan.deprecated_entity_ids, plan.master_id, rid),
210
+ )
211
+ for col in plan.rel_collisions:
212
+ cur.execute("UPDATE relationships SET weight = %s, provenance_event_ids = %s WHERE id = %s",
213
+ (col["summed_weight"], col["provenance"], col["keep"]))
214
+ cur.execute("DELETE FROM relationships WHERE id = %s", (col["drop"],))
215
+ for a in plan.audit_rows:
216
+ cur.execute(
217
+ """INSERT INTO entity_merges (id, arena, canonical_id, deprecated_id,
218
+ deprecated_canonical_name, deprecated_aliases, merge_signal,
219
+ facts_repointed, rollback_payload)
220
+ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
221
+ ("em_" + uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
222
+ a["deprecated_canonical_name"], a["deprecated_aliases"], a["merge_signal"],
223
+ len(plan.fact_subject_repoints) + len(plan.fact_object_repoints),
224
+ json.dumps(a["rollback_payload"], default=str)),
225
+ )
226
+ cur.execute("DELETE FROM entities WHERE id = ANY(%s)", (plan.deprecated_entity_ids,))
227
+
228
+
229
+ def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
230
+ """After repointing facts onto the master, the master can hold several
231
+ facts with the same (subject, predicate, object) but different statements
232
+ (fact id is content_id(arena, statement), so they didn't collapse on
233
+ insert). Fuse each such triple-group via build_fact_merge_plan: keep the
234
+ best, union provenance, delete dups with a fact_merges receipt."""
235
+ cur.execute(
236
+ """SELECT id, predicate, object_entity_id, statement, confidence, provenance_event_ids
237
+ FROM facts
238
+ WHERE arena = %s AND (subject_entity_id = %s OR object_entity_id = %s)""",
239
+ (arena, master_id, master_id),
240
+ )
241
+ rows = cur.fetchall()
242
+ groups: dict[tuple, list[dict]] = {}
243
+ for r in rows:
244
+ # group key uses the master as the subject anchor + predicate + object
245
+ groups.setdefault((master_id, r["predicate"], r["object_entity_id"]), []).append(r)
246
+ deduped = 0
247
+ for dup in groups.values():
248
+ plan = build_fact_merge_plan(arena=arena, dup_facts=dup)
249
+ if not plan:
250
+ continue
251
+ cur.execute("UPDATE facts SET provenance_event_ids = %s WHERE id = %s",
252
+ (plan["master_provenance"], plan["master_id"]))
253
+ for a in plan["audit_rows"]:
254
+ cur.execute(
255
+ """INSERT INTO fact_merges (id, arena, canonical_id, deprecated_id,
256
+ deprecated_statement, merge_signal, provenance_unioned, rollback_payload)
257
+ VALUES (%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
258
+ ("fm_" + uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
259
+ a["deprecated_statement"], a["merge_signal"], a["provenance_unioned"],
260
+ json.dumps(a["rollback_payload"], default=str)),
261
+ )
262
+ cur.execute("DELETE FROM facts WHERE id = ANY(%s)", (plan["deprecated_ids"],))
263
+ deduped += len(plan["deprecated_ids"])
264
+ return deduped
265
+
266
+
267
+ def _ambiguous_cross_run(cur, arena: str) -> list[dict]:
268
+ """Co-occurrence groups the deterministic tier PUNTED on: a junk-leaning
269
+ node sharing an event with MULTIPLE non-junk candidates (so which real
270
+ entity it belongs to is ambiguous). Returns [{junk, candidates:[...]}] for
271
+ the LLM tier to adjudicate. Restricted disclosure excluded."""
272
+ cur.execute(
273
+ """SELECT id, entity_type, canonical_name, aliases, provenance_event_ids
274
+ FROM entities WHERE arena = %s AND disclosure_class <> 'restricted'""",
275
+ (arena,),
276
+ )
277
+ ents = cur.fetchall()
278
+ by_event_type: dict[tuple, list[dict]] = {}
279
+ for e in ents:
280
+ for ev in (e["provenance_event_ids"] or []):
281
+ by_event_type.setdefault((e["entity_type"], ev), []).append(e)
282
+ out, seen = [], set()
283
+ for members in by_event_type.values():
284
+ junk = [m for m in members if C.looks_like_id(m["canonical_name"])]
285
+ non_junk = [m for m in members if not C.looks_like_id(m["canonical_name"])]
286
+ if not junk or len(non_junk) < 2:
287
+ continue
288
+ for j in junk:
289
+ key = (j["id"], tuple(sorted(c["id"] for c in non_junk)))
290
+ if key in seen:
291
+ continue
292
+ seen.add(key)
293
+ out.append({"junk": j, "candidates": non_junk})
294
+ return out
295
+
296
+
297
+ def _entity_context(cur, arena: str, eid: str) -> list[str]:
298
+ cur.execute(
299
+ "SELECT statement FROM facts WHERE arena=%s AND (subject_entity_id=%s OR object_entity_id=%s) LIMIT 5",
300
+ (arena, eid, eid))
301
+ return [r["statement"] for r in cur.fetchall()]
302
+
303
+
304
+ def _semantic_fact_groups(cur, arena: str) -> list[list[dict]]:
305
+ """Facts sharing (subject_entity_id, predicate) but with DIFFERENT
306
+ statements — candidate same-assertion-different-words pairs for LLM
307
+ adjudication. Exact-triple dupes already collapse elsewhere; this is the
308
+ semantic tier. Restricted disclosure excluded.
309
+
310
+ RECALL SCOPE (#96 review §5): grouping on (subject, predicate) catches only
311
+ same-predicate wording variants ("decided X" vs "decided X, confirmed").
312
+ CROSS-predicate synonyms ("joined Acme" / "works at Acme") are NOT grouped
313
+ here — that needs a predicate-synonym map or subject-level pairwise
314
+ adjudication (O(n²), deferred). adjudicate_facts() itself handles any pair;
315
+ it's the candidate generation that's intentionally narrow to bound LLM calls."""
316
+ cur.execute(
317
+ """SELECT id, subject_entity_id, predicate, object_entity_id, statement,
318
+ confidence, provenance_event_ids
319
+ FROM facts WHERE arena=%s AND disclosure_class <> 'restricted'
320
+ AND subject_entity_id IS NOT NULL""",
321
+ (arena,))
322
+ groups: dict[tuple, list[dict]] = {}
323
+ for f in cur.fetchall():
324
+ groups.setdefault((f["subject_entity_id"], f["predicate"]), []).append(f)
325
+ return [g for g in groups.values()
326
+ if len({_norm(x["statement"]) for x in g}) > 1] # >1 distinct statement
327
+
328
+
329
def main() -> int:
    """CLI entry point: run the fusion pass for one arena.

    Tier 1 merges the deterministic duplicate groups. When an LLM endpoint is
    configured, tier 2 adjudicates (a) ambiguous junk-vs-real entity matches
    and (b) same-(subject, predicate) facts with differing statements.
    Without ``--apply`` nothing in entities/facts/relationships is changed;
    proposals are only printed. A ``fusion_drive_runs`` ledger row is written
    in both modes.

    Each entity takes part in at most one merge per run. Merge groups are
    computed from a snapshot, and a merge deletes the losers and rewrites the
    master, so a later group that overlaps an earlier one would otherwise
    operate on stale rows (re-merging a deleted node into a second master, or
    overwriting the master's aliases/provenance from an outdated copy). Such
    overlapping groups are skipped and picked up by the next run.

    Returns:
        0 on success, 2 when no Postgres DSN is available.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--arena", required=True)
    ap.add_argument("--pg-dsn", default=os.environ.get("PG_DSN"))
    ap.add_argument("--apply", action="store_true", help="execute merges (default: dry-run)")
    ap.add_argument("--llm-endpoint", default=os.environ.get("PME_V2_LLM_ENDPOINT"),
                    help="in-VPC distiller /v1/chat/completions for adjudication "
                         "(no egress). Omit to skip the LLM tier (deterministic only).")
    ap.add_argument("--model", default=DEFAULT_MODEL)
    args = ap.parse_args()
    if not args.pg_dsn:
        print("PG_DSN required", file=sys.stderr)
        return 2
    post_fn = _distiller_post_fn(args.llm_endpoint, args.model) if args.llm_endpoint else None

    proposals = merged = llm_entity_merges = llm_fact_merges = 0
    # Ids of every entity already used in a merge proposal during this run.
    consumed: set = set()
    with psycopg.connect(args.pg_dsn, row_factory=dict_row) as conn:
        with conn.cursor() as cur:
            def do_merge(group, signal_note=""):
                """Propose (and with --apply, execute) one merge; return loser count."""
                nonlocal proposals, merged
                group_ids = [e["id"] for e in group]
                if not consumed.isdisjoint(group_ids):
                    # Overlaps an earlier merge: these rows are stale. Defer.
                    return 0
                sig = _authority_signals(cur, args.arena, group_ids, args.model)
                master_c, losers_c = C.pick_master(_candidates(group, sig))
                loser_ids = [loser.entity_id for loser in losers_c]
                if not loser_ids:
                    return 0
                proposals += 1
                consumed.update(group_ids)
                by_id = {e["id"]: e for e in group}
                master, losers = by_id[master_c.entity_id], [by_id[i] for i in loser_ids]
                facts, rels = _touching(cur, args.arena, loser_ids)
                plan = build_entity_merge_plan(arena=args.arena, master=master, losers=losers,
                                               facts=facts, relationships=rels)
                print(f" MERGE{signal_note} → '{master['canonical_name']}' ({master['id']}) "
                      f"absorbs {[l['canonical_name'] for l in losers]}")
                if args.apply:
                    # One transaction per merge: plan + fact dedup, then commit.
                    _execute_entity_plan(cur, plan)
                    _dedup_master_facts(cur, args.arena, master["id"])
                    conn.commit()
                    merged += len(loser_ids)
                return len(loser_ids)

            # Tier 1 — deterministic (exact-name + unambiguous cross-run)
            for group in _entity_dup_sets(cur, args.arena):
                do_merge(group)

            # Tier 2 — LLM adjudication via the in-VPC distiller (no egress)
            if post_fn:
                # 2a. ambiguous cross-run: which real entity does the junk match?
                for amb in _ambiguous_cross_run(cur, args.arena):
                    j = amb["junk"]
                    if j["id"] in consumed:
                        continue  # already merged this run; skip the LLM calls
                    jctx = _entity_context(cur, args.arena, j["id"])
                    for cand in amb["candidates"]:
                        if cand["id"] in consumed:
                            continue
                        v = adjudicate_entities(
                            {**j, "context": jctx},
                            {**cand, "context": _entity_context(cur, args.arena, cand["id"])},
                            post_fn)
                        if v["same"]:
                            print(f" [llm:{v['reason'][:40]}]", end="")
                            llm_entity_merges += do_merge([cand, j], signal_note=" (llm)")
                            break
                # 2b. semantic fact fusion: same assertion, different words?
                for fg in _semantic_fact_groups(cur, args.arena):
                    # Rows are mappings that always carry the "confidence" key,
                    # so a .get() default never fires; coerce NULL to 0 so the
                    # sort cannot compare None with a number.
                    fg_sorted = sorted(fg, key=lambda f: (f.get("confidence") or 0, f["id"]),
                                       reverse=True)
                    keep = fg_sorted[0]
                    same = [keep]
                    for other in fg_sorted[1:]:
                        if adjudicate_facts(keep["statement"], other["statement"], post_fn)["same"]:
                            same.append(other)
                    if len(same) > 1:
                        plan = build_fact_merge_plan(arena=args.arena, dup_facts=same)
                        print(f" FACT-MERGE (llm) → '{keep['statement'][:50]}' absorbs {len(same)-1}")
                        if args.apply and plan:
                            cur.execute("UPDATE facts SET provenance_event_ids=%s WHERE id=%s",
                                        (plan["master_provenance"], plan["master_id"]))
                            for a in plan["audit_rows"]:
                                cur.execute(
                                    """INSERT INTO fact_merges (id, arena, canonical_id, deprecated_id,
                                         deprecated_statement, merge_signal, provenance_unioned, rollback_payload)
                                       VALUES (%s,%s,%s,%s,%s,'llm_adjudication',%s,%s::jsonb)""",
                                    ("fm_" + uuid.uuid4().hex[:20], a["arena"], a["canonical_id"],
                                     a["deprecated_id"], a["deprecated_statement"], a["provenance_unioned"],
                                     json.dumps(a["rollback_payload"], default=str)))
                            cur.execute("DELETE FROM facts WHERE id = ANY(%s)", (plan["deprecated_ids"],))
                            conn.commit()
                        llm_fact_merges += len(same) - 1

            # Ledger row: written for dry-runs too (mode distinguishes them).
            run_id = "fdr_" + uuid.uuid4().hex[:20]
            detail = {"proposals": proposals, "merged": merged,
                      "llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
                      "llm_tier": bool(post_fn)}
            cur.execute(
                """INSERT INTO fusion_drive_runs (id, arena, pass_kind, mode, scanned, changed, detail, finished_at)
                   VALUES (%s,%s,'fusion',%s,%s,%s,%s::jsonb,NOW())""",
                (run_id, args.arena, "apply" if args.apply else "dry_run",
                 proposals, merged + llm_fact_merges, json.dumps(detail)))
            conn.commit()

    label = "APPLY (reversible via entity_merges/fact_merges)" if args.apply else "DRY-RUN"
    print(f"[fusion-drive:fuse] {label} arena={args.arena}: {proposals} entity proposal(s), "
          f"{merged} entities merged ({llm_entity_merges} via llm), {llm_fact_merges} facts merged via llm. "
          f"LLM tier: {'on (distiller)' if post_fn else 'off'}")
    print(f" ledger: {run_id}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())