@pentatonic-ai/ai-agent-sdk 0.10.11 → 0.10.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
878
878
  }
879
879
 
880
880
  // src/telemetry.js
881
- var VERSION = "0.10.11";
881
+ var VERSION = "0.10.13";
882
882
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
883
883
  function machineId() {
884
884
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js CHANGED
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
847
847
  }
848
848
 
849
849
  // src/telemetry.js
850
- var VERSION = "0.10.11";
850
+ var VERSION = "0.10.13";
851
851
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
852
852
  function machineId() {
853
853
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.10.11",
3
+ "version": "0.10.13",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -0,0 +1,22 @@
1
+ -- Fusion Drive audit integrity: the merge-audit tables must never lose rows
2
+ -- when the row they reference changes.
3
+ --
4
+ -- 002 (entity_merges) and 006 (fact_merges) declared canonical_id as
5
+ -- REFERENCES entities/facts(id) ON DELETE CASCADE
6
+ -- That destroys audit + rollback history on CHAINED merges. When fact/entity A
7
+ -- is merged into canonical K, a receipt is written with canonical_id = K. If K
8
+ -- is itself later merged away (deleted), the ON DELETE CASCADE deletes A's
9
+ -- receipt too — A stays deleted with no record of why, and no rollback payload.
10
+ -- Observed 2026-06-15: a single arena --apply left 4 facts deleted with no
11
+ -- fact_merges row (their canonical was absorbed into a longer "standing by"
12
+ -- statement, cascading the receipts away).
13
+ --
14
+ -- An append-only audit log must outlive its referents. deprecated_id was always
15
+ -- a plain TEXT column (no FK — the row it names is deleted by definition);
16
+ -- canonical_id should be treated the same. Drop the cascading FK and keep
17
+ -- canonical_id as a plain TEXT column. (We deliberately do NOT re-add a
18
+ -- SET NULL FK: canonical_id must stay populated for forensics even after the
19
+ -- canonical row is gone.)
20
+
21
+ ALTER TABLE entity_merges DROP CONSTRAINT IF EXISTS entity_merges_canonical_id_fkey;
22
+ ALTER TABLE fact_merges DROP CONSTRAINT IF EXISTS fact_merges_canonical_id_fkey;
@@ -213,22 +213,33 @@ def _execute_entity_plan(cur, plan) -> None:
213
213
  cur.execute(
214
214
  """INSERT INTO entity_merges (id, arena, canonical_id, deprecated_id,
215
215
  deprecated_canonical_name, deprecated_aliases, merge_signal,
216
- facts_repointed, rollback_payload)
217
- VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
216
+ facts_repointed, relationships_repointed, merged_by, rollback_payload)
217
+ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
218
218
  ("em_" + uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
219
219
  a["deprecated_canonical_name"], a["deprecated_aliases"], a["merge_signal"],
220
220
  len(plan.fact_subject_repoints) + len(plan.fact_object_repoints),
221
+ len(plan.rel_endpoint_repoints), "fusion-drive",
221
222
  json.dumps(a["rollback_payload"], default=str)),
222
223
  )
223
224
  cur.execute("DELETE FROM entities WHERE id = ANY(%s)", (plan.deprecated_entity_ids,))
224
225
 
225
226
 
226
227
  def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
227
- """After repointing facts onto the master, the master can hold several
228
- facts with the same (subject, predicate, object) but different statements
229
- (fact id is content_id(arena, statement), so they didn't collapse on
230
- insert). Fuse each such triple-group via build_fact_merge_plan: keep the
231
- best, union provenance, delete dups with a fact_merges receipt."""
228
+ """After repointing facts onto the master, collapse facts that are now
229
+ TRUE duplicates: same (subject, predicate, object) AND the same normalized
230
+ statement. These exist because the fact id is content_id(arena, statement):
231
+ two rows with statements differing only in case/whitespace hash to distinct
232
+ ids and so survived insert-time dedup; once their subject/object entities
233
+ are unified they are genuinely the same assertion and fuse safely.
234
+
235
+ The statement is PART OF THE KEY on purpose. Grouping on the triple alone is
236
+ NOT identity: a NULL object with a generic predicate (e.g. subject "said"
237
+ NULL) buckets together unrelated assertions, and build_fact_merge_plan would
238
+ keep one and DELETE the rest — destroying distinct facts (it deleted 33% of
239
+ one arena's facts that way before this fix). Same-triple / different-meaning
240
+ facts are left untouched here; the LLM semantic tier (_semantic_fact_groups
241
+ + adjudicate_facts) is the only thing allowed to fuse facts whose statements
242
+ actually differ, and only on an affirmative same-assertion verdict."""
232
243
  cur.execute(
233
244
  """SELECT id, predicate, object_entity_id, statement, confidence, provenance_event_ids
234
245
  FROM facts
@@ -238,8 +249,12 @@ def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
238
249
  rows = cur.fetchall()
239
250
  groups: dict[tuple, list[dict]] = {}
240
251
  for r in rows:
241
- # group key uses the master as the subject anchor + predicate + object
242
- groups.setdefault((master_id, r["predicate"], r["object_entity_id"]), []).append(r)
252
+ # key = master subject anchor + predicate + object + NORMALIZED STATEMENT.
253
+ # statement in the key => only byte-equal-after-normalization dupes fuse.
254
+ groups.setdefault(
255
+ (master_id, r["predicate"], r["object_entity_id"], _norm(r["statement"] or "")),
256
+ [],
257
+ ).append(r)
243
258
  deduped = 0
244
259
  for dup in groups.values():
245
260
  plan = build_fact_merge_plan(arena=arena, dup_facts=dup)
@@ -371,6 +386,15 @@ def main() -> int:
371
386
  merged += len(loser_ids)
372
387
  return len(loser_ids)
373
388
 
389
+ # Audit invariant baseline: every entity/fact deletion MUST leave a
390
+ # rollback receipt. Capture pre-counts; cross-check after the run.
391
+ def _counts():
392
+ return {t: cur.execute(
393
+ f"SELECT count(*) AS n FROM {t} WHERE arena=%s",
394
+ (args.arena,)).fetchone()["n"]
395
+ for t in ("entities", "facts", "entity_merges", "fact_merges")}
396
+ pre_counts = _counts()
397
+
374
398
  # Tier 1 — deterministic: exact normalized-name dupes only
375
399
  # (case/whitespace variants). Co-occurrence is NOT auto-merged.
376
400
  for group in _entity_dup_sets(cur, args.arena):
@@ -419,10 +443,26 @@ def main() -> int:
419
443
  conn.commit()
420
444
  llm_fact_merges += len(same) - 1
421
445
 
446
+ # Audit invariant: deletions must equal receipts written this run.
447
+ # A mismatch means some rows were deleted with no rollback receipt —
448
+ # e.g. a chained-merge cascade eating audit rows (fixed in migration
449
+ # 007, guarded here so it can never recur silently). Detective, not
450
+ # preventive (merges commit per-group), but it turns a silent leak
451
+ # into a loud, recorded failure. Holds trivially for dry-run (0==0).
452
+ post_counts = _counts()
453
+ ent_deleted = pre_counts["entities"] - post_counts["entities"]
454
+ fact_deleted = pre_counts["facts"] - post_counts["facts"]
455
+ ent_audited = post_counts["entity_merges"] - pre_counts["entity_merges"]
456
+ fact_audited = post_counts["fact_merges"] - pre_counts["fact_merges"]
457
+ audit_ok = (ent_deleted == ent_audited) and (fact_deleted == fact_audited)
458
+
422
459
  run_id = "fdr_" + uuid.uuid4().hex[:20]
423
460
  detail = {"proposals": proposals, "merged": merged,
424
461
  "llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
425
- "llm_tier": bool(post_fn)}
462
+ "llm_tier": bool(post_fn),
463
+ "audit": {"ok": audit_ok,
464
+ "entities_deleted": ent_deleted, "entities_audited": ent_audited,
465
+ "facts_deleted": fact_deleted, "facts_audited": fact_audited}}
426
466
  cur.execute(
427
467
  """INSERT INTO fusion_drive_runs (id, arena, pass_kind, mode, scanned, changed, detail, finished_at)
428
468
  VALUES (%s,%s,'fusion',%s,%s,%s,%s::jsonb,NOW())""",
@@ -435,6 +475,14 @@ def main() -> int:
435
475
  f"{merged} entities merged ({llm_entity_merges} via llm), {llm_fact_merges} facts merged via llm. "
436
476
  f"LLM tier: {'on (distiller)' if post_fn else 'off'}")
437
477
  print(f" ledger: {run_id}")
478
+ if not audit_ok:
479
+ print(f"[fusion-drive:fuse] AUDIT INVARIANT VIOLATED arena={args.arena}: "
480
+ f"entities deleted={ent_deleted} audited={ent_audited}; "
481
+ f"facts deleted={fact_deleted} audited={fact_audited}. "
482
+ f"Deletions without a rollback receipt — do NOT trust audit-based "
483
+ f"rollback for this run; restore from backup if needed.",
484
+ file=sys.stderr)
485
+ return 3
438
486
  return 0
439
487
 
440
488
 
@@ -0,0 +1,83 @@
1
+ """Guards _dedup_master_facts against the over-fusion that deleted 33% of an
2
+ arena's facts (2026-06-14): grouping post-merge facts by (subject, predicate,
3
+ object) alone treats a NULL object + generic predicate as identity and deletes
4
+ distinct assertions. The statement must be part of the dedup key so ONLY
5
+ byte-equal-after-normalization duplicates fuse; same-triple/different-meaning
6
+ facts are left for the LLM semantic tier."""
7
+
8
+ from __future__ import annotations
9
+
10
+ import importlib
11
+ import os
12
+ import sys
13
+ import types
14
+
15
+ HERE = os.path.dirname(__file__)
16
+
17
+
18
+ def _load_fuse(monkeypatch):
19
+ fake_psycopg = types.ModuleType("psycopg")
20
+ fake_rows = types.ModuleType("psycopg.rows")
21
+ fake_rows.dict_row = object()
22
+ fake_psycopg.rows = fake_rows
23
+ monkeypatch.setitem(sys.modules, "psycopg", fake_psycopg)
24
+ monkeypatch.setitem(sys.modules, "psycopg.rows", fake_rows)
25
+ monkeypatch.syspath_prepend(os.path.join(HERE, "..", "fusion_drive"))
26
+ monkeypatch.syspath_prepend(HERE)
27
+ sys.modules.pop("fusion_drive_fuse", None)
28
+ return importlib.import_module("fusion_drive_fuse")
29
+
30
+
31
+ class FakeCursor:
32
+ """Returns preset fact rows on the SELECT; records ids passed to DELETE."""
33
+
34
+ def __init__(self, rows):
35
+ self._rows = rows
36
+ self.deleted_ids = []
37
+
38
+ def execute(self, sql, params=None):
39
+ s = " ".join(sql.split())
40
+ if s.startswith("DELETE FROM facts WHERE id = ANY"):
41
+ # params is a 1-tuple holding the id list
42
+ self.deleted_ids.extend(params[0])
43
+ # SELECT / UPDATE / INSERT: no-op (fetchall serves the preset rows)
44
+
45
+ def fetchall(self):
46
+ return self._rows
47
+
48
+
49
+ def _fact(fid, predicate, obj, statement, conf):
50
+ return {
51
+ "id": fid,
52
+ "predicate": predicate,
53
+ "object_entity_id": obj,
54
+ "statement": statement,
55
+ "confidence": conf,
56
+ "provenance_event_ids": [f"ev_{fid}"],
57
+ }
58
+
59
+
60
+ def test_distinct_statements_same_triple_are_NOT_fused(monkeypatch):
61
+ fuse = _load_fuse(monkeypatch)
62
+ rows = [
63
+ _fact("f1", "said", None, "Standing by", 0.9),
64
+ _fact("f2", "said", None, "yeah ship it", 0.8), # distinct meaning
65
+ _fact("f3", "said", None, "modules/deep-memory is vestigial", 0.7),
66
+ ]
67
+ cur = FakeCursor(rows)
68
+ deleted = fuse._dedup_master_facts(cur, "arena", "m")
69
+ assert deleted == 0, "must not fuse same-triple facts with different statements"
70
+ assert cur.deleted_ids == []
71
+
72
+
73
+ def test_only_normalized_statement_duplicates_fuse(monkeypatch):
74
+ fuse = _load_fuse(monkeypatch)
75
+ rows = [
76
+ _fact("f1", "said", None, "Standing by", 0.9),
77
+ _fact("f2", "said", None, "standing by", 0.5), # same after _norm
78
+ _fact("f3", "said", None, "something else entirely", 0.7),
79
+ ]
80
+ cur = FakeCursor(rows)
81
+ deleted = fuse._dedup_master_facts(cur, "arena", "m")
82
+ assert deleted == 1, "the case/whitespace duplicate should fuse"
83
+ assert cur.deleted_ids == ["f2"], "lower-confidence true-dupe is the one deleted"