npm - @pentatonic-ai/ai-agent-sdk - Version diff - 0.10.11 → 0.10.13 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.11 → 0.10.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.cjs +1 -1
package/dist/index.js +1 -1
package/package.json +1 -1
package/packages/memory-engine-v2/org-model/migrations/007_audit_canonical_no_cascade.sql +22 -0
package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +58 -10
package/packages/memory-engine-v2/scripts/test_dedup_master_facts.py +83 -0

package/dist/index.cjs CHANGED Viewed

@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.11";
+var VERSION = "0.10.13";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/dist/index.js CHANGED Viewed

@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.11";
+var VERSION = "0.10.13";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.10.11",
+  "version": "0.10.13",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine-v2/org-model/migrations/007_audit_canonical_no_cascade.sql ADDED Viewed

@@ -0,0 +1,22 @@
+-- Fusion Drive audit integrity: the merge-audit tables must never lose rows
+-- when the row they reference changes.
+--
+-- 002 (entity_merges) and 006 (fact_merges) declared canonical_id as
+--   REFERENCES entities/facts(id) ON DELETE CASCADE
+-- That destroys audit + rollback history on CHAINED merges. When fact/entity A
+-- is merged into canonical K, a receipt is written with canonical_id = K. If K
+-- is itself later merged away (deleted), the ON DELETE CASCADE deletes A's
+-- receipt too — A stays deleted with no record of why, and no rollback payload.
+-- Observed 2026-06-15: a single arena --apply left 4 facts deleted with no
+-- fact_merges row (their canonical was absorbed into a longer "standing by"
+-- statement, cascading the receipts away).
+--
+-- An append-only audit log must outlive its referents. deprecated_id was always
+-- a plain TEXT column (no FK — the row it names is deleted by definition);
+-- canonical_id should be treated the same. Drop the cascading FK and keep
+-- canonical_id as a plain TEXT column. (We deliberately do NOT re-add a
+-- SET NULL FK: canonical_id must stay populated for forensics even after the
+-- canonical row is gone.)
+ALTER TABLE entity_merges DROP CONSTRAINT IF EXISTS entity_merges_canonical_id_fkey;
+ALTER TABLE fact_merges   DROP CONSTRAINT IF EXISTS fact_merges_canonical_id_fkey;

package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py CHANGED Viewed

@@ -213,22 +213,33 @@ def _execute_entity_plan(cur, plan) -> None:
         cur.execute(
             """INSERT INTO entity_merges (id, arena, canonical_id, deprecated_id,
                  deprecated_canonical_name, deprecated_aliases, merge_signal,
-                 facts_repointed, rollback_payload)
-               VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
+                 facts_repointed, relationships_repointed, merged_by, rollback_payload)
+               VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
             ("em_" + uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
              a["deprecated_canonical_name"], a["deprecated_aliases"], a["merge_signal"],
              len(plan.fact_subject_repoints) + len(plan.fact_object_repoints),
+             len(plan.rel_endpoint_repoints), "fusion-drive",
              json.dumps(a["rollback_payload"], default=str)),
         )
     cur.execute("DELETE FROM entities WHERE id = ANY(%s)", (plan.deprecated_entity_ids,))
 def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
-    """After repointing facts onto the master, the master can hold several
-    facts with the same (subject, predicate, object) but different statements
-    (fact id is content_id(arena, statement), so they didn't collapse on
-    insert). Fuse each such triple-group via build_fact_merge_plan: keep the
-    best, union provenance, delete dups with a fact_merges receipt."""
+    """After repointing facts onto the master, collapse facts that are now
+    TRUE duplicates — same (subject, predicate, object) AND the same normalized
+    statement. These exist because the fact id is content_id(arena, statement):
+    two rows with statements differing only in case/whitespace hash to distinct
+    ids and so survived insert-time dedup; once their subject/object entities
+    are unified they are genuinely the same assertion and fuse safely.
+    The statement is PART OF THE KEY on purpose. Grouping on the triple alone is
+    NOT identity: a NULL object with a generic predicate (e.g. subject "said"
+    NULL) buckets together unrelated assertions, and build_fact_merge_plan would
+    keep one and DELETE the rest — destroying distinct facts (it deleted 33% of
+    one arena's facts that way before this fix). Same-triple / different-meaning
+    facts are left untouched here; the LLM semantic tier (_semantic_fact_groups
+    + adjudicate_facts) is the only thing allowed to fuse facts whose statements
+    actually differ, and only on an affirmative same-assertion verdict."""
     cur.execute(
         """SELECT id, predicate, object_entity_id, statement, confidence, provenance_event_ids
            FROM facts
@@ -238,8 +249,12 @@ def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
     rows = cur.fetchall()
     groups: dict[tuple, list[dict]] = {}
     for r in rows:
-        # group key uses the master as the subject anchor + predicate + object
-        groups.setdefault((master_id, r["predicate"], r["object_entity_id"]), []).append(r)
+        # key = master subject anchor + predicate + object + NORMALIZED STATEMENT.
+        # statement in the key => only byte-equal-after-normalization dupes fuse.
+        groups.setdefault(
+            (master_id, r["predicate"], r["object_entity_id"], _norm(r["statement"] or "")),
+            [],
+        ).append(r)
     deduped = 0
     for dup in groups.values():
         plan = build_fact_merge_plan(arena=arena, dup_facts=dup)
@@ -371,6 +386,15 @@ def main() -> int:
                 merged += len(loser_ids)
                 return len(loser_ids)
+            # Audit invariant baseline: every entity/fact deletion MUST leave a
+            # rollback receipt. Capture pre-counts; cross-check after the run.
+            def _counts():
+                return {t: cur.execute(
+                            f"SELECT count(*) AS n FROM {t} WHERE arena=%s",
+                            (args.arena,)).fetchone()["n"]
+                        for t in ("entities", "facts", "entity_merges", "fact_merges")}
+            pre_counts = _counts()
             # Tier 1 — deterministic: exact normalized-name dupes only
             # (case/whitespace variants). Co-occurrence is NOT auto-merged.
             for group in _entity_dup_sets(cur, args.arena):
@@ -419,10 +443,26 @@ def main() -> int:
                             conn.commit()
                         llm_fact_merges += len(same) - 1
+            # Audit invariant: deletions must equal receipts written this run.
+            # A mismatch means some rows were deleted with no rollback receipt —
+            # e.g. a chained-merge cascade eating audit rows (fixed in migration
+            # 007, guarded here so it can never recur silently). Detective, not
+            # preventive (merges commit per-group), but it turns a silent leak
+            # into a loud, recorded failure. Holds trivially for dry-run (0==0).
+            post_counts = _counts()
+            ent_deleted = pre_counts["entities"] - post_counts["entities"]
+            fact_deleted = pre_counts["facts"] - post_counts["facts"]
+            ent_audited = post_counts["entity_merges"] - pre_counts["entity_merges"]
+            fact_audited = post_counts["fact_merges"] - pre_counts["fact_merges"]
+            audit_ok = (ent_deleted == ent_audited) and (fact_deleted == fact_audited)
             run_id = "fdr_" + uuid.uuid4().hex[:20]
             detail = {"proposals": proposals, "merged": merged,
                       "llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
-                      "llm_tier": bool(post_fn)}
+                      "llm_tier": bool(post_fn),
+                      "audit": {"ok": audit_ok,
+                                "entities_deleted": ent_deleted, "entities_audited": ent_audited,
+                                "facts_deleted": fact_deleted, "facts_audited": fact_audited}}
             cur.execute(
                 """INSERT INTO fusion_drive_runs (id, arena, pass_kind, mode, scanned, changed, detail, finished_at)
                    VALUES (%s,%s,'fusion',%s,%s,%s,%s::jsonb,NOW())""",
@@ -435,6 +475,14 @@ def main() -> int:
           f"{merged} entities merged ({llm_entity_merges} via llm), {llm_fact_merges} facts merged via llm. "
           f"LLM tier: {'on (distiller)' if post_fn else 'off'}")
     print(f"  ledger: {run_id}")
+    if not audit_ok:
+        print(f"[fusion-drive:fuse] AUDIT INVARIANT VIOLATED arena={args.arena}: "
+              f"entities deleted={ent_deleted} audited={ent_audited}; "
+              f"facts deleted={fact_deleted} audited={fact_audited}. "
+              f"Deletions without a rollback receipt — do NOT trust audit-based "
+              f"rollback for this run; restore from backup if needed.",
+              file=sys.stderr)
+        return 3
     return 0

package/packages/memory-engine-v2/scripts/test_dedup_master_facts.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""Guards _dedup_master_facts against the over-fusion that deleted 33% of an
+arena's facts (2026-06-14): grouping post-merge facts by (subject, predicate,
+object) alone treats a NULL object + generic predicate as identity and deletes
+distinct assertions. The statement must be part of the dedup key so ONLY
+byte-equal-after-normalization duplicates fuse; same-triple/different-meaning
+facts are left for the LLM semantic tier."""
+from __future__ import annotations
+import importlib
+import os
+import sys
+import types
+HERE = os.path.dirname(__file__)
+def _load_fuse(monkeypatch):
+    fake_psycopg = types.ModuleType("psycopg")
+    fake_rows = types.ModuleType("psycopg.rows")
+    fake_rows.dict_row = object()
+    fake_psycopg.rows = fake_rows
+    monkeypatch.setitem(sys.modules, "psycopg", fake_psycopg)
+    monkeypatch.setitem(sys.modules, "psycopg.rows", fake_rows)
+    monkeypatch.syspath_prepend(os.path.join(HERE, "..", "fusion_drive"))
+    monkeypatch.syspath_prepend(HERE)
+    sys.modules.pop("fusion_drive_fuse", None)
+    return importlib.import_module("fusion_drive_fuse")
+class FakeCursor:
+    """Returns preset fact rows on the SELECT; records ids passed to DELETE."""
+    def __init__(self, rows):
+        self._rows = rows
+        self.deleted_ids = []
+    def execute(self, sql, params=None):
+        s = " ".join(sql.split())
+        if s.startswith("DELETE FROM facts WHERE id = ANY"):
+            # params is a 1-tuple holding the id list
+            self.deleted_ids.extend(params[0])
+        # SELECT / UPDATE / INSERT: no-op (fetchall serves the preset rows)
+    def fetchall(self):
+        return self._rows
+def _fact(fid, predicate, obj, statement, conf):
+    return {
+        "id": fid,
+        "predicate": predicate,
+        "object_entity_id": obj,
+        "statement": statement,
+        "confidence": conf,
+        "provenance_event_ids": [f"ev_{fid}"],
+    }
+def test_distinct_statements_same_triple_are_NOT_fused(monkeypatch):
+    fuse = _load_fuse(monkeypatch)
+    rows = [
+        _fact("f1", "said", None, "Standing by", 0.9),
+        _fact("f2", "said", None, "yeah ship it", 0.8),       # distinct meaning
+        _fact("f3", "said", None, "modules/deep-memory is vestigial", 0.7),
+    ]
+    cur = FakeCursor(rows)
+    deleted = fuse._dedup_master_facts(cur, "arena", "m")
+    assert deleted == 0, "must not fuse same-triple facts with different statements"
+    assert cur.deleted_ids == []
+def test_only_normalized_statement_duplicates_fuse(monkeypatch):
+    fuse = _load_fuse(monkeypatch)
+    rows = [
+        _fact("f1", "said", None, "Standing by", 0.9),
+        _fact("f2", "said", None, "standing  by", 0.5),        # same after _norm
+        _fact("f3", "said", None, "something else entirely", 0.7),
+    ]
+    cur = FakeCursor(rows)
+    deleted = fuse._dedup_master_facts(cur, "arena", "m")
+    assert deleted == 1, "the case/whitespace duplicate should fuse"
+    assert cur.deleted_ids == ["f2"], "lower-confidence true-dupe is the one deleted"