@pentatonic-ai/ai-agent-sdk 0.10.11 → 0.10.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory-engine-v2/org-model/migrations/007_audit_canonical_no_cascade.sql +22 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +58 -10
- package/packages/memory-engine-v2/scripts/test_dedup_master_facts.py +83 -0
package/dist/index.cjs
CHANGED
|
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
878
878
|
}
|
|
879
879
|
|
|
880
880
|
// src/telemetry.js
|
|
881
|
-
var VERSION = "0.10.11";
|
|
881
|
+
var VERSION = "0.10.13";
|
|
882
882
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
883
883
|
function machineId() {
|
|
884
884
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
847
847
|
}
|
|
848
848
|
|
|
849
849
|
// src/telemetry.js
|
|
850
|
-
var VERSION = "0.10.11";
|
|
850
|
+
var VERSION = "0.10.13";
|
|
851
851
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
852
852
|
function machineId() {
|
|
853
853
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.10.11",
|
|
3
|
+
"version": "0.10.13",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
package/packages/memory-engine-v2/org-model/migrations/007_audit_canonical_no_cascade.sql
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
-- Fusion Drive audit integrity: the merge-audit tables must never lose rows
|
|
2
|
+
-- when the row they reference changes.
|
|
3
|
+
--
|
|
4
|
+
-- 002 (entity_merges) and 006 (fact_merges) declared canonical_id as
|
|
5
|
+
-- REFERENCES entities/facts(id) ON DELETE CASCADE
|
|
6
|
+
-- That destroys audit + rollback history on CHAINED merges. When fact/entity A
|
|
7
|
+
-- is merged into canonical K, a receipt is written with canonical_id = K. If K
|
|
8
|
+
-- is itself later merged away (deleted), the ON DELETE CASCADE deletes A's
|
|
9
|
+
-- receipt too — A stays deleted with no record of why, and no rollback payload.
|
|
10
|
+
-- Observed 2026-06-15: a single arena --apply left 4 facts deleted with no
|
|
11
|
+
-- fact_merges row (their canonical was absorbed into a longer "standing by"
|
|
12
|
+
-- statement, cascading the receipts away).
|
|
13
|
+
--
|
|
14
|
+
-- An append-only audit log must outlive its referents. deprecated_id was always
|
|
15
|
+
-- a plain TEXT column (no FK — the row it names is deleted by definition);
|
|
16
|
+
-- canonical_id should be treated the same. Drop the cascading FK and keep
|
|
17
|
+
-- canonical_id as a plain TEXT column. (We deliberately do NOT re-add a
|
|
18
|
+
-- SET NULL FK: canonical_id must stay populated for forensics even after the
|
|
19
|
+
-- canonical row is gone.)
|
|
20
|
+
|
|
21
|
+
ALTER TABLE entity_merges DROP CONSTRAINT IF EXISTS entity_merges_canonical_id_fkey;
|
|
22
|
+
ALTER TABLE fact_merges DROP CONSTRAINT IF EXISTS fact_merges_canonical_id_fkey;
|
package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py
CHANGED
|
@@ -213,22 +213,33 @@ def _execute_entity_plan(cur, plan) -> None:
|
|
|
213
213
|
cur.execute(
|
|
214
214
|
"""INSERT INTO entity_merges (id, arena, canonical_id, deprecated_id,
|
|
215
215
|
deprecated_canonical_name, deprecated_aliases, merge_signal,
|
|
216
|
-
facts_repointed, rollback_payload)
|
|
217
|
-
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
|
|
216
|
+
facts_repointed, relationships_repointed, merged_by, rollback_payload)
|
|
217
|
+
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s::jsonb)""",
|
|
218
218
|
("em_" + uuid.uuid4().hex[:20], a["arena"], a["canonical_id"], a["deprecated_id"],
|
|
219
219
|
a["deprecated_canonical_name"], a["deprecated_aliases"], a["merge_signal"],
|
|
220
220
|
len(plan.fact_subject_repoints) + len(plan.fact_object_repoints),
|
|
221
|
+
len(plan.rel_endpoint_repoints), "fusion-drive",
|
|
221
222
|
json.dumps(a["rollback_payload"], default=str)),
|
|
222
223
|
)
|
|
223
224
|
cur.execute("DELETE FROM entities WHERE id = ANY(%s)", (plan.deprecated_entity_ids,))
|
|
224
225
|
|
|
225
226
|
|
|
226
227
|
def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
|
|
227
|
-
"""After repointing facts onto the master,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
228
|
+
"""After repointing facts onto the master, collapse facts that are now
|
|
229
|
+
TRUE duplicates — same (subject, predicate, object) AND the same normalized
|
|
230
|
+
statement. These exist because the fact id is content_id(arena, statement):
|
|
231
|
+
two rows with statements differing only in case/whitespace hash to distinct
|
|
232
|
+
ids and so survived insert-time dedup; once their subject/object entities
|
|
233
|
+
are unified they are genuinely the same assertion and fuse safely.
|
|
234
|
+
|
|
235
|
+
The statement is PART OF THE KEY on purpose. Grouping on the triple alone is
|
|
236
|
+
NOT identity: a NULL object with a generic predicate (e.g. subject "said"
|
|
237
|
+
NULL) buckets together unrelated assertions, and build_fact_merge_plan would
|
|
238
|
+
keep one and DELETE the rest — destroying distinct facts (it deleted 33% of
|
|
239
|
+
one arena's facts that way before this fix). Same-triple / different-meaning
|
|
240
|
+
facts are left untouched here; the LLM semantic tier (_semantic_fact_groups
|
|
241
|
+
+ adjudicate_facts) is the only thing allowed to fuse facts whose statements
|
|
242
|
+
actually differ, and only on an affirmative same-assertion verdict."""
|
|
232
243
|
cur.execute(
|
|
233
244
|
"""SELECT id, predicate, object_entity_id, statement, confidence, provenance_event_ids
|
|
234
245
|
FROM facts
|
|
@@ -238,8 +249,12 @@ def _dedup_master_facts(cur, arena: str, master_id: str) -> int:
|
|
|
238
249
|
rows = cur.fetchall()
|
|
239
250
|
groups: dict[tuple, list[dict]] = {}
|
|
240
251
|
for r in rows:
|
|
241
|
-
#
|
|
242
|
-
|
|
252
|
+
# key = master subject anchor + predicate + object + NORMALIZED STATEMENT.
|
|
253
|
+
# statement in the key => only byte-equal-after-normalization dupes fuse.
|
|
254
|
+
groups.setdefault(
|
|
255
|
+
(master_id, r["predicate"], r["object_entity_id"], _norm(r["statement"] or "")),
|
|
256
|
+
[],
|
|
257
|
+
).append(r)
|
|
243
258
|
deduped = 0
|
|
244
259
|
for dup in groups.values():
|
|
245
260
|
plan = build_fact_merge_plan(arena=arena, dup_facts=dup)
|
|
@@ -371,6 +386,15 @@ def main() -> int:
|
|
|
371
386
|
merged += len(loser_ids)
|
|
372
387
|
return len(loser_ids)
|
|
373
388
|
|
|
389
|
+
# Audit invariant baseline: every entity/fact deletion MUST leave a
|
|
390
|
+
# rollback receipt. Capture pre-counts; cross-check after the run.
|
|
391
|
+
def _counts():
|
|
392
|
+
return {t: cur.execute(
|
|
393
|
+
f"SELECT count(*) AS n FROM {t} WHERE arena=%s",
|
|
394
|
+
(args.arena,)).fetchone()["n"]
|
|
395
|
+
for t in ("entities", "facts", "entity_merges", "fact_merges")}
|
|
396
|
+
pre_counts = _counts()
|
|
397
|
+
|
|
374
398
|
# Tier 1 — deterministic: exact normalized-name dupes only
|
|
375
399
|
# (case/whitespace variants). Co-occurrence is NOT auto-merged.
|
|
376
400
|
for group in _entity_dup_sets(cur, args.arena):
|
|
@@ -419,10 +443,26 @@ def main() -> int:
|
|
|
419
443
|
conn.commit()
|
|
420
444
|
llm_fact_merges += len(same) - 1
|
|
421
445
|
|
|
446
|
+
# Audit invariant: deletions must equal receipts written this run.
|
|
447
|
+
# A mismatch means some rows were deleted with no rollback receipt —
|
|
448
|
+
# e.g. a chained-merge cascade eating audit rows (fixed in migration
|
|
449
|
+
# 007, guarded here so it can never recur silently). Detective, not
|
|
450
|
+
# preventive (merges commit per-group), but it turns a silent leak
|
|
451
|
+
# into a loud, recorded failure. Holds trivially for dry-run (0==0).
|
|
452
|
+
post_counts = _counts()
|
|
453
|
+
ent_deleted = pre_counts["entities"] - post_counts["entities"]
|
|
454
|
+
fact_deleted = pre_counts["facts"] - post_counts["facts"]
|
|
455
|
+
ent_audited = post_counts["entity_merges"] - pre_counts["entity_merges"]
|
|
456
|
+
fact_audited = post_counts["fact_merges"] - pre_counts["fact_merges"]
|
|
457
|
+
audit_ok = (ent_deleted == ent_audited) and (fact_deleted == fact_audited)
|
|
458
|
+
|
|
422
459
|
run_id = "fdr_" + uuid.uuid4().hex[:20]
|
|
423
460
|
detail = {"proposals": proposals, "merged": merged,
|
|
424
461
|
"llm_entity_merges": llm_entity_merges, "llm_fact_merges": llm_fact_merges,
|
|
425
|
-
"llm_tier": bool(post_fn)}
|
|
462
|
+
"llm_tier": bool(post_fn),
|
|
463
|
+
"audit": {"ok": audit_ok,
|
|
464
|
+
"entities_deleted": ent_deleted, "entities_audited": ent_audited,
|
|
465
|
+
"facts_deleted": fact_deleted, "facts_audited": fact_audited}}
|
|
426
466
|
cur.execute(
|
|
427
467
|
"""INSERT INTO fusion_drive_runs (id, arena, pass_kind, mode, scanned, changed, detail, finished_at)
|
|
428
468
|
VALUES (%s,%s,'fusion',%s,%s,%s,%s::jsonb,NOW())""",
|
|
@@ -435,6 +475,14 @@ def main() -> int:
|
|
|
435
475
|
f"{merged} entities merged ({llm_entity_merges} via llm), {llm_fact_merges} facts merged via llm. "
|
|
436
476
|
f"LLM tier: {'on (distiller)' if post_fn else 'off'}")
|
|
437
477
|
print(f" ledger: {run_id}")
|
|
478
|
+
if not audit_ok:
|
|
479
|
+
print(f"[fusion-drive:fuse] AUDIT INVARIANT VIOLATED arena={args.arena}: "
|
|
480
|
+
f"entities deleted={ent_deleted} audited={ent_audited}; "
|
|
481
|
+
f"facts deleted={fact_deleted} audited={fact_audited}. "
|
|
482
|
+
f"Deletions without a rollback receipt — do NOT trust audit-based "
|
|
483
|
+
f"rollback for this run; restore from backup if needed.",
|
|
484
|
+
file=sys.stderr)
|
|
485
|
+
return 3
|
|
438
486
|
return 0
|
|
439
487
|
|
|
440
488
|
|
package/packages/memory-engine-v2/scripts/test_dedup_master_facts.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Guards _dedup_master_facts against the over-fusion that deleted 33% of an
|
|
2
|
+
arena's facts (2026-06-14): grouping post-merge facts by (subject, predicate,
|
|
3
|
+
object) alone treats a NULL object + generic predicate as identity and deletes
|
|
4
|
+
distinct assertions. The statement must be part of the dedup key so ONLY
|
|
5
|
+
byte-equal-after-normalization duplicates fuse; same-triple/different-meaning
|
|
6
|
+
facts are left for the LLM semantic tier."""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import importlib
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
import types
|
|
14
|
+
|
|
15
|
+
HERE = os.path.dirname(__file__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _load_fuse(monkeypatch):
|
|
19
|
+
fake_psycopg = types.ModuleType("psycopg")
|
|
20
|
+
fake_rows = types.ModuleType("psycopg.rows")
|
|
21
|
+
fake_rows.dict_row = object()
|
|
22
|
+
fake_psycopg.rows = fake_rows
|
|
23
|
+
monkeypatch.setitem(sys.modules, "psycopg", fake_psycopg)
|
|
24
|
+
monkeypatch.setitem(sys.modules, "psycopg.rows", fake_rows)
|
|
25
|
+
monkeypatch.syspath_prepend(os.path.join(HERE, "..", "fusion_drive"))
|
|
26
|
+
monkeypatch.syspath_prepend(HERE)
|
|
27
|
+
sys.modules.pop("fusion_drive_fuse", None)
|
|
28
|
+
return importlib.import_module("fusion_drive_fuse")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FakeCursor:
|
|
32
|
+
"""Returns preset fact rows on the SELECT; records ids passed to DELETE."""
|
|
33
|
+
|
|
34
|
+
def __init__(self, rows):
|
|
35
|
+
self._rows = rows
|
|
36
|
+
self.deleted_ids = []
|
|
37
|
+
|
|
38
|
+
def execute(self, sql, params=None):
|
|
39
|
+
s = " ".join(sql.split())
|
|
40
|
+
if s.startswith("DELETE FROM facts WHERE id = ANY"):
|
|
41
|
+
# params is a 1-tuple holding the id list
|
|
42
|
+
self.deleted_ids.extend(params[0])
|
|
43
|
+
# SELECT / UPDATE / INSERT: no-op (fetchall serves the preset rows)
|
|
44
|
+
|
|
45
|
+
def fetchall(self):
|
|
46
|
+
return self._rows
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _fact(fid, predicate, obj, statement, conf):
|
|
50
|
+
return {
|
|
51
|
+
"id": fid,
|
|
52
|
+
"predicate": predicate,
|
|
53
|
+
"object_entity_id": obj,
|
|
54
|
+
"statement": statement,
|
|
55
|
+
"confidence": conf,
|
|
56
|
+
"provenance_event_ids": [f"ev_{fid}"],
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_distinct_statements_same_triple_are_NOT_fused(monkeypatch):
|
|
61
|
+
fuse = _load_fuse(monkeypatch)
|
|
62
|
+
rows = [
|
|
63
|
+
_fact("f1", "said", None, "Standing by", 0.9),
|
|
64
|
+
_fact("f2", "said", None, "yeah ship it", 0.8), # distinct meaning
|
|
65
|
+
_fact("f3", "said", None, "modules/deep-memory is vestigial", 0.7),
|
|
66
|
+
]
|
|
67
|
+
cur = FakeCursor(rows)
|
|
68
|
+
deleted = fuse._dedup_master_facts(cur, "arena", "m")
|
|
69
|
+
assert deleted == 0, "must not fuse same-triple facts with different statements"
|
|
70
|
+
assert cur.deleted_ids == []
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_only_normalized_statement_duplicates_fuse(monkeypatch):
|
|
74
|
+
fuse = _load_fuse(monkeypatch)
|
|
75
|
+
rows = [
|
|
76
|
+
_fact("f1", "said", None, "Standing by", 0.9),
|
|
77
|
+
_fact("f2", "said", None, "standing by", 0.5), # same after _norm
|
|
78
|
+
_fact("f3", "said", None, "something else entirely", 0.7),
|
|
79
|
+
]
|
|
80
|
+
cur = FakeCursor(rows)
|
|
81
|
+
deleted = fuse._dedup_master_facts(cur, "arena", "m")
|
|
82
|
+
assert deleted == 1, "the case/whitespace duplicate should fuse"
|
|
83
|
+
assert cur.deleted_ids == ["f2"], "lower-confidence true-dupe is the one deleted"
|