@pentatonic-ai/ai-agent-sdk 0.10.7 → 0.10.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory-engine-v2/RFC-decay-and-fusion.md +185 -0
- package/packages/memory-engine-v2/RFC-fusion-drive.md +193 -0
- package/packages/memory-engine-v2/extractor-async/confidence.py +37 -0
- package/packages/memory-engine-v2/extractor-async/test_born_salience_parity.py +35 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +36 -6
- package/packages/memory-engine-v2/fusion_drive/__init__.py +0 -0
- package/packages/memory-engine-v2/fusion_drive/canonical.py +94 -0
- package/packages/memory-engine-v2/fusion_drive/conftest.py +8 -0
- package/packages/memory-engine-v2/fusion_drive/merge.py +178 -0
- package/packages/memory-engine-v2/fusion_drive/salience.py +118 -0
- package/packages/memory-engine-v2/fusion_drive/test_canonical.py +76 -0
- package/packages/memory-engine-v2/fusion_drive/test_merge.py +112 -0
- package/packages/memory-engine-v2/fusion_drive/test_salience.py +93 -0
- package/packages/memory-engine-v2/org-model/migrations/006_fusion_drive.sql +80 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_born_salience_backfill.py +113 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_decay.py +181 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +264 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Unit tests for Fusion Drive salience scoring + decay (pure, no DB)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import salience as S
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestBornSalience:
    """Born-salience seeding: base value, corroboration bonus, junk penalties, clamping."""

    def test_base_for_single_uncorroborated_clean_node(self):
        # One source and no quality flags must yield exactly the base value.
        seeded = S.born_salience(n_sources=1)
        assert seeded == S.BASE_SALIENCE

    def test_corroboration_raises_but_caps(self):
        # A second source lifts salience above base ...
        assert S.born_salience(n_sources=2) > S.BASE_SALIENCE
        # ... but the bonus stops growing at CORROB_CAP.
        heavily_corroborated = S.born_salience(n_sources=100)
        expected_cap = round(S.BASE_SALIENCE + S.CORROB_CAP, 4)
        assert heavily_corroborated == expected_cap

    def test_junk_is_born_below_decay_sweep_threshold(self):
        # Each hard-junk flag on its own has to land the node under the 0.3
        # sweep line, so decay can target pollution without a fusion match.
        hard_junk_flags = ("noise_name", "numeric_id_person", "hallucinated_email")
        for flag in hard_junk_flags:
            seeded = S.born_salience(n_sources=1, quality_flags=[flag])
            assert seeded < 0.3, flag

    def test_combined_flags_drive_to_floor(self):
        stacked_flags = ["numeric_id_person", "hallucinated_email", "ungrounded"]
        seeded = S.born_salience(n_sources=1, quality_flags=stacked_flags)
        assert seeded == S.SALIENCE_FLOOR

    def test_corroboration_cannot_rescue_hard_junk(self):
        # Five sources agreeing on a numeric-ID "person" still leaves it
        # below the clean single-source base.
        seeded = S.born_salience(n_sources=5, quality_flags=["numeric_id_person"])
        assert seeded < S.BASE_SALIENCE

    def test_never_exceeds_ceiling_or_below_floor(self):
        upper = S.born_salience(n_sources=1000)
        assert upper <= S.SALIENCE_CEIL
        lower = S.born_salience(n_sources=1, quality_flags=["noise_name"] * 10)
        assert lower == S.SALIENCE_FLOOR
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TestHalfLife:
    """Half-life lookup by node kind and, for facts, by category."""

    def test_durable_categories_barely_decay(self):
        # Decisions and commitments are expected to hold for at least 3650 days.
        for category in ("decision", "commitment"):
            assert S.half_life_days("fact", category) >= 3650

    def test_ephemeral_categories_fade_fast(self):
        # Mentions and observations are expected to halve within 30 days.
        for category in ("mention", "observation"):
            assert S.half_life_days("fact", category) <= 30

    def test_unknown_category_uses_default(self):
        fallback = S.half_life_days("fact", "made_up")
        assert fallback == S.FACT_HALF_LIFE_DEFAULT

    def test_entity_and_relationship_kinds(self):
        entity_hl = S.half_life_days("entity")
        assert entity_hl == S.ENTITY_HALF_LIFE_DAYS
        relationship_hl = S.half_life_days("relationship")
        assert relationship_hl == S.RELATIONSHIP_HALF_LIFE_DAYS
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestDecay:
    """Exponential decay of salience as a function of age and half-life."""

    def test_one_half_life_halves_salience(self):
        after_one = S.decayed_salience(0.8, age_days=30, hl_days=30)
        assert after_one == round(0.4, 4)

    def test_two_half_lives_quarters(self):
        after_two = S.decayed_salience(0.8, age_days=60, hl_days=30)
        assert after_two == round(0.2, 4)

    def test_no_age_no_decay(self):
        untouched = S.decayed_salience(0.6, age_days=0, hl_days=30)
        assert untouched == 0.6

    def test_durable_fact_after_a_year_barely_moves(self):
        decision_hl = S.half_life_days("fact", "decision")
        year_old = S.decayed_salience(0.7, age_days=365, hl_days=decision_hl)
        assert year_old > 0.65

    def test_mention_after_three_months_is_near_floor(self):
        mention_hl = S.half_life_days("fact", "mention")  # 30d
        stale = S.decayed_salience(0.5, age_days=90, hl_days=mention_hl)
        assert stale < 0.1
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class TestEvictable:
    """Eviction predicate: salience threshold, grace window, references, disclosure."""

    def test_low_salience_old_unreferenced_is_evictable(self):
        verdict = S.is_evictable(
            current_salience=0.02, age_days=60, referenced_by_live_node=False
        )
        assert verdict

    def test_referenced_node_survives(self):
        verdict = S.is_evictable(
            current_salience=0.0, age_days=999, referenced_by_live_node=True
        )
        assert not verdict

    def test_restricted_never_auto_evicted(self):
        verdict = S.is_evictable(
            current_salience=0.0,
            age_days=999,
            referenced_by_live_node=False,
            disclosure_class="restricted",
        )
        assert not verdict

    def test_recent_low_salience_not_yet_evictable(self):
        # Under the threshold, but only 5 days old: the grace window shields it.
        verdict = S.is_evictable(
            current_salience=0.01, age_days=5, referenced_by_live_node=False
        )
        assert not verdict

    def test_healthy_salience_never_evictable(self):
        verdict = S.is_evictable(
            current_salience=0.5, age_days=9999, referenced_by_live_node=False
        )
        assert not verdict
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
-- 006: Fusion Drive foundations — salience + audit/ledger tables.
|
|
2
|
+
--
|
|
3
|
+
-- See RFC-fusion-drive.md. This migration is Phase 1 (scoring only, NO
|
|
4
|
+
-- eviction): it adds the columns the decay pass scores against and the
|
|
5
|
+
-- audit/ledger tables fusion + decay write to. Nothing in this migration
|
|
6
|
+
-- deletes or evicts; the decay pass ships dry-run first.
|
|
7
|
+
--
|
|
8
|
+
-- KEY DESIGN CALL: salience is SEPARATE from confidence. `confidence`
|
|
9
|
+
-- means "how corroborated/true is this fact" and must only ever move up
|
|
10
|
+
-- with corroboration — decaying it would lie about truth. `salience` is
|
|
11
|
+
-- "retention priority" and is what decays with time + rises with access.
|
|
12
|
+
-- Eviction (a later phase) keys on salience, never on confidence.
|
|
13
|
+
--
|
|
14
|
+
-- All idempotent (IF NOT EXISTS) — safe to re-run.
|
|
15
|
+
|
|
16
|
+
-- Retention scoring columns. `salience` starts at a neutral 0.5 on every
-- row; real values come from the extractor's born-salience seeding and the
-- decay pass. `last_accessed` is bumped by /search when a node is returned
-- (keeping used memories alive) and stays NULL until a node is first
-- retrieved.
ALTER TABLE entities
    ADD COLUMN IF NOT EXISTS salience REAL NOT NULL DEFAULT 0.5,
    ADD COLUMN IF NOT EXISTS last_accessed TIMESTAMPTZ;

ALTER TABLE facts
    ADD COLUMN IF NOT EXISTS salience REAL NOT NULL DEFAULT 0.5,
    ADD COLUMN IF NOT EXISTS last_accessed TIMESTAMPTZ;

ALTER TABLE relationships
    ADD COLUMN IF NOT EXISTS salience REAL NOT NULL DEFAULT 0.5,
    ADD COLUMN IF NOT EXISTS last_accessed TIMESTAMPTZ;

-- Partial indexes for the decay sweep: they cover only low-salience rows
-- (salience < 0.3), keyed by arena.
CREATE INDEX IF NOT EXISTS idx_entities_salience
    ON entities (arena, salience)
    WHERE salience < 0.3;

CREATE INDEX IF NOT EXISTS idx_facts_salience
    ON facts (arena, salience)
    WHERE salience < 0.3;

CREATE INDEX IF NOT EXISTS idx_relationships_salience
    ON relationships (arena, salience)
    WHERE salience < 0.3;
|
|
31
|
+
|
|
32
|
+
-- fact_merges: the fact-fusion counterpart of entity_merges (002). It is
-- the receipt + rollback substrate for merges: the losing fact row is
-- deleted when two facts fuse, and this table records which fact absorbed
-- it plus enough data to recreate it.
CREATE TABLE IF NOT EXISTS fact_merges (
    id TEXT PRIMARY KEY,
    arena TEXT NOT NULL,
    -- Surviving fact; receipts are removed with it (ON DELETE CASCADE).
    canonical_id TEXT NOT NULL REFERENCES facts(id) ON DELETE CASCADE,
    -- Deliberately no FK: the deprecated row no longer exists after the merge.
    deprecated_id TEXT NOT NULL,
    -- Statement text of the deleted fact, preserved for forensics.
    deprecated_statement TEXT NOT NULL,
    merge_signal TEXT NOT NULL CHECK (
        merge_signal IN ('exact_triple', 'statement_embedding', 'llm_adjudication')
    ),
    provenance_unioned INTEGER NOT NULL DEFAULT 0,
    rollback_payload JSONB NOT NULL,
    merged_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_fact_merges_canonical
    ON fact_merges (canonical_id);
|
|
49
|
+
|
|
50
|
+
-- fusion_drive_runs: run ledger for observability of the Fusion Drive
-- passes. Every pass (fusion or decay) writes a single row stating what it
-- scanned, what it changed, and whether it ran as a dry-run.
CREATE TABLE IF NOT EXISTS fusion_drive_runs (
    id TEXT PRIMARY KEY,
    arena TEXT NOT NULL,
    pass_kind TEXT NOT NULL CHECK (pass_kind IN ('fusion', 'decay')),
    mode TEXT NOT NULL CHECK (mode IN ('dry_run', 'apply')),
    scanned INTEGER NOT NULL DEFAULT 0,
    -- Count of merged rows for a fusion pass, evicted rows for a decay pass.
    changed INTEGER NOT NULL DEFAULT 0,
    detail JSONB NOT NULL DEFAULT '{}'::jsonb,
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at TIMESTAMPTZ
);

CREATE INDEX IF NOT EXISTS idx_fusion_drive_runs_arena
    ON fusion_drive_runs (arena, started_at DESC);
|
|
65
|
+
|
|
66
|
+
-- node_evictions: rollback receipts for decay eviction. The decay pass
-- deletes only nodes its salience math has already classified evictable
-- (low salience, aged past the grace floor, unreferenced, non-restricted),
-- and stores the full deleted row here so the eviction can be undone.
-- Follows the same merge-audit pattern as 002 / fact_merges.
CREATE TABLE IF NOT EXISTS node_evictions (
    id TEXT PRIMARY KEY,
    arena TEXT NOT NULL,
    node_kind TEXT NOT NULL CHECK (node_kind IN ('entity', 'fact', 'relationship')),
    node_id TEXT NOT NULL,
    salience_at_evict REAL,
    rollback_payload JSONB NOT NULL,
    evicted_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_node_evictions_arena
    ON node_evictions (arena, evicted_at DESC);
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Fusion Drive — born-salience backfill for EXISTING rows.
|
|
3
|
+
|
|
4
|
+
Migration 006 defaults every pre-existing entity/fact to salience 0.5,
|
|
5
|
+
including accumulated 7B-era junk (e.g. the 87k pip-agents events). At 0.5 a
|
|
6
|
+
junk entity would take ~3+ years to decay under the entity half-life — so the
|
|
7
|
+
"junk self-evicts" cure is inert for existing data until their salience is
|
|
8
|
+
re-seeded from the same quality flags the worker now stamps at insert.
|
|
9
|
+
|
|
10
|
+
This pass recomputes born-salience for existing entities + facts in an arena
|
|
11
|
+
and writes it back, so the decay pass can act on historical pollution. It only
|
|
12
|
+
LOWERS salience where the quality flags fire (never raises a node above what
|
|
13
|
+
it already has — re-corroboration, not this pass, raises salience). Reads the
|
|
14
|
+
same digit-ratio / subject-undeclared signals the worker uses; deeper signals
|
|
15
|
+
(ungrounded vs source content) are a follow-up.
|
|
16
|
+
|
|
17
|
+
Arena-scoped (required), dry-run default, --apply to write.
|
|
18
|
+
|
|
19
|
+
Usage:
|
|
20
|
+
fusion_drive_born_salience_backfill.py --arena 'X' # report
|
|
21
|
+
fusion_drive_born_salience_backfill.py --arena 'X' --apply # write salience
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
|
|
30
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
|
|
31
|
+
import salience as S # noqa: E402
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
import psycopg
|
|
35
|
+
from psycopg.rows import dict_row
|
|
36
|
+
except ModuleNotFoundError:
|
|
37
|
+
print("psycopg required", file=sys.stderr)
|
|
38
|
+
raise
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _digit_ratio(s: str) -> float:
|
|
42
|
+
stripped = "".join((s or "").split())
|
|
43
|
+
return sum(c.isdigit() for c in stripped) / len(stripped) if stripped else 0.0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def main() -> int:
    """Re-seed born salience for one arena's existing entities and facts.

    Scans every entity and fact in ``--arena``, derives quality flags from
    the row itself, recomputes born salience via ``S.born_salience`` and
    records a candidate whenever the recomputed value is lower than the
    stored one. Dry-run by default; ``--apply`` writes the lowered values.

    The "only ever lowers" rule is enforced twice: once in Python against the
    value read during the scan, and again inside the UPDATE itself
    (``salience > new value``), so a row whose salience changed after it was
    read can never be raised by this pass. Updates are also scoped to the
    arena and sent in one ``executemany`` batch per table instead of opening
    a cursor per row.

    Returns:
        Process exit code: 0 on success, 2 when no Postgres DSN is available
        from ``--pg-dsn`` or the ``PG_DSN`` environment variable.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--arena", required=True)
    ap.add_argument("--pg-dsn", default=os.environ.get("PG_DSN"))
    ap.add_argument("--apply", action="store_true")
    args = ap.parse_args()
    if not args.pg_dsn:
        print("PG_DSN required", file=sys.stderr)
        return 2

    def source_count(row) -> int:
        # A NULL or empty provenance list is scored as a single source.
        return len(row["provenance_event_ids"] or []) or 1

    # Parameter tuples for the guarded UPDATEs: (new_sal, id, arena, new_sal).
    ent_updates: list[tuple] = []
    fact_updates: list[tuple] = []
    ent_scanned = fact_scanned = 0

    with psycopg.connect(args.pg_dsn, row_factory=dict_row) as conn:
        with conn.cursor() as cur:
            # entities: numeric-ID-as-person born low
            cur.execute(
                "SELECT id, entity_type, canonical_name, salience, provenance_event_ids "
                "FROM entities WHERE arena = %s", (args.arena,))
            for e in cur.fetchall():
                ent_scanned += 1
                flags = []
                if e["entity_type"] == "person" and _digit_ratio(e["canonical_name"]) > 0.5:
                    flags.append("numeric_id_person")
                if not flags:
                    continue
                new_sal = S.born_salience(n_sources=source_count(e), quality_flags=flags)
                if new_sal < e["salience"]:
                    ent_updates.append((new_sal, e["id"], args.arena, new_sal))

            # facts: subject-undeclared / low-signal born low
            cur.execute(
                "SELECT id, subject_entity_id, statement, salience, provenance_event_ids "
                "FROM facts WHERE arena = %s", (args.arena,))
            for f in cur.fetchall():
                fact_scanned += 1
                flags = []
                if f["subject_entity_id"] is None:
                    flags.append("subject_undeclared")
                # Statements under 60 characters count as low-signal.
                # NOTE(review): presumably mirrors the worker's threshold — confirm.
                if len(f["statement"] or "") < 60:
                    flags.append("low_signal")
                if not flags:
                    continue
                new_sal = S.born_salience(n_sources=source_count(f), quality_flags=flags)
                if new_sal < f["salience"]:
                    fact_updates.append((new_sal, f["id"], args.arena, new_sal))

            if args.apply:
                # The `salience > %s` guard makes "never raises" hold even if
                # the row changed between the scan above and this write.
                if ent_updates:
                    cur.executemany(
                        "UPDATE entities SET salience = %s "
                        "WHERE id = %s AND arena = %s AND salience > %s",
                        ent_updates)
                if fact_updates:
                    cur.executemany(
                        "UPDATE facts SET salience = %s "
                        "WHERE id = %s AND arena = %s AND salience > %s",
                        fact_updates)
        if args.apply:
            conn.commit()

    ent_lowered = len(ent_updates)
    fact_lowered = len(fact_updates)
    mode = "APPLY" if args.apply else "DRY-RUN"
    print(f"[fusion-drive:born-salience-backfill] {mode} arena={args.arena}")
    print(f"  entities: scanned={ent_scanned} lowered={ent_lowered}")
    print(f"  facts:    scanned={fact_scanned} lowered={fact_lowered}")
    if not args.apply:
        print("  (dry-run — run --apply to write. Lowers junk salience only; never raises.)")
    return 0
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
if __name__ == "__main__":
|
|
113
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Fusion Drive — decay pass (scoring + eviction).
|
|
3
|
+
|
|
4
|
+
Recomputes time-decayed salience for an arena's facts/entities/relationships,
|
|
5
|
+
reports eviction candidates, and — only with --evict — deletes them, writing a
|
|
6
|
+
node_evictions rollback receipt per deletion. Three escalating modes:
|
|
7
|
+
|
|
8
|
+
(default) DRY-RUN — report candidates, change nothing.
|
|
9
|
+
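    --apply   — meant to persist recomputed salience back to rows (no deletion);
                note that this script does not yet write salience, so today the
                flag only records the run as mode 'apply' in fusion_drive_runs.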
|
|
10
|
+
--evict — additionally DELETE evictable nodes (implies --apply),
|
|
11
|
+
each with a full rollback_payload in node_evictions.
|
|
12
|
+
|
|
13
|
+
Pollution cure (recap): junk born at low salience (extractor quality flags)
|
|
14
|
+
falls below the eviction threshold purely from age, so it self-evicts without
|
|
15
|
+
needing a fusion match. Eviction safety: arena-scoped, restricted disclosure
|
|
16
|
+
never evicted, entities referenced by a surviving fact never evicted, every
|
|
17
|
+
deletion reversible from node_evictions, one transaction.
|
|
18
|
+
|
|
19
|
+
Usage:
|
|
20
|
+
fusion_drive_decay.py --arena 'X' # dry-run report
|
|
21
|
+
fusion_drive_decay.py --arena 'X' --apply # persist salience, no deletion
|
|
22
|
+
fusion_drive_decay.py --arena 'X' --evict # delete evictable (reversible)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import os
|
|
29
|
+
import sys
|
|
30
|
+
import uuid
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
|
|
33
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
|
|
34
|
+
import json # noqa: E402
|
|
35
|
+
import salience as S # noqa: E402
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
import psycopg
|
|
39
|
+
except ModuleNotFoundError:
|
|
40
|
+
print("psycopg required", file=sys.stderr)
|
|
41
|
+
raise
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _age_days(ref: datetime | None, now: datetime) -> float:
|
|
45
|
+
if ref is None:
|
|
46
|
+
return 0.0
|
|
47
|
+
if ref.tzinfo is None:
|
|
48
|
+
ref = ref.replace(tzinfo=timezone.utc)
|
|
49
|
+
return max(0.0, (now - ref).total_seconds() / 86400.0)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _scan(cur, arena: str, now: datetime) -> tuple[dict, list[dict]]:
    """Score an arena's facts and entities and list the eviction candidates.

    Reads only. For each row the decay clock is the most recent non-NULL
    timestamp of (last_accessed, asserted_at) for facts and
    (last_accessed, last_seen) for entities. Salience is decayed with
    ``S.decayed_salience`` over the age on that clock and then classified
    with ``S.is_evictable``.

    Naive timestamps are read as UTC *before* the two clock candidates are
    compared. Comparing a naive value against an aware one raises TypeError,
    which previously could happen inside ``max()`` before ``_age_days`` got
    the chance to normalise anything.

    Args:
        cur: open DB cursor yielding tuple rows.
        arena: arena whose rows are scanned.
        now: timezone-aware reference time for age computation.

    Returns:
        ``(report, evictable)``. ``report`` maps "facts" / "entities" to
        ``{"scanned", "evict_candidates"}`` counts. ``evictable`` carries only
        ``{node_kind, id, salience}`` per candidate — the full row for the
        rollback receipt is obtained at delete time in ``_evict``.
    """

    def latest(*stamps):
        """Newest non-NULL timestamp with naive values read as UTC; None if all NULL."""
        aware = [t if t.tzinfo is not None else t.replace(tzinfo=timezone.utc)
                 for t in stamps if t is not None]
        return max(aware, default=None)

    report: dict = {}
    evictable: list[dict] = []

    # facts: decay clock = most recent of (last_accessed, asserted_at)
    cur.execute(
        """SELECT id, category, salience, asserted_at, last_accessed, disclosure_class
           FROM facts WHERE arena = %s""",
        (arena,),
    )
    rows = cur.fetchall()
    fcand = 0
    for fid, category, sal, asserted, accessed, disc in rows:
        age = _age_days(latest(accessed, asserted), now)
        cur_sal = S.decayed_salience(sal, age, S.half_life_days("fact", category))
        # Facts are always scored as unreferenced; a NULL disclosure class is
        # treated as "private".
        if S.is_evictable(current_salience=cur_sal, age_days=age,
                          referenced_by_live_node=False, disclosure_class=disc or "private"):
            fcand += 1
            evictable.append({"node_kind": "fact", "id": fid, "salience": cur_sal})
    report["facts"] = {"scanned": len(rows), "evict_candidates": fcand}

    # entities: an entity referenced by ANY surviving fact OR relationship is
    # NOT evictable. The relationship check is essential — relationships FK
    # entities with ON DELETE CASCADE, so evicting a rel-endpoint entity would
    # silently cascade-delete the relationship with no rollback receipt.
    cur.execute(
        """SELECT e.id, e.entity_type, e.salience, e.last_seen, e.last_accessed, e.disclosure_class,
                  (EXISTS (SELECT 1 FROM facts f WHERE f.arena = e.arena
                           AND (f.subject_entity_id = e.id OR f.object_entity_id = e.id))
                   OR EXISTS (SELECT 1 FROM relationships r WHERE r.arena = e.arena
                           AND (r.from_entity_id = e.id OR r.to_entity_id = e.id))) AS referenced
           FROM entities e WHERE e.arena = %s""",
        (arena,),
    )
    rows = cur.fetchall()
    ecand = 0
    for eid, etype, sal, last_seen, accessed, disc, referenced in rows:
        age = _age_days(latest(accessed, last_seen), now)
        cur_sal = S.decayed_salience(sal, age, S.half_life_days("entity"))
        if S.is_evictable(current_salience=cur_sal, age_days=age,
                          referenced_by_live_node=bool(referenced), disclosure_class=disc or "private"):
            ecand += 1
            evictable.append({"node_kind": "entity", "id": eid, "salience": cur_sal})
    report["entities"] = {"scanned": len(rows), "evict_candidates": ecand}

    # NOTE: relationship DECAY/eviction is intentionally NOT done here yet
    # (the migration adds salience to relationships, but seeding + a clock
    # policy for edges is a follow-up). Relationships only leave via the
    # entity-merge collision path or cascade — and the guard above prevents
    # cascade from silently dropping a live edge.
    return report, evictable
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _evict(cur, evictable: list[dict], now: datetime) -> int:
|
|
112
|
+
"""Delete evictable nodes, one COMPLETE node_evictions receipt each. The
|
|
113
|
+
full row is re-selected as JSON immediately before deletion so the
|
|
114
|
+
rollback_payload can actually recreate the row (the headline reversibility
|
|
115
|
+
guarantee). Facts before entities. Runs in the caller's transaction."""
|
|
116
|
+
table = {"fact": "facts", "entity": "entities", "relationship": "relationships"}
|
|
117
|
+
evicted = 0
|
|
118
|
+
for kind in ("fact", "entity", "relationship"):
|
|
119
|
+
tbl = table[kind]
|
|
120
|
+
for node in [n for n in evictable if n["node_kind"] == kind]:
|
|
121
|
+
cur.execute(f"SELECT to_jsonb(t) FROM {tbl} t WHERE id = %s", (node["id"],))
|
|
122
|
+
row = cur.fetchone()
|
|
123
|
+
if not row:
|
|
124
|
+
continue # already gone (e.g. fact whose entity cascade-nulled it elsewhere)
|
|
125
|
+
full_row = row[0]
|
|
126
|
+
cur.execute(
|
|
127
|
+
"""INSERT INTO node_evictions (id, arena, node_kind, node_id, salience_at_evict, rollback_payload)
|
|
128
|
+
VALUES (%s, %s, %s, %s, %s, %s::jsonb)""",
|
|
129
|
+
("nev_" + uuid.uuid4().hex[:20], full_row.get("arena"), kind, node["id"],
|
|
130
|
+
node["salience"], json.dumps(full_row, default=str)),
|
|
131
|
+
)
|
|
132
|
+
cur.execute(f"DELETE FROM {tbl} WHERE id = %s", (node["id"],))
|
|
133
|
+
evicted += 1
|
|
134
|
+
return evicted
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def main() -> int:
    """Run one decay pass over an arena and record it in fusion_drive_runs.

    Always scans (read-only) and writes one ledger row, including for a
    dry-run. With ``--evict`` the evictable nodes are deleted in the same
    transaction, each with a node_evictions receipt.

    ``--apply`` does NOT write recomputed salience back to rows: nothing in
    this script issues such an UPDATE. The flag only changes the ledger
    ``mode`` to 'apply' and the printed label. The help text and a stderr
    notice now say so instead of promising persistence that does not happen.

    Returns:
        Process exit code: 0 on success, 2 when no Postgres DSN is available
        from ``--pg-dsn`` or the ``PG_DSN`` environment variable.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--arena", required=True, help="arena to scan (required — never global)")
    ap.add_argument("--pg-dsn", default=os.environ.get("PG_DSN"))
    ap.add_argument("--apply", action="store_true",
                    help="record the run as mode 'apply' (no deletion); NOTE: recomputed "
                         "salience is not yet written back to rows by this script")
    ap.add_argument("--evict", action="store_true",
                    help="DELETE evictable nodes (reversible via node_evictions); implies --apply")
    args = ap.parse_args()
    if not args.pg_dsn:
        print("PG_DSN required (env or --pg-dsn)", file=sys.stderr)
        return 2
    if args.apply:
        # Be explicit rather than silently doing less than the flag suggests.
        print("[fusion-drive:decay] note: --apply does not persist recomputed salience yet; "
              "only the fusion_drive_runs ledger row is written", file=sys.stderr)

    now = datetime.now(timezone.utc)
    evicted = 0
    with psycopg.connect(args.pg_dsn) as conn:  # NOT autocommit — eviction is one txn
        with conn.cursor() as cur:
            report, evictable = _scan(cur, args.arena, now)
            if args.evict:
                evicted = _evict(cur, evictable, now)
            run_id = "fdr_" + uuid.uuid4().hex[:20]
            scanned = sum(r["scanned"] for r in report.values())
            mode = "apply" if (args.evict or args.apply) else "dry_run"
            # The ledger row is written for every mode; `changed` is the
            # number of evicted nodes (0 unless --evict).
            cur.execute(
                """INSERT INTO fusion_drive_runs (id, arena, pass_kind, mode, scanned, changed, detail, finished_at)
                   VALUES (%s, %s, 'decay', %s, %s, %s, %s::jsonb, NOW())""",
                (run_id, args.arena, mode, scanned, evicted, json.dumps(report)),
            )
        conn.commit()

    label = ("EVICT (deleted, reversible via node_evictions)" if args.evict
             else "APPLY (salience only)" if args.apply else "DRY-RUN")
    print(f"[fusion-drive:decay] {label} arena={args.arena}")
    for kind, r in report.items():
        print(f"  {kind}: scanned={r['scanned']} evict_candidates={r['evict_candidates']}")
    if args.evict:
        print(f"  EVICTED {evicted} node(s) — rollback receipts in node_evictions")
    else:
        print(f"  {sum(r['evict_candidates'] for r in report.values())} would evict (run --evict to delete)")
    print(f"  ledger: {run_id}")
    return 0
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
if __name__ == "__main__":
|
|
181
|
+
raise SystemExit(main())
|