@pentatonic-ai/ai-agent-sdk 0.10.7 → 0.10.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/packages/memory-engine-v2/RFC-decay-and-fusion.md +185 -0
- package/packages/memory-engine-v2/RFC-fusion-drive.md +193 -0
- package/packages/memory-engine-v2/extractor-async/confidence.py +37 -0
- package/packages/memory-engine-v2/extractor-async/test_born_salience_parity.py +35 -0
- package/packages/memory-engine-v2/extractor-async/worker.py +36 -6
- package/packages/memory-engine-v2/fusion_drive/__init__.py +0 -0
- package/packages/memory-engine-v2/fusion_drive/canonical.py +94 -0
- package/packages/memory-engine-v2/fusion_drive/conftest.py +8 -0
- package/packages/memory-engine-v2/fusion_drive/merge.py +178 -0
- package/packages/memory-engine-v2/fusion_drive/salience.py +118 -0
- package/packages/memory-engine-v2/fusion_drive/test_canonical.py +76 -0
- package/packages/memory-engine-v2/fusion_drive/test_merge.py +112 -0
- package/packages/memory-engine-v2/fusion_drive/test_salience.py +93 -0
- package/packages/memory-engine-v2/org-model/migrations/006_fusion_drive.sql +80 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_born_salience_backfill.py +113 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_decay.py +181 -0
- package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +264 -0
package/dist/index.cjs
CHANGED
|
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
878
878
|
}
|
|
879
879
|
|
|
880
880
|
// src/telemetry.js
|
|
881
|
-
var VERSION = "0.10.7";
|
|
881
|
+
var VERSION = "0.10.8";
|
|
882
882
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
883
883
|
function machineId() {
|
|
884
884
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
847
847
|
}
|
|
848
848
|
|
|
849
849
|
// src/telemetry.js
|
|
850
|
-
var VERSION = "0.10.7";
|
|
850
|
+
var VERSION = "0.10.8";
|
|
851
851
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
852
852
|
function machineId() {
|
|
853
853
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.10.7",
|
|
3
|
+
"version": "0.10.8",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# RFC: the Fusion Drive — v2 memory self-healing (cross-run node fusion + decay)
|
|
2
|
+
|
|
3
|
+
> **Fusion Drive** = the continuous, arena-scoped background engine that keeps the v2
|
|
4
|
+
> memory graph self-healing: it *fuses* duplicate/near-duplicate nodes from different
|
|
5
|
+
> distillation runs into a single master node (horizontal convergence) and *decays* stale,
|
|
6
|
+
> low-value, and junk nodes out of existence (vertical aging). Named for the drive that
|
|
7
|
+
> does the fusing — the decay pass rides the same engine.
|
|
8
|
+
|
|
9
|
+
**Status:** draft / spec — 2026-06-12
|
|
10
|
+
**Builds on:** `RFC-entity-reconciliation.md`, `scripts/entity_resolution_v2.py` (#82),
|
|
11
|
+
`org-model/migrations/002_entity_merges_audit.sql`.
|
|
12
|
+
**Motivated by:** the v2 store is currently **pure-accretion** — three independent
|
|
13
|
+
properties, all verified in code, mean nothing ever leaves or improves in place:
|
|
14
|
+
|
|
15
|
+
1. **No supersede by source_id** — event identity is `sha256(arena:content)`; re-emitting
|
|
16
|
+
edited content appends a new event, the old persists.
|
|
17
|
+
2. **Accrete-only graph writes** — entity/fact upserts are `ON CONFLICT (id) DO UPDATE`
|
|
18
|
+
that only merge aliases/provenance and bump confidence; a *corrected* extraction has a
|
|
19
|
+
different deterministic id, so it lands **beside** the polluted node, never replacing it.
|
|
20
|
+
3. **No decay/eviction** — v2 has no GC; fact confidence only moves up; recency affects
|
|
21
|
+
search ranking only, never retention.
|
|
22
|
+
|
|
23
|
+
Net: improving the extractor/teacher only helps **new** content. Accumulated 7B-era
|
|
24
|
+
pollution (hallucinated emails, numeric-ID-as-person, ungrounded entities) is immortal.
|
|
25
|
+
`pentatonic-team` had to be **nuked** rather than re-distilled because of this; `pip-agents`
|
|
26
|
+
(87k events) still carries all of it.
|
|
27
|
+
|
|
28
|
+
This RFC makes the store **self-healing** via two complementary mechanisms:
|
|
29
|
+
**fusion** (horizontal — converge duplicate/near-duplicate nodes from different
|
|
30
|
+
distillation runs into one *master* node) and **decay** (vertical — age out stale and
|
|
31
|
+
low-value nodes). Both are gated, arena-scoped, audited, and reversible.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Part A — Fusion: converge near-duplicate nodes into a master
|
|
36
|
+
|
|
37
|
+
Extends the existing entity-resolution machinery along four axes.
|
|
38
|
+
|
|
39
|
+
### A1. Online + continuous (today it's dry-run batch)
|
|
40
|
+
Run fusion as a scheduled per-arena pass (systemd timer on the engine box, same pattern as
|
|
41
|
+
the distiller autoscaler) **and** opportunistically after a distillation run touches an
|
|
42
|
+
arena's entities. Keep #82's invariants: dry-run default, `--apply` gate, arena scoping,
|
|
43
|
+
`entity_merges` rollback. Add a `fusion_runs` ledger (arena, started_at, candidates,
|
|
44
|
+
merged, mode) for observability.
|
|
45
|
+
|
|
46
|
+
### A2. Cross-distillation-run detection (the actual pollution cure)
|
|
47
|
+
The hard case #82 misses: 7B `"1716801984"` (numeric-ID person) and Qwen3.6 `"Katie Cooper"`
|
|
48
|
+
are the same real entity but share **no name similarity**, so name-blocking never compares
|
|
49
|
+
them. New candidate signals beyond name trigrams / embedding-on-name:
|
|
50
|
+
|
|
51
|
+
- **Shared-provenance co-reference** — two entities of the same `entity_type` citing the
|
|
52
|
+
same `event_id` in `provenance_event_ids`, where one is low-quality (numeric / ungrounded
|
|
53
|
+
/ single-token). The shared event's content is the adjudication context ("does this event
|
|
54
|
+
support these being the same person?").
|
|
55
|
+
- **Context embedding** — embed the *facts/statements about* an entity (not just its name),
|
|
56
|
+
so name-divergent dupes still cluster. Reuses the bulk-embed lane.
|
|
57
|
+
- **Teacher-version signal** — provenance maps to `distillation_traces.llm_model` /
|
|
58
|
+
`system_prompt_hash`. Prefer the newer-teacher extraction as master; an entity *only* ever
|
|
59
|
+
produced by the superseded teacher and never re-confirmed by the new one is both a fusion
|
|
60
|
+
candidate (likely a worse rendering of a node the new teacher got right) and a decay
|
|
61
|
+
candidate (stale-teacher orphan — see B).
|
|
62
|
+
|
|
63
|
+
### A3. Master-node selection — replace richest-row-wins
|
|
64
|
+
#82 uses "richest-row-wins", which (flagged in review) would crown the typo **"Phil Mossop"**
|
|
65
|
+
over **"Philip Mossop"**. Replace with a **scored** canonical pick:
|
|
66
|
+
|
|
67
|
+
| Signal | Effect |
|
|
68
|
+
|---|---|
|
|
69
|
+
| **Directory/authority anchor** (name matches an org-directory / HubSpot contact / Pip `contact_email`+`contact_name`) | dominant + → canonical |
|
|
70
|
+
| Grounding (name appears verbatim in a provenance event's content) | + |
|
|
71
|
+
| Teacher recency (newer `llm_model`) | + |
|
|
72
|
+
| Corroboration (`cardinality(provenance_event_ids)`) | + |
|
|
73
|
+
| Looks-like-ID (digit-ratio > 0.5) / hallucinated-email flag / single-token bare name | − − |
|
|
74
|
+
|
|
75
|
+
Master = highest score. Losers' surface forms become **aliases** on the master (so existing
|
|
76
|
+
lookups still resolve), facts/relationships are repointed, losers tombstoned in
|
|
77
|
+
`entity_merges` with `rollback_payload`. Directory-anchored selection is the key fix: an
|
|
78
|
+
authoritative source, when present, beats any heuristic.
|
|
79
|
+
|
|
80
|
+
### A4. Fact + relationship fusion (today only entities fuse)
|
|
81
|
+
After entity fusion (so subject/object ids are canonical):
|
|
82
|
+
- **Facts** — exact `(arena, subject, predicate, object)` dupes already collapse via the
|
|
83
|
+
content-id. **Semantic** dupes (same assertion, different surface — "joined Acme" vs "works
|
|
84
|
+
at Acme") need statement-embedding similarity + LLM adjudication ("same assertion?").
|
|
85
|
+
Master fact = max confidence + best-grounded statement; union provenance; tombstone dupes.
|
|
86
|
+
New `fact_merges` audit mirroring `entity_merges`.
|
|
87
|
+
- **Relationships** — `(from,to,type)` already collapses; a controlled rel-type vocabulary
|
|
88
|
+
("works at" ≡ "employed by") is a later optional canonicalization.
|
|
89
|
+
|
|
90
|
+
### A5. Audit, reversibility, safety rails
|
|
91
|
+
Reuse `entity_merges`; add `fact_merges`. Every fusion carries `rollback_payload`.
|
|
92
|
+
LLM-adjudicated merges store prompt+verdict. **Disclosure rail:** never send
|
|
93
|
+
`disclosure_class='restricted'` rows to the LLM adjudicator (data-egress; the #82 review
|
|
94
|
+
item). Auto-merge only above a high confidence band; everything else → human-review queue.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Part B — Decay: age out stale and low-value nodes
|
|
99
|
+
|
|
100
|
+
### B1. Separate `salience` from `confidence` (important)
|
|
101
|
+
Do **not** decay `confidence` — it means "how corroborated/true is this", and decaying it
|
|
102
|
+
would lie about corroboration. Add a separate **`salience`** (retention priority) to
|
|
103
|
+
entities/facts/relationships. Decay acts on salience; eviction keys on salience.
|
|
104
|
+
|
|
105
|
+
`salience(t) = salience₀ · exp(−ln2 · Δt / half_life[category])`, bumped on access or
|
|
106
|
+
re-corroboration. Per-category half-life:
|
|
107
|
+
|
|
108
|
+
| category | half-life | rationale |
|
|
109
|
+
|---|---|---|
|
|
110
|
+
| decision, commitment | very long / ∞ | durable record |
|
|
111
|
+
| state, preference | medium | changes but matters |
|
|
112
|
+
| mention, observation | short | ephemeral |
|
|
113
|
+
|
|
114
|
+
`Δt` = time since the more recent of `last_seen` and a new `last_accessed` (bumped when a node is returned by
|
|
115
|
+
`/search` — cheap write, makes retrieval keep memories alive). Re-corroboration (new
|
|
116
|
+
provenance) resets the clock and bumps salience.
|
|
117
|
+
|
|
118
|
+
### B2. Born-salience — the cheap partial cure
|
|
119
|
+
Seed `salience₀` from extraction-quality signals already computed (the trap detectors:
|
|
120
|
+
ungrounded, numeric-ID-person, hallucinated-email, `noise_filter` hits). **Junk is born
|
|
121
|
+
low**, so it decays below threshold and self-evicts fast — pollution cleans itself even
|
|
122
|
+
without a fusion match.
|
|
123
|
+
|
|
124
|
+
### B3. Eviction (GC)
|
|
125
|
+
Node is evictable when: `salience < min_threshold` **AND** `last_seen`/`last_accessed`
|
|
126
|
+
older than a floor **AND** not referenced by a surviving higher-salience node (an entity
|
|
127
|
+
that's the subject/object of a live fact survives). Eviction = **tombstone** (soft-delete +
|
|
128
|
+
retention window) → hard-delete after grace, cascading to the node's Qdrant points +
|
|
129
|
+
`vector_provenance`. Never evict `disclosure_class='restricted'` without sign-off.
|
|
130
|
+
|
|
131
|
+
### B4. Capacity bound (optional)
|
|
132
|
+
Per-arena soft cap; when exceeded, evict lowest-salience first. Backstop against unbounded
|
|
133
|
+
arenas.
|
|
134
|
+
|
|
135
|
+
### B5. Cadence + safety
|
|
136
|
+
Background per-arena pass (timer on the engine box), dry-run → `--apply` in a quiet window,
|
|
137
|
+
counts logged, fully arena-scoped. Same operational shape as the distiller autoscaler /
|
|
138
|
+
sparse backfill.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Part C — Ordering & how they combine
|
|
143
|
+
|
|
144
|
+
Per arena, on schedule: **(1) fusion → (2) decay.** Fusion first so a master node absorbs
|
|
145
|
+
its duplicates' provenance/salience *before* decay judges it (else a real node split across
|
|
146
|
+
two weak dupes could wrongly decay out). Then decay ages + evicts the survivors.
|
|
147
|
+
|
|
148
|
+
**This is what finally cures immortal pollution:**
|
|
149
|
+
- 7B polluted node *with* a correct Qwen3.6 counterpart → **fused**, correct one as master,
|
|
150
|
+
polluted demoted to alias / tombstoned.
|
|
151
|
+
- 7B pure-junk node with *no* correct counterpart (numeric-ID-person, ungrounded) → born-low
|
|
152
|
+
salience + no corroboration + never accessed → **decays out and is evicted**.
|
|
153
|
+
|
|
154
|
+
Together they convert the accrete-only store into a self-healing one. `pip-agents` could
|
|
155
|
+
then self-clean over time instead of requiring a nuke (a nuke is still faster for a one-shot
|
|
156
|
+
reset, but no longer the *only* path).
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Part D — Schema changes
|
|
161
|
+
|
|
162
|
+
- `entities`: `+ salience REAL DEFAULT …`, `+ last_accessed TIMESTAMPTZ`.
|
|
163
|
+
- `facts`: `+ salience REAL`, `+ last_accessed TIMESTAMPTZ` (keep `confidence` as-is =
|
|
164
|
+
corroboration truth; `asserted_at`/`expires_at` already exist).
|
|
165
|
+
- `relationships`: `+ salience REAL`, `+ last_accessed` (already has `weight`,
|
|
166
|
+
`first/last_seen`).
|
|
167
|
+
- new `fact_merges` audit (mirror `entity_merges` incl. `rollback_payload`).
|
|
168
|
+
- new `fusion_runs` + `decay_runs` ledgers for observability.
|
|
169
|
+
- `/search` gains a `last_accessed = NOW()` bump on returned nodes (batched).
|
|
170
|
+
|
|
171
|
+
## Part E — Rollout (each flag-gated, arena-scoped, dry-run-first, audited)
|
|
172
|
+
|
|
173
|
+
1. **Salience scoring only** — add columns, born-salience + decay math, NO eviction.
|
|
174
|
+
Observe distributions; confirm junk scores low and durable facts stay high.
|
|
175
|
+
2. **Eviction** — dry-run (count what *would* evict) → `--apply` in a quiet window.
|
|
176
|
+
3. **Fusion extension** — scored canonical selection (fix typo-crowning) + cross-run
|
|
177
|
+
detection + fact fusion, dry-run → apply.
|
|
178
|
+
4. **Online/continuous** — wire fusion+decay to run after distillation per arena.
|
|
179
|
+
|
|
180
|
+
## Open questions
|
|
181
|
+
- Half-life constants per category — needs a calibration pass against real arenas.
|
|
182
|
+
- `last_accessed` write amplification on hot search paths — batch/throttle the bump.
|
|
183
|
+
- Directory authority source for canonical anchoring — HubSpot contacts? a curated table?
|
|
184
|
+
- Interaction with the (still-open) source_id supersede mode — fusion partly subsumes it,
|
|
185
|
+
but explicit supersede is cheaper for known-mutable sources.
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# RFC: the Fusion Drive — v2 memory self-healing (cross-run node fusion + decay)
|
|
2
|
+
|
|
3
|
+
> **Fusion Drive** = the continuous, arena-scoped background engine that keeps the v2
|
|
4
|
+
> memory graph self-healing: it *fuses* duplicate/near-duplicate nodes from different
|
|
5
|
+
> distillation runs into a single master node (horizontal convergence) and *decays* stale,
|
|
6
|
+
> low-value, and junk nodes out of existence (vertical aging). Named for the drive that
|
|
7
|
+
> does the fusing — the decay pass rides the same engine.
|
|
8
|
+
|
|
9
|
+
**Status:** spec + initial implementation (PR #92) — 2026-06-12. Implemented: salience
|
|
10
|
+
scoring + decay, **eviction** (`fusion_drive_decay.py --evict`, reversible via
|
|
11
|
+
`node_evictions`), and **fusion** of exact + cross-run-shared-provenance entity dupes and
|
|
12
|
+
exact-triple fact dupes (`fusion_drive_fuse.py --apply`, reversible via `entity_merges`/
|
|
13
|
+
`fact_merges`), with scored directory-anchored master selection. All arena-scoped,
|
|
14
|
+
dry-run-default, transactional, audited. TODO (later PRs): embedding-band + LLM-adjudicated
|
|
15
|
+
detection (in `entity_resolution_v2.py`), semantic fact fusion, authority-table wiring for
|
|
16
|
+
canonical scoring, continuous scheduling, and a half-life/threshold calibration pass before
|
|
17
|
+
`--evict` runs in prod.
|
|
18
|
+
**Builds on:** `RFC-entity-reconciliation.md`, `scripts/entity_resolution_v2.py` (#82),
|
|
19
|
+
`org-model/migrations/002_entity_merges_audit.sql`.
|
|
20
|
+
**Motivated by:** the v2 store is currently **pure-accretion** — three independent
|
|
21
|
+
properties, all verified in code, mean nothing ever leaves or improves in place:
|
|
22
|
+
|
|
23
|
+
1. **No supersede by source_id** — event identity is `sha256(arena:content)`; re-emitting
|
|
24
|
+
edited content appends a new event, the old persists.
|
|
25
|
+
2. **Accrete-only graph writes** — entity/fact upserts are `ON CONFLICT (id) DO UPDATE`
|
|
26
|
+
that only merge aliases/provenance and bump confidence; a *corrected* extraction has a
|
|
27
|
+
different deterministic id, so it lands **beside** the polluted node, never replacing it.
|
|
28
|
+
3. **No decay/eviction** — v2 has no GC; fact confidence only moves up; recency affects
|
|
29
|
+
search ranking only, never retention.
|
|
30
|
+
|
|
31
|
+
Net: improving the extractor/teacher only helps **new** content. Accumulated 7B-era
|
|
32
|
+
pollution (hallucinated emails, numeric-ID-as-person, ungrounded entities) is immortal.
|
|
33
|
+
`pentatonic-team` had to be **nuked** rather than re-distilled because of this; `pip-agents`
|
|
34
|
+
(87k events) still carries all of it.
|
|
35
|
+
|
|
36
|
+
This RFC makes the store **self-healing** via two complementary mechanisms:
|
|
37
|
+
**fusion** (horizontal — converge duplicate/near-duplicate nodes from different
|
|
38
|
+
distillation runs into one *master* node) and **decay** (vertical — age out stale and
|
|
39
|
+
low-value nodes). Both are gated, arena-scoped, audited, and reversible.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Part A — Fusion: converge near-duplicate nodes into a master
|
|
44
|
+
|
|
45
|
+
Extends the existing entity-resolution machinery along four axes.
|
|
46
|
+
|
|
47
|
+
### A1. Online + continuous (today it's dry-run batch)
|
|
48
|
+
Run fusion as a scheduled per-arena pass (systemd timer on the engine box, same pattern as
|
|
49
|
+
the distiller autoscaler) **and** opportunistically after a distillation run touches an
|
|
50
|
+
arena's entities. Keep #82's invariants: dry-run default, `--apply` gate, arena scoping,
|
|
51
|
+
`entity_merges` rollback. Add a `fusion_runs` ledger (arena, started_at, candidates,
|
|
52
|
+
merged, mode) for observability.
|
|
53
|
+
|
|
54
|
+
### A2. Cross-distillation-run detection (the actual pollution cure)
|
|
55
|
+
The hard case #82 misses: 7B `"1716801984"` (numeric-ID person) and Qwen3.6 `"Katie Cooper"`
|
|
56
|
+
are the same real entity but share **no name similarity**, so name-blocking never compares
|
|
57
|
+
them. New candidate signals beyond name trigrams / embedding-on-name:
|
|
58
|
+
|
|
59
|
+
- **Shared-provenance co-reference** — two entities of the same `entity_type` citing the
|
|
60
|
+
same `event_id` in `provenance_event_ids`, where one is low-quality (numeric / ungrounded
|
|
61
|
+
/ single-token). The shared event's content is the adjudication context ("does this event
|
|
62
|
+
support these being the same person?").
|
|
63
|
+
- **Context embedding** — embed the *facts/statements about* an entity (not just its name),
|
|
64
|
+
so name-divergent dupes still cluster. Reuses the bulk-embed lane.
|
|
65
|
+
- **Teacher-version signal** — provenance maps to `distillation_traces.llm_model` /
|
|
66
|
+
`system_prompt_hash`. Prefer the newer-teacher extraction as master; an entity *only* ever
|
|
67
|
+
produced by the superseded teacher and never re-confirmed by the new one is both a fusion
|
|
68
|
+
candidate (likely a worse rendering of a node the new teacher got right) and a decay
|
|
69
|
+
candidate (stale-teacher orphan — see B).
|
|
70
|
+
|
|
71
|
+
### A3. Master-node selection — replace richest-row-wins
|
|
72
|
+
#82 uses "richest-row-wins", which (flagged in review) would crown the typo **"Phil Mossop"**
|
|
73
|
+
over **"Philip Mossop"**. Replace with a **scored** canonical pick:
|
|
74
|
+
|
|
75
|
+
| Signal | Effect |
|
|
76
|
+
|---|---|
|
|
77
|
+
| **Directory/authority anchor** (name matches an org-directory / HubSpot contact / Pip `contact_email`+`contact_name`) | dominant + → canonical |
|
|
78
|
+
| Grounding (name appears verbatim in a provenance event's content) | + |
|
|
79
|
+
| Teacher recency (newer `llm_model`) | + |
|
|
80
|
+
| Corroboration (`cardinality(provenance_event_ids)`) | + |
|
|
81
|
+
| Looks-like-ID (digit-ratio > 0.5) / hallucinated-email flag / single-token bare name | − − |
|
|
82
|
+
|
|
83
|
+
Master = highest score. Losers' surface forms become **aliases** on the master (so existing
|
|
84
|
+
lookups still resolve), facts/relationships are repointed, losers tombstoned in
|
|
85
|
+
`entity_merges` with `rollback_payload`. Directory-anchored selection is the key fix: an
|
|
86
|
+
authoritative source, when present, beats any heuristic.
|
|
87
|
+
|
|
88
|
+
### A4. Fact + relationship fusion (today only entities fuse)
|
|
89
|
+
After entity fusion (so subject/object ids are canonical):
|
|
90
|
+
- **Facts** — exact `(arena, subject, predicate, object)` dupes already collapse via the
|
|
91
|
+
content-id. **Semantic** dupes (same assertion, different surface — "joined Acme" vs "works
|
|
92
|
+
at Acme") need statement-embedding similarity + LLM adjudication ("same assertion?").
|
|
93
|
+
Master fact = max confidence + best-grounded statement; union provenance; tombstone dupes.
|
|
94
|
+
New `fact_merges` audit mirroring `entity_merges`.
|
|
95
|
+
- **Relationships** — `(from,to,type)` already collapses; a controlled rel-type vocabulary
|
|
96
|
+
("works at" ≡ "employed by") is a later optional canonicalization.
|
|
97
|
+
|
|
98
|
+
### A5. Audit, reversibility, safety rails
|
|
99
|
+
Reuse `entity_merges`; add `fact_merges`. Every fusion carries `rollback_payload`.
|
|
100
|
+
LLM-adjudicated merges store prompt+verdict. **Disclosure rail:** never send
|
|
101
|
+
`disclosure_class='restricted'` rows to the LLM adjudicator (data-egress; the #82 review
|
|
102
|
+
item). Auto-merge only above a high confidence band; everything else → human-review queue.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Part B — Decay: age out stale and low-value nodes
|
|
107
|
+
|
|
108
|
+
### B1. Separate `salience` from `confidence` (important)
|
|
109
|
+
Do **not** decay `confidence` — it means "how corroborated/true is this", and decaying it
|
|
110
|
+
would lie about corroboration. Add a separate **`salience`** (retention priority) to
|
|
111
|
+
entities/facts/relationships. Decay acts on salience; eviction keys on salience.
|
|
112
|
+
|
|
113
|
+
`salience(t) = salience₀ · exp(−ln2 · Δt / half_life[category])`, bumped on access or
|
|
114
|
+
re-corroboration. Per-category half-life:
|
|
115
|
+
|
|
116
|
+
| category | half-life | rationale |
|
|
117
|
+
|---|---|---|
|
|
118
|
+
| decision, commitment | very long / ∞ | durable record |
|
|
119
|
+
| state, preference | medium | changes but matters |
|
|
120
|
+
| mention, observation | short | ephemeral |
|
|
121
|
+
|
|
122
|
+
`Δt` = time since the more recent of `last_seen` and a new `last_accessed` (bumped when a node is returned by
|
|
123
|
+
`/search` — cheap write, makes retrieval keep memories alive). Re-corroboration (new
|
|
124
|
+
provenance) resets the clock and bumps salience.
|
|
125
|
+
|
|
126
|
+
### B2. Born-salience — the cheap partial cure
|
|
127
|
+
Seed `salience₀` from extraction-quality signals already computed (the trap detectors:
|
|
128
|
+
ungrounded, numeric-ID-person, hallucinated-email, `noise_filter` hits). **Junk is born
|
|
129
|
+
low**, so it decays below threshold and self-evicts fast — pollution cleans itself even
|
|
130
|
+
without a fusion match.
|
|
131
|
+
|
|
132
|
+
### B3. Eviction (GC)
|
|
133
|
+
Node is evictable when: `salience < min_threshold` **AND** `last_seen`/`last_accessed`
|
|
134
|
+
older than a floor **AND** not referenced by a surviving higher-salience node (an entity
|
|
135
|
+
that's the subject/object of a live fact survives). Eviction = **tombstone** (soft-delete +
|
|
136
|
+
retention window) → hard-delete after grace, cascading to the node's Qdrant points +
|
|
137
|
+
`vector_provenance`. Never evict `disclosure_class='restricted'` without sign-off.
|
|
138
|
+
|
|
139
|
+
### B4. Capacity bound (optional)
|
|
140
|
+
Per-arena soft cap; when exceeded, evict lowest-salience first. Backstop against unbounded
|
|
141
|
+
arenas.
|
|
142
|
+
|
|
143
|
+
### B5. Cadence + safety
|
|
144
|
+
Background per-arena pass (timer on the engine box), dry-run → `--apply` in a quiet window,
|
|
145
|
+
counts logged, fully arena-scoped. Same operational shape as the distiller autoscaler /
|
|
146
|
+
sparse backfill.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Part C — Ordering & how they combine
|
|
151
|
+
|
|
152
|
+
Per arena, on schedule: **(1) fusion → (2) decay.** Fusion first so a master node absorbs
|
|
153
|
+
its duplicates' provenance/salience *before* decay judges it (else a real node split across
|
|
154
|
+
two weak dupes could wrongly decay out). Then decay ages + evicts the survivors.
|
|
155
|
+
|
|
156
|
+
**This is what finally cures immortal pollution:**
|
|
157
|
+
- 7B polluted node *with* a correct Qwen3.6 counterpart → **fused**, correct one as master,
|
|
158
|
+
polluted demoted to alias / tombstoned.
|
|
159
|
+
- 7B pure-junk node with *no* correct counterpart (numeric-ID-person, ungrounded) → born-low
|
|
160
|
+
salience + no corroboration + never accessed → **decays out and is evicted**.
|
|
161
|
+
|
|
162
|
+
Together they convert the accrete-only store into a self-healing one. `pip-agents` could
|
|
163
|
+
then self-clean over time instead of requiring a nuke (a nuke is still faster for a one-shot
|
|
164
|
+
reset, but no longer the *only* path).
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Part D — Schema changes
|
|
169
|
+
|
|
170
|
+
- `entities`: `+ salience REAL DEFAULT …`, `+ last_accessed TIMESTAMPTZ`.
|
|
171
|
+
- `facts`: `+ salience REAL`, `+ last_accessed TIMESTAMPTZ` (keep `confidence` as-is =
|
|
172
|
+
corroboration truth; `asserted_at`/`expires_at` already exist).
|
|
173
|
+
- `relationships`: `+ salience REAL`, `+ last_accessed` (already has `weight`,
|
|
174
|
+
`first/last_seen`).
|
|
175
|
+
- new `fact_merges` audit (mirror `entity_merges` incl. `rollback_payload`).
|
|
176
|
+
- new `fusion_runs` + `decay_runs` ledgers for observability.
|
|
177
|
+
- `/search` gains a `last_accessed = NOW()` bump on returned nodes (batched).
|
|
178
|
+
|
|
179
|
+
## Part E — Rollout (each flag-gated, arena-scoped, dry-run-first, audited)
|
|
180
|
+
|
|
181
|
+
1. **Salience scoring only** — add columns, born-salience + decay math, NO eviction.
|
|
182
|
+
Observe distributions; confirm junk scores low and durable facts stay high.
|
|
183
|
+
2. **Eviction** — dry-run (count what *would* evict) → `--apply` in a quiet window.
|
|
184
|
+
3. **Fusion extension** — scored canonical selection (fix typo-crowning) + cross-run
|
|
185
|
+
detection + fact fusion, dry-run → apply.
|
|
186
|
+
4. **Online/continuous** — wire fusion+decay to run after distillation per arena.
|
|
187
|
+
|
|
188
|
+
## Open questions
|
|
189
|
+
- Half-life constants per category — needs a calibration pass against real arenas.
|
|
190
|
+
- `last_accessed` write amplification on hot search paths — batch/throttle the bump.
|
|
191
|
+
- Directory authority source for canonical anchoring — HubSpot contacts? a curated table?
|
|
192
|
+
- Interaction with the (still-open) source_id supersede mode — fusion partly subsumes it,
|
|
193
|
+
but explicit supersede is cheaper for known-mutable sources.
|
|
@@ -60,3 +60,40 @@ def corroborated_confidence(n_sources: int) -> float:
|
|
|
60
60
|
if bumped > _CONF_CAP:
|
|
61
61
|
return _CONF_CAP
|
|
62
62
|
return round(bumped, 2)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ── born salience (Fusion Drive) ─────────────────────────────────────
|
|
66
|
+
# Retention priority a node is stamped with at extraction time, SEPARATE
|
|
67
|
+
# from confidence (confidence = corroboration/truth; salience = how long
|
|
68
|
+
# it's worth keeping). Junk — flagged by the extractor's own quality
|
|
69
|
+
# detectors (noise name, numeric-ID-as-person, hallucinated email,
|
|
70
|
+
# ungrounded, etc.) — is born near the floor so the Fusion Drive decay
|
|
71
|
+
# pass evicts it on a short clock instead of the multi-year default.
|
|
72
|
+
#
|
|
73
|
+
# This MUST stay behaviorally identical (same constants, same arithmetic) to fusion_drive/salience.py:born_salience
|
|
74
|
+
# (the decay side uses the same scale). test_born_salience_parity.py
|
|
75
|
+
# guards the two against drift — same pattern as entity_id.py's parity
|
|
76
|
+
# test across the sync/async build contexts.
|
|
77
|
+
# Starting salience for a node before any bonus or penalty is applied.
_SAL_BASE = 0.50
# Bonus added per corroborating source beyond the first ...
_SAL_CORROB_PER_SOURCE = 0.10
# ... capped at this total bonus.
_SAL_CORROB_CAP = 0.30
# Hard lower / upper bounds of the returned salience.
_SAL_FLOOR = 0.01
_SAL_CEIL = 1.00
# Amount subtracted for each quality flag raised on the node.
_SAL_PENALTIES = {
    "noise_name": 0.45,
    "numeric_id_person": 0.45,
    "hallucinated_email": 0.40,
    "ungrounded": 0.35,
    "subject_undeclared": 0.25,
    "low_signal": 0.15,
}


def born_salience(n_sources: int = 1, quality_flags: list[str] | None = None) -> float:
    """Return the salience to stamp on a freshly extracted node.

    Starts from ``_SAL_BASE``, adds a capped bonus for every source beyond
    the first, subtracts the ``_SAL_PENALTIES`` entry for each flag in
    *quality_flags* (unknown flags cost nothing; a repeated flag is
    charged once per occurrence), then clamps the result to
    ``[_SAL_FLOOR, _SAL_CEIL]`` and rounds it to four decimals.
    """
    score = _SAL_BASE

    extra_sources = n_sources - 1
    if extra_sources > 0:
        score += min(_SAL_CORROB_CAP, _SAL_CORROB_PER_SOURCE * extra_sources)

    # Penalties are subtracted one at a time, in the given order, so the
    # floating-point result is reproducible step for step.
    for flag in quality_flags or ():
        score -= _SAL_PENALTIES.get(flag, 0.0)

    clamped = min(_SAL_CEIL, score)
    if clamped < _SAL_FLOOR:
        clamped = _SAL_FLOOR
    return round(clamped, 4)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Parity guard: confidence.born_salience (worker, copied into the container)
|
|
2
|
+
must stay byte-equivalent to fusion_drive/salience.born_salience (the decay
|
|
3
|
+
side). Same pattern as test_entity_id_parity.py — the two live across a Docker
|
|
4
|
+
build-context boundary and would silently drift otherwise."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
import confidence as worker
|
|
12
|
+
|
|
13
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
|
|
14
|
+
import salience as drive # noqa: E402
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_constants_match():
    """Every worker-side salience constant equals its fusion_drive twin."""
    # (name in the worker's confidence module, name in fusion_drive/salience)
    twins = (
        ("_SAL_BASE", "BASE_SALIENCE"),
        ("_SAL_CORROB_PER_SOURCE", "CORROB_PER_SOURCE"),
        ("_SAL_CORROB_CAP", "CORROB_CAP"),
        ("_SAL_FLOOR", "SALIENCE_FLOOR"),
        ("_SAL_CEIL", "SALIENCE_CEIL"),
        ("_SAL_PENALTIES", "QUALITY_PENALTIES"),
    )
    for worker_name, drive_name in twins:
        assert getattr(worker, worker_name) == getattr(drive, drive_name)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_output_matches_across_input_matrix():
    """Both born_salience implementations return the same value for every
    combination of source count and quality-flag list in the matrix."""
    # One single-flag case per flag name.
    single_flag_cases = [
        [flag_name]
        for flag_name in (
            "noise_name",
            "numeric_id_person",
            "hallucinated_email",
            "ungrounded",
            "subject_undeclared",
            "low_signal",
        )
    ]
    flag_cases = [
        None,
        [],
        *single_flag_cases,
        ["numeric_id_person", "hallucinated_email", "ungrounded"],
        ["noise_name"] * 5,  # repeated flag: penalty applied per occurrence
    ]
    source_counts = (1, 2, 3, 5, 100)

    for count in source_counts:
        for case in flag_cases:
            from_worker = worker.born_salience(count, case)
            from_drive = drive.born_salience(n_sources=count, quality_flags=case)
            assert from_worker == from_drive, (count, case)
|
|
@@ -39,7 +39,7 @@ import httpx
|
|
|
39
39
|
import psycopg
|
|
40
40
|
import psycopg.rows
|
|
41
41
|
|
|
42
|
-
from confidence import corroborated_confidence
|
|
42
|
+
from confidence import born_salience, corroborated_confidence
|
|
43
43
|
from entity_id import entity_id, normalize_surface_form
|
|
44
44
|
from extraction_schema import (
|
|
45
45
|
ALLOWED_ENT_TYPES,
|
|
@@ -782,6 +782,15 @@ def _content_id(*parts: str) -> str:
|
|
|
782
782
|
return hashlib.sha256("\x1f".join(parts).encode()).hexdigest()[:32]
|
|
783
783
|
|
|
784
784
|
|
|
785
|
+
def _digit_ratio(s: str) -> float:
|
|
786
|
+
"""Fraction of non-whitespace chars that are digits. Used to flag
|
|
787
|
+
numeric-ID-as-person junk for Fusion Drive born-salience."""
|
|
788
|
+
stripped = "".join(s.split())
|
|
789
|
+
if not stripped:
|
|
790
|
+
return 0.0
|
|
791
|
+
return sum(c.isdigit() for c in stripped) / len(stripped)
|
|
792
|
+
|
|
793
|
+
|
|
785
794
|
def upsert_entities(
|
|
786
795
|
conn: psycopg.Connection,
|
|
787
796
|
arena: str,
|
|
@@ -883,12 +892,20 @@ def upsert_entities(
|
|
|
883
892
|
else:
|
|
884
893
|
# 3b. No match — insert new.
|
|
885
894
|
eid = entity_id(arena, etype, name)
|
|
895
|
+
# Fusion Drive born-salience: a numeric-ID-as-person (classic
|
|
896
|
+
# 7B junk that slips past noise_filter, e.g. "1716801984") is
|
|
897
|
+
# born near the floor so the decay pass can evict it on a short
|
|
898
|
+
# clock instead of the multi-year entity default.
|
|
899
|
+
_qflags = []
|
|
900
|
+
if etype == "person" and _digit_ratio(name) > 0.5:
|
|
901
|
+
_qflags.append("numeric_id_person")
|
|
902
|
+
_sal = born_salience(1, _qflags)
|
|
886
903
|
cur.execute(
|
|
887
904
|
"""
|
|
888
905
|
INSERT INTO entities (
|
|
889
906
|
id, arena, entity_type, canonical_name, aliases,
|
|
890
|
-
provenance_event_ids, participant_set, disclosure_class
|
|
891
|
-
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s::disclosure_class)
|
|
907
|
+
provenance_event_ids, participant_set, disclosure_class, salience
|
|
908
|
+
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s::disclosure_class, %s)
|
|
892
909
|
ON CONFLICT (id) DO UPDATE SET
|
|
893
910
|
aliases = (
|
|
894
911
|
SELECT ARRAY(SELECT DISTINCT UNNEST(entities.aliases || EXCLUDED.aliases))
|
|
@@ -896,11 +913,13 @@ def upsert_entities(
|
|
|
896
913
|
provenance_event_ids = (
|
|
897
914
|
SELECT ARRAY(SELECT DISTINCT UNNEST(entities.provenance_event_ids || EXCLUDED.provenance_event_ids))
|
|
898
915
|
),
|
|
916
|
+
-- re-corroboration can only RAISE salience, never lower it
|
|
917
|
+
salience = GREATEST(entities.salience, EXCLUDED.salience),
|
|
899
918
|
last_seen = NOW()
|
|
900
919
|
""",
|
|
901
920
|
(
|
|
902
921
|
eid, arena, etype, name, aliases,
|
|
903
|
-
[event_id], participant_set, disclosure_class,
|
|
922
|
+
[event_id], participant_set, disclosure_class, _sal,
|
|
904
923
|
),
|
|
905
924
|
)
|
|
906
925
|
name_to_id[name] = eid
|
|
@@ -942,15 +961,24 @@ def upsert_facts(
|
|
|
942
961
|
continue
|
|
943
962
|
subj_name = f.get("subject")
|
|
944
963
|
obj_name = f.get("object")
|
|
964
|
+
# Fusion Drive born-salience: a fact whose subject isn't among the
|
|
965
|
+
# event's declared entities (ungrounded subject) or that's barely
|
|
966
|
+
# a sentence is born low so decay can clear it. n_sources=1 here.
|
|
967
|
+
_fflags = []
|
|
968
|
+
if subj_name and not name_to_id.get(subj_name):
|
|
969
|
+
_fflags.append("subject_undeclared")
|
|
970
|
+
if len(stmt) < 60:
|
|
971
|
+
_fflags.append("low_signal")
|
|
972
|
+
_fsal = born_salience(1, _fflags)
|
|
945
973
|
cur.execute(
|
|
946
974
|
"""
|
|
947
975
|
INSERT INTO facts (
|
|
948
976
|
id, arena, category, subject_entity_id, predicate,
|
|
949
977
|
object_entity_id, statement, provenance_event_ids,
|
|
950
|
-
stage, confidence, participant_set, disclosure_class
|
|
978
|
+
stage, confidence, participant_set, disclosure_class, salience
|
|
951
979
|
) VALUES (
|
|
952
980
|
%s, %s, %s, %s, %s, %s, %s, %s,
|
|
953
|
-
'provisional'::extraction_stage, %s, %s, %s::disclosure_class
|
|
981
|
+
'provisional'::extraction_stage, %s, %s, %s::disclosure_class, %s
|
|
954
982
|
)
|
|
955
983
|
ON CONFLICT (id) DO UPDATE SET
|
|
956
984
|
provenance_event_ids = (
|
|
@@ -958,6 +986,7 @@ def upsert_facts(
|
|
|
958
986
|
facts.provenance_event_ids || EXCLUDED.provenance_event_ids
|
|
959
987
|
))
|
|
960
988
|
),
|
|
989
|
+
salience = GREATEST(facts.salience, EXCLUDED.salience),
|
|
961
990
|
-- Confidence bumps with each additional independent
|
|
962
991
|
-- source. The cardinality of the merged provenance
|
|
963
992
|
-- array IS the corroboration count, so the formula
|
|
@@ -990,6 +1019,7 @@ def upsert_facts(
|
|
|
990
1019
|
float(f.get("confidence") or corroborated_confidence(1)),
|
|
991
1020
|
participant_set,
|
|
992
1021
|
disclosure_class,
|
|
1022
|
+
_fsal,
|
|
993
1023
|
),
|
|
994
1024
|
)
|
|
995
1025
|
inserted += 1
|
|
File without changes
|