@pentatonic-ai/ai-agent-sdk 0.10.6 → 0.10.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/dist/index.cjs +1 -1
  2. package/dist/index.js +1 -1
  3. package/package.json +1 -1
  4. package/packages/memory-engine-v2/RFC-decay-and-fusion.md +185 -0
  5. package/packages/memory-engine-v2/RFC-fusion-drive.md +193 -0
  6. package/packages/memory-engine-v2/docker-compose.aws.yml +62 -1
  7. package/packages/memory-engine-v2/docker-compose.yml +8 -1
  8. package/packages/memory-engine-v2/extractor-async/confidence.py +37 -0
  9. package/packages/memory-engine-v2/extractor-async/test_born_salience_parity.py +35 -0
  10. package/packages/memory-engine-v2/extractor-async/test_guided_json_parser.py +44 -0
  11. package/packages/memory-engine-v2/extractor-async/worker.py +67 -7
  12. package/packages/memory-engine-v2/extractor-sync/server.py +6 -2
  13. package/packages/memory-engine-v2/extractor-sync/test_paired_extraction.py +82 -1
  14. package/packages/memory-engine-v2/fusion_drive/__init__.py +0 -0
  15. package/packages/memory-engine-v2/fusion_drive/canonical.py +94 -0
  16. package/packages/memory-engine-v2/fusion_drive/conftest.py +8 -0
  17. package/packages/memory-engine-v2/fusion_drive/merge.py +178 -0
  18. package/packages/memory-engine-v2/fusion_drive/salience.py +118 -0
  19. package/packages/memory-engine-v2/fusion_drive/test_canonical.py +76 -0
  20. package/packages/memory-engine-v2/fusion_drive/test_merge.py +112 -0
  21. package/packages/memory-engine-v2/fusion_drive/test_salience.py +93 -0
  22. package/packages/memory-engine-v2/org-model/migrations/004_source_kind_code_reference.sql +12 -0
  23. package/packages/memory-engine-v2/org-model/migrations/005_fk_indexes.sql +20 -0
  24. package/packages/memory-engine-v2/org-model/migrations/006_fusion_drive.sql +80 -0
  25. package/packages/memory-engine-v2/scripts/fusion_drive_born_salience_backfill.py +113 -0
  26. package/packages/memory-engine-v2/scripts/fusion_drive_decay.py +181 -0
  27. package/packages/memory-engine-v2/scripts/fusion_drive_fuse.py +264 -0
package/dist/index.cjs CHANGED
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
878
878
  }
879
879
 
880
880
  // src/telemetry.js
881
- var VERSION = "0.10.6";
881
+ var VERSION = "0.10.8";
882
882
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
883
883
  function machineId() {
884
884
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/dist/index.js CHANGED
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
847
847
  }
848
848
 
849
849
  // src/telemetry.js
850
- var VERSION = "0.10.6";
850
+ var VERSION = "0.10.8";
851
851
  var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
852
852
  function machineId() {
853
853
  const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.10.6",
3
+ "version": "0.10.8",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -0,0 +1,185 @@
1
+ # RFC: the Fusion Drive — v2 memory self-healing (cross-run node fusion + decay)
2
+
3
+ > **Fusion Drive** = the continuous, arena-scoped background engine that keeps the v2
4
+ > memory graph self-healing: it *fuses* duplicate/near-duplicate nodes from different
5
+ > distillation runs into a single master node (horizontal convergence) and *decays* stale,
6
+ > low-value, and junk nodes out of existence (vertical aging). Named for the drive that
7
+ > does the fusing — the decay pass rides the same engine.
8
+
9
+ **Status:** draft / spec — 2026-06-12
10
+ **Builds on:** `RFC-entity-reconciliation.md`, `scripts/entity_resolution_v2.py` (#82),
11
+ `org-model/migrations/002_entity_merges_audit.sql`.
12
+ **Motivated by:** the v2 store is currently **pure-accretion** — three independent
13
+ properties, all verified in code, mean nothing ever leaves or improves in place:
14
+
15
+ 1. **No supersede by source_id** — event identity is `sha256(arena:content)`; re-emitting
16
+ edited content appends a new event, the old persists.
17
+ 2. **Accrete-only graph writes** — entity/fact upserts are `ON CONFLICT (id) DO UPDATE`
18
+ that only merge aliases/provenance and bump confidence; a *corrected* extraction has a
19
+ different deterministic id, so it lands **beside** the polluted node, never replacing it.
20
+ 3. **No decay/eviction** — v2 has no GC; fact confidence only moves up; recency affects
21
+ search ranking only, never retention.
22
+
23
+ Net: improving the extractor/teacher only helps **new** content. Accumulated 7B-era
24
+ pollution (hallucinated emails, numeric-ID-as-person, ungrounded entities) is immortal.
25
+ `pentatonic-team` had to be **nuked** rather than re-distilled because of this; `pip-agents`
26
+ (87k events) still carries all of it.
27
+
28
+ This RFC makes the store **self-healing** via two complementary mechanisms:
29
+ **fusion** (horizontal — converge duplicate/near-duplicate nodes from different
30
+ distillation runs into one *master* node) and **decay** (vertical — age out stale and
31
+ low-value nodes). Both are gated, arena-scoped, audited, and reversible.
32
+
33
+ ---
34
+
35
+ ## Part A — Fusion: converge near-duplicate nodes into a master
36
+
37
+ Extends the existing entity-resolution machinery along four axes.
38
+
39
+ ### A1. Online + continuous (today it's dry-run batch)
40
+ Run fusion as a scheduled per-arena pass (systemd timer on the engine box, same pattern as
41
+ the distiller autoscaler) **and** opportunistically after a distillation run touches an
42
+ arena's entities. Keep #82's invariants: dry-run default, `--apply` gate, arena scoping,
43
+ `entity_merges` rollback. Add a `fusion_runs` ledger (arena, started_at, candidates,
44
+ merged, mode) for observability.
45
+
46
+ ### A2. Cross-distillation-run detection (the actual pollution cure)
47
+ The hard case #82 misses: 7B `"1716801984"` (numeric-ID person) and Qwen3.6 `"Katie Cooper"`
48
+ are the same real entity but share **no name similarity**, so name-blocking never compares
49
+ them. New candidate signals beyond name trigrams / embedding-on-name:
50
+
51
+ - **Shared-provenance co-reference** — two entities of the same `entity_type` citing the
52
+ same `event_id` in `provenance_event_ids`, where one is low-quality (numeric / ungrounded
53
+ / single-token). The shared event's content is the adjudication context ("does this event
54
+ support these being the same person?").
55
+ - **Context embedding** — embed the *facts/statements about* an entity (not just its name),
56
+ so name-divergent dupes still cluster. Reuses the bulk-embed lane.
57
+ - **Teacher-version signal** — provenance maps to `distillation_traces.llm_model` /
58
+ `system_prompt_hash`. Prefer the newer-teacher extraction as master; an entity *only* ever
59
+ produced by the superseded teacher and never re-confirmed by the new one is both a fusion
60
+ candidate (likely a worse rendering of a node the new teacher got right) and a decay
61
+ candidate (stale-teacher orphan — see B).
62
+
63
+ ### A3. Master-node selection — replace richest-row-wins
64
+ #82 uses "richest-row-wins", which (flagged in review) would crown the typo **"Phil Mossop"**
65
+ over **"Philip Mossop"**. Replace with a **scored** canonical pick:
66
+
67
+ | Signal | Effect |
68
+ |---|---|
69
+ | **Directory/authority anchor** (name matches an org-directory / HubSpot contact / Pip `contact_email`+`contact_name`) | dominant + → canonical |
70
+ | Grounding (name appears verbatim in a provenance event's content) | + |
71
+ | Teacher recency (newer `llm_model`) | + |
72
+ | Corroboration (`cardinality(provenance_event_ids)`) | + |
73
+ | Looks-like-ID (digit-ratio > 0.5) / hallucinated-email flag / single-token bare name | − − |
74
+
75
+ Master = highest score. Losers' surface forms become **aliases** on the master (so existing
76
+ lookups still resolve), facts/relationships are repointed, losers tombstoned in
77
+ `entity_merges` with `rollback_payload`. Directory-anchored selection is the key fix: an
78
+ authoritative source, when present, beats any heuristic.
79
+
80
+ ### A4. Fact + relationship fusion (today only entities fuse)
81
+ After entity fusion (so subject/object ids are canonical):
82
+ - **Facts** — exact `(arena, subject, predicate, object)` dupes already collapse via the
83
+ content-id. **Semantic** dupes (same assertion, different surface — "joined Acme" vs "works
84
+ at Acme") need statement-embedding similarity + LLM adjudication ("same assertion?").
85
+ Master fact = max confidence + best-grounded statement; union provenance; tombstone dupes.
86
+ New `fact_merges` audit mirroring `entity_merges`.
87
+ - **Relationships** — `(from,to,type)` already collapses; a controlled rel-type vocabulary
88
+ ("works at" ≡ "employed by") is a later optional canonicalization.
89
+
90
+ ### A5. Audit, reversibility, safety rails
91
+ Reuse `entity_merges`; add `fact_merges`. Every fusion carries `rollback_payload`.
92
+ LLM-adjudicated merges store prompt+verdict. **Disclosure rail:** never send
93
+ `disclosure_class='restricted'` rows to the LLM adjudicator (data-egress; the #82 review
94
+ item). Auto-merge only above a high confidence band; everything else → human-review queue.
95
+
96
+ ---
97
+
98
+ ## Part B — Decay: age out stale and low-value nodes
99
+
100
+ ### B1. Separate `salience` from `confidence` (important)
101
+ Do **not** decay `confidence` — it means "how corroborated/true is this", and decaying it
102
+ would lie about corroboration. Add a separate **`salience`** (retention priority) to
103
+ entities/facts/relationships. Decay acts on salience; eviction keys on salience.
104
+
105
+ `salience(t) = salience₀ · exp(−ln2 · Δt / half_life[category])`, bumped on access or
106
+ re-corroboration. Per-category half-life:
107
+
108
+ | category | half-life | rationale |
109
+ |---|---|---|
110
+ | decision, commitment | very long / ∞ | durable record |
111
+ | state, preference | medium | changes but matters |
112
+ | mention, observation | short | ephemeral |
113
+
114
+ `Δt` = time since the more recent of `last_seen` and a new `last_accessed` column (bumped when a node is returned by
115
+ `/search` — cheap write, makes retrieval keep memories alive). Re-corroboration (new
116
+ provenance) resets the clock and bumps salience.
117
+
118
+ ### B2. Born-salience — the cheap partial cure
119
+ Seed `salience₀` from extraction-quality signals already computed (the trap detectors:
120
+ ungrounded, numeric-ID-person, hallucinated-email, `noise_filter` hits). **Junk is born
121
+ low**, so it decays below threshold and self-evicts fast — pollution cleans itself even
122
+ without a fusion match.
123
+
124
+ ### B3. Eviction (GC)
125
+ Node is evictable when: `salience < min_threshold` **AND** `last_seen`/`last_accessed`
126
+ older than a floor **AND** not referenced by a surviving higher-salience node (an entity
127
+ that's the subject/object of a live fact survives). Eviction = **tombstone** (soft-delete +
128
+ retention window) → hard-delete after grace, cascading to the node's Qdrant points +
129
+ `vector_provenance`. Never evict `disclosure_class='restricted'` without sign-off.
130
+
131
+ ### B4. Capacity bound (optional)
132
+ Per-arena soft cap; when exceeded, evict lowest-salience first. Backstop against unbounded
133
+ arenas.
134
+
135
+ ### B5. Cadence + safety
136
+ Background per-arena pass (timer on the engine box), dry-run → `--apply` in a quiet window,
137
+ counts logged, fully arena-scoped. Same operational shape as the distiller autoscaler /
138
+ sparse backfill.
139
+
140
+ ---
141
+
142
+ ## Part C — Ordering & how they combine
143
+
144
+ Per arena, on schedule: **(1) fusion → (2) decay.** Fusion first so a master node absorbs
145
+ its duplicates' provenance/salience *before* decay judges it (else a real node split across
146
+ two weak dupes could wrongly decay out). Then decay ages + evicts the survivors.
147
+
148
+ **This is what finally cures immortal pollution:**
149
+ - 7B polluted node *with* a correct Qwen3.6 counterpart → **fused**, correct one as master,
150
+ polluted demoted to alias / tombstoned.
151
+ - 7B pure-junk node with *no* correct counterpart (numeric-ID-person, ungrounded) → born-low
152
+ salience + no corroboration + never accessed → **decays out and is evicted**.
153
+
154
+ Together they convert the accrete-only store into a self-healing one. `pip-agents` could
155
+ then self-clean over time instead of requiring a nuke (a nuke is still faster for a one-shot
156
+ reset, but no longer the *only* path).
157
+
158
+ ---
159
+
160
+ ## Part D — Schema changes
161
+
162
+ - `entities`: `+ salience REAL DEFAULT …`, `+ last_accessed TIMESTAMPTZ`.
163
+ - `facts`: `+ salience REAL`, `+ last_accessed TIMESTAMPTZ` (keep `confidence` as-is =
164
+ corroboration truth; `asserted_at`/`expires_at` already exist).
165
+ - `relationships`: `+ salience REAL`, `+ last_accessed TIMESTAMPTZ` (already has `weight`,
166
+ `first/last_seen`).
167
+ - new `fact_merges` audit (mirror `entity_merges` incl. `rollback_payload`).
168
+ - new `fusion_runs` + `decay_runs` ledgers for observability.
169
+ - `/search` gains a `last_accessed = NOW()` bump on returned nodes (batched).
170
+
171
+ ## Part E — Rollout (each flag-gated, arena-scoped, dry-run-first, audited)
172
+
173
+ 1. **Salience scoring only** — add columns, born-salience + decay math, NO eviction.
174
+ Observe distributions; confirm junk scores low and durable facts stay high.
175
+ 2. **Eviction** — dry-run (count what *would* evict) → `--apply` in a quiet window.
176
+ 3. **Fusion extension** — scored canonical selection (fix typo-crowning) + cross-run
177
+ detection + fact fusion, dry-run → apply.
178
+ 4. **Online/continuous** — wire fusion+decay to run after distillation per arena.
179
+
180
+ ## Open questions
181
+ - Half-life constants per category — needs a calibration pass against real arenas.
182
+ - `last_accessed` write amplification on hot search paths — batch/throttle the bump.
183
+ - Directory authority source for canonical anchoring — HubSpot contacts? a curated table?
184
+ - Interaction with the (still-open) source_id supersede mode — fusion partly subsumes it,
185
+ but explicit supersede is cheaper for known-mutable sources.
@@ -0,0 +1,193 @@
1
+ # RFC: the Fusion Drive — v2 memory self-healing (cross-run node fusion + decay)
2
+
3
+ > **Fusion Drive** = the continuous, arena-scoped background engine that keeps the v2
4
+ > memory graph self-healing: it *fuses* duplicate/near-duplicate nodes from different
5
+ > distillation runs into a single master node (horizontal convergence) and *decays* stale,
6
+ > low-value, and junk nodes out of existence (vertical aging). Named for the drive that
7
+ > does the fusing — the decay pass rides the same engine.
8
+
9
+ **Status:** spec + initial implementation (PR #92) — 2026-06-12. Implemented: salience
10
+ scoring + decay, **eviction** (`fusion_drive_decay.py --evict`, reversible via
11
+ `node_evictions`), and **fusion** of exact + cross-run-shared-provenance entity dupes and
12
+ exact-triple fact dupes (`fusion_drive_fuse.py --apply`, reversible via `entity_merges`/
13
+ `fact_merges`), with scored directory-anchored master selection. All arena-scoped,
14
+ dry-run-default, transactional, audited. TODO (later PRs): embedding-band + LLM-adjudicated
15
+ detection (in `entity_resolution_v2.py`), semantic fact fusion, authority-table wiring for
16
+ canonical scoring, continuous scheduling, and a half-life/threshold calibration pass before
17
+ `--evict` runs in prod.
18
+ **Builds on:** `RFC-entity-reconciliation.md`, `scripts/entity_resolution_v2.py` (#82),
19
+ `org-model/migrations/002_entity_merges_audit.sql`.
20
+ **Motivated by:** the v2 store is currently **pure-accretion** — three independent
21
+ properties, all verified in code, mean nothing ever leaves or improves in place:
22
+
23
+ 1. **No supersede by source_id** — event identity is `sha256(arena:content)`; re-emitting
24
+ edited content appends a new event, the old persists.
25
+ 2. **Accrete-only graph writes** — entity/fact upserts are `ON CONFLICT (id) DO UPDATE`
26
+ that only merge aliases/provenance and bump confidence; a *corrected* extraction has a
27
+ different deterministic id, so it lands **beside** the polluted node, never replacing it.
28
+ 3. **No decay/eviction** — v2 has no GC; fact confidence only moves up; recency affects
29
+ search ranking only, never retention.
30
+
31
+ Net: improving the extractor/teacher only helps **new** content. Accumulated 7B-era
32
+ pollution (hallucinated emails, numeric-ID-as-person, ungrounded entities) is immortal.
33
+ `pentatonic-team` had to be **nuked** rather than re-distilled because of this; `pip-agents`
34
+ (87k events) still carries all of it.
35
+
36
+ This RFC makes the store **self-healing** via two complementary mechanisms:
37
+ **fusion** (horizontal — converge duplicate/near-duplicate nodes from different
38
+ distillation runs into one *master* node) and **decay** (vertical — age out stale and
39
+ low-value nodes). Both are gated, arena-scoped, audited, and reversible.
40
+
41
+ ---
42
+
43
+ ## Part A — Fusion: converge near-duplicate nodes into a master
44
+
45
+ Extends the existing entity-resolution machinery along four axes.
46
+
47
+ ### A1. Online + continuous (today it's dry-run batch)
48
+ Run fusion as a scheduled per-arena pass (systemd timer on the engine box, same pattern as
49
+ the distiller autoscaler) **and** opportunistically after a distillation run touches an
50
+ arena's entities. Keep #82's invariants: dry-run default, `--apply` gate, arena scoping,
51
+ `entity_merges` rollback. Add a `fusion_runs` ledger (arena, started_at, candidates,
52
+ merged, mode) for observability.
53
+
54
+ ### A2. Cross-distillation-run detection (the actual pollution cure)
55
+ The hard case #82 misses: 7B `"1716801984"` (numeric-ID person) and Qwen3.6 `"Katie Cooper"`
56
+ are the same real entity but share **no name similarity**, so name-blocking never compares
57
+ them. New candidate signals beyond name trigrams / embedding-on-name:
58
+
59
+ - **Shared-provenance co-reference** — two entities of the same `entity_type` citing the
60
+ same `event_id` in `provenance_event_ids`, where one is low-quality (numeric / ungrounded
61
+ / single-token). The shared event's content is the adjudication context ("does this event
62
+ support these being the same person?").
63
+ - **Context embedding** — embed the *facts/statements about* an entity (not just its name),
64
+ so name-divergent dupes still cluster. Reuses the bulk-embed lane.
65
+ - **Teacher-version signal** — provenance maps to `distillation_traces.llm_model` /
66
+ `system_prompt_hash`. Prefer the newer-teacher extraction as master; an entity *only* ever
67
+ produced by the superseded teacher and never re-confirmed by the new one is both a fusion
68
+ candidate (likely a worse rendering of a node the new teacher got right) and a decay
69
+ candidate (stale-teacher orphan — see B).
70
+
71
+ ### A3. Master-node selection — replace richest-row-wins
72
+ #82 uses "richest-row-wins", which (flagged in review) would crown the typo **"Phil Mossop"**
73
+ over **"Philip Mossop"**. Replace with a **scored** canonical pick:
74
+
75
+ | Signal | Effect |
76
+ |---|---|
77
+ | **Directory/authority anchor** (name matches an org-directory / HubSpot contact / Pip `contact_email`+`contact_name`) | dominant + → canonical |
78
+ | Grounding (name appears verbatim in a provenance event's content) | + |
79
+ | Teacher recency (newer `llm_model`) | + |
80
+ | Corroboration (`cardinality(provenance_event_ids)`) | + |
81
+ | Looks-like-ID (digit-ratio > 0.5) / hallucinated-email flag / single-token bare name | − − |
82
+
83
+ Master = highest score. Losers' surface forms become **aliases** on the master (so existing
84
+ lookups still resolve), facts/relationships are repointed, losers tombstoned in
85
+ `entity_merges` with `rollback_payload`. Directory-anchored selection is the key fix: an
86
+ authoritative source, when present, beats any heuristic.
87
+
88
+ ### A4. Fact + relationship fusion (today only entities fuse)
89
+ After entity fusion (so subject/object ids are canonical):
90
+ - **Facts** — exact `(arena, subject, predicate, object)` dupes already collapse via the
91
+ content-id. **Semantic** dupes (same assertion, different surface — "joined Acme" vs "works
92
+ at Acme") need statement-embedding similarity + LLM adjudication ("same assertion?").
93
+ Master fact = max confidence + best-grounded statement; union provenance; tombstone dupes.
94
+ New `fact_merges` audit mirroring `entity_merges`.
95
+ - **Relationships** — `(from,to,type)` already collapses; a controlled rel-type vocabulary
96
+ ("works at" ≡ "employed by") is a later optional canonicalization.
97
+
98
+ ### A5. Audit, reversibility, safety rails
99
+ Reuse `entity_merges`; add `fact_merges`. Every fusion carries `rollback_payload`.
100
+ LLM-adjudicated merges store prompt+verdict. **Disclosure rail:** never send
101
+ `disclosure_class='restricted'` rows to the LLM adjudicator (data-egress; the #82 review
102
+ item). Auto-merge only above a high confidence band; everything else → human-review queue.
103
+
104
+ ---
105
+
106
+ ## Part B — Decay: age out stale and low-value nodes
107
+
108
+ ### B1. Separate `salience` from `confidence` (important)
109
+ Do **not** decay `confidence` — it means "how corroborated/true is this", and decaying it
110
+ would lie about corroboration. Add a separate **`salience`** (retention priority) to
111
+ entities/facts/relationships. Decay acts on salience; eviction keys on salience.
112
+
113
+ `salience(t) = salience₀ · exp(−ln2 · Δt / half_life[category])`, bumped on access or
114
+ re-corroboration. Per-category half-life:
115
+
116
+ | category | half-life | rationale |
117
+ |---|---|---|
118
+ | decision, commitment | very long / ∞ | durable record |
119
+ | state, preference | medium | changes but matters |
120
+ | mention, observation | short | ephemeral |
121
+
122
+ `Δt` = time since the more recent of `last_seen` and a new `last_accessed` column (bumped when a node is returned by
123
+ `/search` — cheap write, makes retrieval keep memories alive). Re-corroboration (new
124
+ provenance) resets the clock and bumps salience.
125
+
126
+ ### B2. Born-salience — the cheap partial cure
127
+ Seed `salience₀` from extraction-quality signals already computed (the trap detectors:
128
+ ungrounded, numeric-ID-person, hallucinated-email, `noise_filter` hits). **Junk is born
129
+ low**, so it decays below threshold and self-evicts fast — pollution cleans itself even
130
+ without a fusion match.
131
+
132
+ ### B3. Eviction (GC)
133
+ Node is evictable when: `salience < min_threshold` **AND** `last_seen`/`last_accessed`
134
+ older than a floor **AND** not referenced by a surviving higher-salience node (an entity
135
+ that's the subject/object of a live fact survives). Eviction = **tombstone** (soft-delete +
136
+ retention window) → hard-delete after grace, cascading to the node's Qdrant points +
137
+ `vector_provenance`. Never evict `disclosure_class='restricted'` without sign-off.
138
+
139
+ ### B4. Capacity bound (optional)
140
+ Per-arena soft cap; when exceeded, evict lowest-salience first. Backstop against unbounded
141
+ arenas.
142
+
143
+ ### B5. Cadence + safety
144
+ Background per-arena pass (timer on the engine box), dry-run → `--apply` in a quiet window,
145
+ counts logged, fully arena-scoped. Same operational shape as the distiller autoscaler /
146
+ sparse backfill.
147
+
148
+ ---
149
+
150
+ ## Part C — Ordering & how they combine
151
+
152
+ Per arena, on schedule: **(1) fusion → (2) decay.** Fusion first so a master node absorbs
153
+ its duplicates' provenance/salience *before* decay judges it (else a real node split across
154
+ two weak dupes could wrongly decay out). Then decay ages + evicts the survivors.
155
+
156
+ **This is what finally cures immortal pollution:**
157
+ - 7B polluted node *with* a correct Qwen3.6 counterpart → **fused**, correct one as master,
158
+ polluted demoted to alias / tombstoned.
159
+ - 7B pure-junk node with *no* correct counterpart (numeric-ID-person, ungrounded) → born-low
160
+ salience + no corroboration + never accessed → **decays out and is evicted**.
161
+
162
+ Together they convert the accrete-only store into a self-healing one. `pip-agents` could
163
+ then self-clean over time instead of requiring a nuke (a nuke is still faster for a one-shot
164
+ reset, but no longer the *only* path).
165
+
166
+ ---
167
+
168
+ ## Part D — Schema changes
169
+
170
+ - `entities`: `+ salience REAL DEFAULT …`, `+ last_accessed TIMESTAMPTZ`.
171
+ - `facts`: `+ salience REAL`, `+ last_accessed TIMESTAMPTZ` (keep `confidence` as-is =
172
+ corroboration truth; `asserted_at`/`expires_at` already exist).
173
+ - `relationships`: `+ salience REAL`, `+ last_accessed TIMESTAMPTZ` (already has `weight`,
174
+ `first/last_seen`).
175
+ - new `fact_merges` audit (mirror `entity_merges` incl. `rollback_payload`).
176
+ - new `fusion_runs` + `decay_runs` ledgers for observability.
177
+ - `/search` gains a `last_accessed = NOW()` bump on returned nodes (batched).
178
+
179
+ ## Part E — Rollout (each flag-gated, arena-scoped, dry-run-first, audited)
180
+
181
+ 1. **Salience scoring only** — add columns, born-salience + decay math, NO eviction.
182
+ Observe distributions; confirm junk scores low and durable facts stay high.
183
+ 2. **Eviction** — dry-run (count what *would* evict) → `fusion_drive_decay.py --evict` in a quiet window.
184
+ 3. **Fusion extension** — scored canonical selection (fix typo-crowning) + cross-run
185
+ detection + fact fusion, dry-run → apply.
186
+ 4. **Online/continuous** — wire fusion+decay to run after distillation per arena.
187
+
188
+ ## Open questions
189
+ - Half-life constants per category — needs a calibration pass against real arenas.
190
+ - `last_accessed` write amplification on hot search paths — batch/throttle the bump.
191
+ - Directory authority source for canonical anchoring — HubSpot contacts? a curated table?
192
+ - Interaction with the (still-open) source_id supersede mode — fusion partly subsumes it,
193
+ but explicit supersede is cheaper for known-mutable sources.
@@ -19,6 +19,14 @@
19
19
 
20
20
  services:
21
21
  org-model:
22
+ # max_connections + shared_buffers must be passed via `-c` flags;
23
+ # the postgres:16-alpine image does NOT honor POSTGRES_MAX_CONNECTIONS
24
+ # or POSTGRES_SHARED_BUFFERS env vars (its entrypoint reads only init-time vars such as POSTGRES_USER/PASSWORD/DB, not server settings).
25
+ # 2026-05-19: bumped from compiled default 100 -> 200 after Pip's
26
+ # aborted-forget incident saturated the slots (4 stuck DELETEs +
27
+ # baseline pools). Shared_buffers raised to match the operator intent
28
+ # that was previously expressed in the unread env vars.
29
+ command: ["postgres", "-c", "max_connections=200", "-c", "shared_buffers=1GB"]
22
30
  environment:
23
31
  # Production tuning: bigger shared_buffers for the materialised
24
32
  # views, more connection slots for the extractor + compat pools.
@@ -45,8 +53,53 @@ services:
45
53
  PG_DSN: ${PME_V2_PG_DSN}
46
54
  LLM_ENDPOINT: ${PME_V2_LLM_ENDPOINT:-}
47
55
  LLM_API_KEY: ${PENTATONIC_AI_GATEWAY_KEY:-}
56
+ # Default model id for the AWS self-hosted distiller (Qwen2.5-7B-Instruct
57
+ # via vLLM on i-0d658d1aa70b497a6, served as `qwen2.5-7b-instruct`).
58
+ # When PME_V2_LLM_ENDPOINT points back at the Lambda 30B gateway,
59
+ # override LLM_MODEL via env to that gateway's model id.
60
+ LLM_MODEL: ${LLM_MODEL:-qwen2.5-7b-instruct}
61
+ # Self-hosted distiller (Qwen3.6-27B-FP8 on L40S, served via the
62
+ # autoscaled fleet). Tuning vs the Lambda 30B fleet: smaller
63
+ # per-call chunks, higher concurrency, longer timeout.
64
+ #
65
+ # EVENTS_PER_LLM_CALL=3 (was 5) + LLM_MAX_TOKENS_PER_EVENT_JSON=900
66
+ # (was the 400 default): the guided-JSON max_tokens budget is
67
+ # SHARED across the chunk's events, so dense events (full email/doc
68
+ # bodies maxing 8 ent/6 fct/6 rel ≈ ~1.1k output tokens each)
69
+ # clustering in a 5-event chunk overran the old 2000-tok ceiling
70
+ # and truncated the JSON array tail — 15% of calls finished on
71
+ # `length` not `stop` (measured 2026-06-12). 3×900=2700 output +
72
+ # ~2100 prompt = ~4.8k, well inside the L40S's 8192 max-model-len
73
+ # (16384 OOMs the L40S), giving every event real headroom.
74
+ # Quality over throughput — the autoscaler adds boxes to recover
75
+ # the per-box throughput lost to smaller chunks.
76
+ EVENTS_PER_LLM_CALL: "3"
77
+ CONCURRENT_LLM_CALLS: "20"
78
+ LLM_MAX_TOKENS_PER_EVENT_JSON: "900"
79
+ LLM_TIMEOUT_SEC: "300"
48
80
  POLL_INTERVAL_SEC: "10"
49
- CLAIM_TTL_SEC: "600"
81
+ CLAIM_TTL_SEC: "900"
82
+ POLL_INTERVAL_SEC_AFTER_EMPTY: "5"
83
+ # Skip-source list — never distil agent's-own-output, code ingest,
84
+ # orchestrator briefings, manual triage events into the graph.
85
+ # Source labels enumerated as they were observed leaking into prod
86
+ # over the weekend. New agent producers should be added here AND
87
+ # source_kind='agent' filtering should already drop them via worker.py.
88
+ DISTILL_SKIP_SOURCES: "pip-code-ingest,claude-code-plugin,openclaw-seesa,openclaw-plugin,openclaw-philip-mossop,openclaw-jamie,seesa,seesa-direct-curl-test,seesa-dedup-probe,orchestrator-web,briefing-morning,briefing-eod,triage-email,triage-manual"
89
+ # Trace logging — captures raw teacher I/O per distilled event into
90
+ # the distillation_traces table for student-model training data.
91
+ # Opt-in: defaults false here; set DISTILL_TRACE_ENABLED=true in
92
+ # SSM Parameter Store to flip on. See ai-events-sdk PR #74 for the
93
+ # worker-side logic + the migration that creates the table.
94
+ DISTILL_TRACE_ENABLED: ${DISTILL_TRACE_ENABLED:-false}
95
+ DISTILL_OUTPUT_MODE: ${DISTILL_OUTPUT_MODE:-kv}
96
+ DISTILL_GUIDED_PARAM_STYLE: ${DISTILL_GUIDED_PARAM_STYLE:-response_format}
97
+ # Chat-template switches forwarded verbatim on every completion
98
+ # (vLLM `chat_template_kwargs`). Required for thinking-capable
99
+ # teachers — Qwen3.x defaults enable_thinking=true, which burns
100
+ # the token budget on reasoning the distiller never reads. Set in
101
+ # SSM to '{"enable_thinking": false}' for the Qwen3.6 teacher.
102
+ DISTILL_CHAT_TEMPLATE_KWARGS: ${DISTILL_CHAT_TEMPLATE_KWARGS:-}
50
103
 
51
104
  compat:
52
105
  environment:
@@ -54,8 +107,15 @@ services:
54
107
  VECTOR_INDEX_URL: http://vector-index:6333
55
108
  EXTRACTOR_SYNC_URL: http://extractor-sync:8101
56
109
  NV_EMBED_URL: ${NV_EMBED_URL}
110
+ # Bulk embed lane (PR #76 ai-events-sdk) — separate box from the
111
+ # interactive lane so heavy backfills don't queue behind chat
112
+ # query embeds. Set in SSM to a different IP from NV_EMBED_URL.
113
+ NV_EMBED_URL_BULK: ${NV_EMBED_URL_BULK}
57
114
  NV_EMBED_API_KEY: ${PENTATONIC_AI_GATEWAY_KEY}
58
115
  NV_EMBED_PROVIDER: pentatonic-gateway
116
+ SEARCH_HYBRID_ENABLED: ${SEARCH_HYBRID_ENABLED:-}
117
+ SEARCH_MMR_ENABLED: ${SEARCH_MMR_ENABLED:-1}
118
+ SEARCH_INTENT_BOOST: ${SEARCH_INTENT_BOOST:-1}
59
119
  EMBED_DIM: "4096"
60
120
 
61
121
  # Cloudflared tunnel — same pattern as v1. Optional; only start if
@@ -76,3 +136,4 @@ services:
76
136
  depends_on:
77
137
  compat:
78
138
  condition: service_healthy
139
+
@@ -74,7 +74,14 @@ services:
74
74
  # --------------------------------------------------------------------
75
75
  vector-index:
76
76
  <<: *engine-base
77
- image: qdrant/qdrant:v1.12.4
77
+ # v1.18.2: minimum version whose API can ADD a named (sparse) vector
78
+ # to an existing collection (PUT /collections/{c}/vectors/{v}) —
79
+ # required by hybrid retrieval's 'lex' migration. Upgraded in prod
80
+ # 2026-06-11 by stepping minors 1.13.6→…→1.18.2 (the 1.12→1.18
81
+ # direct jump fails: segment.json "unknown variant `on_disk`").
82
+ # Do NOT lower this pin: 1.18-migrated storage cannot be read by
83
+ # older servers.
84
+ image: qdrant/qdrant:v1.18.2
78
85
  container_name: pme2-vector-index
79
86
  ports:
80
87
  - "127.0.0.1:${PME_V2_QDRANT_HTTP_PORT:-16333}:6333"
@@ -60,3 +60,40 @@ def corroborated_confidence(n_sources: int) -> float:
60
60
  if bumped > _CONF_CAP:
61
61
  return _CONF_CAP
62
62
  return round(bumped, 2)
63
+
64
+
65
+ # ── born salience (Fusion Drive) ─────────────────────────────────────
66
+ # Retention priority a node is stamped with at extraction time, SEPARATE
67
+ # from confidence (confidence = corroboration/truth; salience = how long
68
+ # it's worth keeping). Junk — flagged by the extractor's own quality
69
+ # detectors (noise name, numeric-ID-as-person, hallucinated email,
70
+ # ungrounded, etc.) — is born near the floor so the Fusion Drive decay
71
+ # pass evicts it on a short clock instead of the multi-year default.
72
+ #
73
+ # This MUST stay behaviourally identical to fusion_drive/salience.py:born_salience
74
+ # (the decay side uses the same scale). test_born_salience_parity.py
75
+ # guards the two against drift — same pattern as entity_id.py's parity
76
+ # test across the sync/async build contexts.
77
# Tuning constants read by born_salience(). test_born_salience_parity.py
# compares each one with its counterpart in fusion_drive/salience.py
# (BASE_SALIENCE, CORROB_PER_SOURCE, CORROB_CAP, SALIENCE_FLOOR,
# SALIENCE_CEIL, QUALITY_PENALTIES), so change both sides together.

# Starting salience before any corroboration bonus or quality penalty.
_SAL_BASE = 0.50
# Bonus added for each source beyond the first.
_SAL_CORROB_PER_SOURCE = 0.10
# Upper bound on the total corroboration bonus.
_SAL_CORROB_CAP = 0.30
# Lower and upper clamp applied to the final salience.
_SAL_FLOOR = 0.01
_SAL_CEIL = 1.00
# Amount subtracted per quality flag; flags not listed here subtract nothing.
_SAL_PENALTIES = {
    "noise_name": 0.45,
    "numeric_id_person": 0.45,
    "hallucinated_email": 0.40,
    "ungrounded": 0.35,
    "subject_undeclared": 0.25,
    "low_signal": 0.15,
}
90
+
91
+
92
def born_salience(n_sources: int = 1, quality_flags: list[str] | None = None) -> float:
    """Return the retention salience to stamp on a freshly extracted node.

    The score starts at ``_SAL_BASE``, gains ``_SAL_CORROB_PER_SOURCE`` for
    every source beyond the first (the bonus is capped at
    ``_SAL_CORROB_CAP``), and loses the ``_SAL_PENALTIES`` amount for each
    quality flag. Unknown flags subtract nothing; a flag that appears more
    than once is penalised once per occurrence.

    Args:
        n_sources: Number of sources backing the node; values of 1 or less
            earn no corroboration bonus.
        quality_flags: Quality flags raised for the node. ``None`` or an
            empty list means no penalty.

    Returns:
        The score clamped to ``[_SAL_FLOOR, _SAL_CEIL]`` and rounded to four
        decimal places.
    """
    score = _SAL_BASE

    if n_sources > 1:
        bonus = _SAL_CORROB_PER_SOURCE * (n_sources - 1)
        score += min(_SAL_CORROB_CAP, bonus)

    # Subtract penalties one at a time, in flag order, so the floating-point
    # result matches the decay-side implementation exactly.
    flags = quality_flags if quality_flags else []
    for name in flags:
        score -= _SAL_PENALTIES.get(name, 0.0)

    capped = min(_SAL_CEIL, score)
    clamped = max(_SAL_FLOOR, capped)
    return round(clamped, 4)
@@ -0,0 +1,35 @@
1
+ """Parity guard: confidence.born_salience (worker, copied into the container)
2
+ must stay output-equivalent to fusion_drive/salience.born_salience (the decay
3
+ side). Same pattern as test_entity_id_parity.py — the two live across a Docker
4
+ build-context boundary and would silently drift otherwise."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+
11
+ import confidence as worker
12
+
13
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fusion_drive"))
14
+ import salience as drive # noqa: E402
15
+
16
+
17
def test_constants_match():
    """Each worker-side salience constant equals its fusion_drive counterpart."""
    mirrored = [
        (worker._SAL_BASE, drive.BASE_SALIENCE),
        (worker._SAL_CORROB_PER_SOURCE, drive.CORROB_PER_SOURCE),
        (worker._SAL_CORROB_CAP, drive.CORROB_CAP),
        (worker._SAL_FLOOR, drive.SALIENCE_FLOOR),
        (worker._SAL_CEIL, drive.SALIENCE_CEIL),
        (worker._SAL_PENALTIES, drive.QUALITY_PENALTIES),
    ]
    for worker_value, drive_value in mirrored:
        assert worker_value == drive_value
24
+
25
+
26
def test_output_matches_across_input_matrix():
    """Both born_salience implementations agree for every (sources, flags) pair.

    The matrix covers no flags (``None`` and ``[]``), each single flag, a
    three-flag combination, and one flag repeated five times, at source
    counts from 1 to 100.
    """
    source_counts = (1, 2, 3, 5, 100)
    flag_cases = [
        None,
        [],
        ["noise_name"],
        ["numeric_id_person"],
        ["hallucinated_email"],
        ["ungrounded"],
        ["subject_undeclared"],
        ["low_signal"],
        ["numeric_id_person", "hallucinated_email", "ungrounded"],
        ["noise_name"] * 5,
    ]
    for count in source_counts:
        for case in flag_cases:
            from_worker = worker.born_salience(count, case)
            from_drive = drive.born_salience(n_sources=count, quality_flags=case)
            # The failure message names the offending matrix cell.
            assert from_worker == from_drive, (count, case)
@@ -409,3 +409,47 @@ def test_guided_prompt_keeps_content_rules() -> None:
409
409
  # Pipe scaffolding gone
410
410
  assert "COUNT THE PIPES" not in p
411
411
  assert "PIPE-DELIMITED" not in p
412
+
413
+
414
+ # ----------------------------------------------------------------------
415
+ # DISTILL_CHAT_TEMPLATE_KWARGS — thinking-teacher template switch
416
+ # ----------------------------------------------------------------------
417
+
418
+
419
def test_default_body_has_no_chat_template_kwargs(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With DISTILL_CHAT_TEMPLATE_KWARGS unset, nothing extra is sent.

    The freshly loaded worker must expose ``None`` for the setting, and the
    request body it builds must not contain a ``chat_template_kwargs`` key.
    """
    monkeypatch.delenv("DISTILL_CHAT_TEMPLATE_KWARGS", raising=False)
    loaded = _load_worker("worker_no_ctk")

    assert loaded.DISTILL_CHAT_TEMPLATE_KWARGS is None

    request_body = loaded._build_request_body("PROMPT", 5)
    assert "chat_template_kwargs" not in request_body
428
+
429
+
430
def test_chat_template_kwargs_forwarded(monkeypatch: pytest.MonkeyPatch) -> None:
    """A valid JSON object in the env var lands verbatim in the request body.

    Checked twice: once with the output mode as found in the environment,
    and once after switching DISTILL_OUTPUT_MODE to ``guided_json``, where
    the body must also still carry ``response_format``.
    """
    expected = {"enable_thinking": False}
    monkeypatch.setenv("DISTILL_CHAT_TEMPLATE_KWARGS", '{"enable_thinking": false}')

    first_worker = _load_worker("worker_ctk")
    assert first_worker.DISTILL_CHAT_TEMPLATE_KWARGS == expected
    first_body = first_worker._build_request_body("PROMPT", 5)
    assert first_body["chat_template_kwargs"] == expected

    # Reload under guided-JSON mode; the kwargs must ride along there too.
    monkeypatch.setenv("DISTILL_OUTPUT_MODE", "guided_json")
    guided_worker = _load_worker("worker_ctk_guided")
    guided_body = guided_worker._build_request_body("PROMPT", 5)
    assert guided_body["chat_template_kwargs"] == expected
    assert "response_format" in guided_body
444
+
445
+
446
def test_chat_template_kwargs_invalid_ignored(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Malformed JSON or a non-object value is ignored rather than fatal.

    For each bad value the worker must still load, expose ``None`` for the
    setting, and build a request body without ``chat_template_kwargs``.
    """
    malformed_values = ("{not json", '["a", "list"]', '"a string"')
    for bad in malformed_values:
        monkeypatch.setenv("DISTILL_CHAT_TEMPLATE_KWARGS", bad)
        # Each bad value gets its own module name for the load.
        loaded = _load_worker(f"worker_ctk_bad_{abs(hash(bad))}")
        assert loaded.DISTILL_CHAT_TEMPLATE_KWARGS is None
        request_body = loaded._build_request_body("PROMPT", 5)
        assert "chat_template_kwargs" not in request_body