npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.10.8 → 0.10.10 - Mend

@pentatonic-ai/ai-agent-sdk 0.10.8 → 0.10.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/index.cjs CHANGED Viewed

@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.8";
+var VERSION = "0.10.10";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/dist/index.js CHANGED Viewed

@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
 }
 // src/telemetry.js
-var VERSION = "0.10.8";
+var VERSION = "0.10.10";
 var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
 function machineId() {
   const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.10.8",
+  "version": "0.10.10",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/memory-engine-v2/RFC-fusion-drive.md CHANGED Viewed

@@ -6,15 +6,21 @@
 > low-value, and junk nodes out of existence (vertical aging). Named for the drive that
 > does the fusing — the decay pass rides the same engine.
-**Status:** spec + initial implementation (PR #92) — 2026-06-12. Implemented: salience
-scoring + decay, **eviction** (`fusion_drive_decay.py --evict`, reversible via
-`node_evictions`), and **fusion** of exact + cross-run-shared-provenance entity dupes and
-exact-triple fact dupes (`fusion_drive_fuse.py --apply`, reversible via `entity_merges`/
-`fact_merges`), with scored directory-anchored master selection. All arena-scoped,
-dry-run-default, transactional, audited. TODO (later PRs): embedding-band + LLM-adjudicated
-detection (in `entity_resolution_v2.py`), semantic fact fusion, authority-table wiring for
-canonical scoring, continuous scheduling, and a half-life/threshold calibration pass before
-`--evict` runs in prod.
+**Status:** spec + implementation (PR #92, then completion PR) — 2026-06-13.
+**Implemented:** salience scoring + decay; **eviction** (`fusion_drive_decay.py --evict`,
+reversible via `node_evictions`); **entity AND relationship decay**; **fusion** of exact +
+cross-run-shared-provenance entity dupes and exact-triple fact dupes, plus an **LLM
+adjudication tier via the in-VPC distiller** (Qwen3.6 — NO egress) for ambiguous cross-run
+entities and semantic (same-assertion-different-words) facts; **authority signals** wired
+into canonical scoring (`grounded` = name verbatim in a provenance event;
+`from_current_teacher` = `distillation_traces.llm_model`); **born-salience** in BOTH the
+async distiller and the sync extractor (+ backfill for existing rows); **continuous
+scheduling** (the `fusion-drive-sweep` 6h timer — dry-run-default, never `--evict` from
+cron). All arena-scoped, dry-run-default, transactional, reversible, audited.
+**Remaining:** `in_directory` anchoring (needs an authoritative directory/contacts source —
+no such table exists yet; the scorer already supports it for when one lands); and the
+**half-life / threshold / salience-constant CALIBRATION pass on a real arena before
+`--evict` is ever run in prod** — eviction stays a deliberate manual op until then.
 **Builds on:** `RFC-entity-reconciliation.md`, `scripts/entity_resolution_v2.py` (#82),
 `org-model/migrations/002_entity_merges_audit.sql`.
 **Motivated by:** the v2 store is currently **pure-accretion** — three independent

package/packages/memory-engine-v2/extractor-async/Dockerfile CHANGED Viewed

@@ -14,6 +14,6 @@ COPY worker.py .
 # add a new sibling module, add it here too — missing COPY makes the
 # container crash-loop on import at startup (observed 2026-06-08 deploy).
 # The test_*.py files are intentionally excluded; pytest only, not runtime.
-COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py extraction_schema.py ./
+COPY noise_filter.py confidence.py entity_id.py sensitive_filter.py extraction_schema.py source_time.py ./
 CMD ["python", "worker.py"]

package/packages/memory-engine-v2/extractor-async/source_time.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""source_time — robust ISO-8601 source-time parsing for graph stamping.
+The memory graph must stamp `events.emitted_at` and the graph rows'
+`first_seen` / `last_seen` / `asserted_at` from the SOURCE time of the
+content (when the email/meeting/message actually happened), NOT the
+ingest wall-clock (`NOW()`). The source time is carried on the event as
+`attributes.timestamp` (ISO-8601). This helper promotes it.
+Mirrors `compat/server.py:_parse_ts` (handles the bare `Z` suffix that
+`datetime.fromisoformat` only learned in 3.11) but returns a tz-aware
+`datetime` rather than a unix float, because the destination columns are
+`TIMESTAMPTZ` and we want psycopg to bind a datetime, not an epoch.
+CONTRACT (load-bearing): callers MUST fall back to the existing default
+(received / NOW) when the source time is absent or unparseable. This
+helper NEVER raises and returns `None` on anything it can't parse — the
+caller is responsible for the `or NOW()` fallback so we never NULL a
+NOT NULL column or crash the ingest/distill path.
+NOTE: keep this byte-identical with the copy in extractor-sync/. Same
+convention as entity_id.py — two services, one parsing rule.
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+from typing import Any
+def parse_source_time(value: Any) -> datetime | None:
+    """Best-effort ISO-8601 -> tz-aware datetime. Returns None on
+    anything we can't parse (caller falls back to NOW()).
+    Accepts both the bare `Z` suffix and explicit offsets. A parsed
+    value with no offset is assumed UTC (the producers emit UTC ISO
+    strings; a naive datetime would break TIMESTAMPTZ comparisons)."""
+    if not isinstance(value, str) or not value:
+        return None
+    try:
+        # `fromisoformat` handles `+00:00` but not the bare `Z` suffix
+        # until Python 3.11; normalise to be safe across runtime
+        # versions on the engine box.
+        dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except Exception:
+        return None
+    if dt.tzinfo is None:
+        # Producer emitted a naive ISO string; treat as UTC rather than
+        # letting psycopg interpret it in the server's local zone.
+        dt = dt.replace(tzinfo=timezone.utc)
+    return dt
+def event_source_time(event: dict[str, Any]) -> datetime | None:
+    """Pull the source time off an event dict's attributes.
+    Precedence: `attributes.timestamp` (the source/content time) wins
+    over `attributes.emitted_at` (a producer-supplied emit-now, which is
+    closer to ingest time). Returns None if neither parses — caller
+    falls back to NOW()."""
+    attrs = event.get("attributes") or {}
+    return parse_source_time(attrs.get("timestamp")) or parse_source_time(
+        attrs.get("emitted_at")
+    )

package/packages/memory-engine-v2/extractor-async/test_source_time.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Tests for source_time — promoting source event time onto graph rows.
+The contract under test: source time present and parseable → used;
+absent, empty, or garbage → returns None so the caller falls back to
+NOW() (never crashes, never NULLs a NOT NULL column).
+Run: pytest packages/memory-engine-v2/extractor-async/test_source_time.py
+"""
+from __future__ import annotations
+from datetime import datetime, timezone
+import pytest
+from source_time import event_source_time, parse_source_time
+class TestParseSourceTime:
+    def test_iso_with_z_suffix(self):
+        dt = parse_source_time("2025-03-14T09:30:00Z")
+        assert dt == datetime(2025, 3, 14, 9, 30, tzinfo=timezone.utc)
+    def test_iso_with_explicit_offset(self):
+        dt = parse_source_time("2025-03-14T09:30:00+00:00")
+        assert dt == datetime(2025, 3, 14, 9, 30, tzinfo=timezone.utc)
+    def test_iso_with_nonzero_offset_preserved(self):
+        dt = parse_source_time("2025-03-14T12:30:00+03:00")
+        # 12:30+03:00 == 09:30 UTC
+        assert dt.utcoffset().total_seconds() == 3 * 3600
+        assert dt.astimezone(timezone.utc) == datetime(
+            2025, 3, 14, 9, 30, tzinfo=timezone.utc
+        )
+    def test_naive_iso_assumed_utc(self):
+        # No offset → must NOT come back naive (would break TIMESTAMPTZ
+        # comparisons); we assume UTC.
+        dt = parse_source_time("2025-03-14T09:30:00")
+        assert dt is not None
+        assert dt.tzinfo is not None
+        assert dt == datetime(2025, 3, 14, 9, 30, tzinfo=timezone.utc)
+    # --- fallback cases: must return None, never raise ---
+    @pytest.mark.parametrize(
+        "bad",
+        [
+            None,
+            "",
+            "not-a-date",
+            "2025-13-99T99:99:99Z",  # structurally ISO-ish but invalid
+            "14/03/2025",            # wrong format
+            12345,                    # not a string
+            [],                       # not a string
+            {"timestamp": "x"},      # not a string
+        ],
+    )
+    def test_garbage_or_absent_returns_none(self, bad):
+        assert parse_source_time(bad) is None
+class TestEventSourceTime:
+    def test_prefers_timestamp_over_emitted_at(self):
+        ev = {
+            "attributes": {
+                "timestamp": "2025-01-01T00:00:00Z",     # source time
+                "emitted_at": "2025-06-01T00:00:00Z",    # producer emit-now
+            }
+        }
+        assert event_source_time(ev) == datetime(
+            2025, 1, 1, 0, 0, tzinfo=timezone.utc
+        )
+    def test_falls_back_to_emitted_at_when_no_timestamp(self):
+        ev = {"attributes": {"emitted_at": "2025-06-01T00:00:00Z"}}
+        assert event_source_time(ev) == datetime(
+            2025, 6, 1, 0, 0, tzinfo=timezone.utc
+        )
+    def test_none_when_neither_present(self):
+        assert event_source_time({"attributes": {}}) is None
+    def test_none_when_no_attributes(self):
+        # Must not crash on an event with a missing/None attributes bag.
+        assert event_source_time({}) is None
+        assert event_source_time({"attributes": None}) is None
+    def test_garbage_timestamp_falls_back_to_emitted_at(self):
+        ev = {
+            "attributes": {
+                "timestamp": "garbage",
+                "emitted_at": "2025-06-01T00:00:00Z",
+            }
+        }
+        assert event_source_time(ev) == datetime(
+            2025, 6, 1, 0, 0, tzinfo=timezone.utc
+        )
+    def test_all_garbage_returns_none(self):
+        ev = {"attributes": {"timestamp": "nope", "emitted_at": "also-nope"}}
+        assert event_source_time(ev) is None

package/packages/memory-engine-v2/extractor-async/worker.py CHANGED Viewed

@@ -33,6 +33,7 @@ import os
 import re
 import socket
 import time
+from datetime import datetime
 from typing import Any
 import httpx
@@ -41,6 +42,7 @@ import psycopg.rows
 from confidence import born_salience, corroborated_confidence
 from entity_id import entity_id, normalize_surface_form
+from source_time import event_source_time, parse_source_time
 from extraction_schema import (
     ALLOWED_ENT_TYPES,
     ALLOWED_FCT_CATEGORIES,
@@ -372,7 +374,10 @@ def build_event_block(idx: int, event: dict[str, Any]) -> str:
     src = event.get("source_kind", "unknown")
     content = clean_content(event.get("content") or "")[:MAX_CONTENT_CHARS]
     attrs = event.get("attributes") or {}
-    when = attrs.get("emitted_at") or attrs.get("timestamp")
+    # Prefer the SOURCE time (`timestamp`) over the producer's emit-now
+    # (`emitted_at`) so the LLM anchors "when" to when the content
+    # actually happened, not when it was forwarded into ingest.
+    when = attrs.get("timestamp") or attrs.get("emitted_at")
     author = attrs.get("author") or attrs.get("user_id")
     header = [f"[event {idx}]", f"source_kind: {src}"]
     if when:
@@ -798,10 +803,18 @@ def upsert_entities(
     participant_set: list[str],
     disclosure_class: str,
     entities: list[dict],
+    event_time: datetime | None,
 ) -> dict[str, str]:
     """Alias-aware insert (or merge) of entities; returns a name→id
     map so facts and relationships can link to the inserted rows.
+    `event_time` is the SOURCE time of the event being distilled (parsed
+    from `attributes.timestamp`); it stamps `first_seen`/`last_seen` so
+    the graph tracks content time, not ingest time. `None` (no/garbage
+    source time) falls back to NOW() via COALESCE — never NULLs a NOT
+    NULL column. Re-corroboration widens the window with LEAST/GREATEST
+    on the SOURCE time, so "most recent evidence" = newest source time.
     Two concerns layered together:
     1. **ID derivation** uses the shared `entity_id()` helper from
@@ -884,10 +897,13 @@ def upsert_entities(
                     UPDATE entities SET
                       aliases = ARRAY(SELECT DISTINCT UNNEST(aliases || %s::text[])),
                       provenance_event_ids = ARRAY(SELECT DISTINCT UNNEST(provenance_event_ids || %s::text[])),
-                      last_seen = NOW()
+                      -- Widen the seen-window with this event's SOURCE
+                      -- time, not NOW(): newest evidence = newest source.
+                      last_seen = GREATEST(last_seen, COALESCE(%s, NOW())),
+                      first_seen = LEAST(first_seen, COALESCE(%s, NOW()))
                     WHERE id = %s
                     """,
-                    (aliases, [event_id], eid),
+                    (aliases, [event_id], event_time, event_time, eid),
                 )
             else:
                 # 3b. No match — insert new.
@@ -904,8 +920,12 @@ def upsert_entities(
                     """
                     INSERT INTO entities (
                       id, arena, entity_type, canonical_name, aliases,
-                      provenance_event_ids, participant_set, disclosure_class, salience
-                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s::disclosure_class, %s)
+                      provenance_event_ids, participant_set, disclosure_class, salience,
+                      first_seen, last_seen
+                    ) VALUES (
+                      %s, %s, %s, %s, %s, %s, %s, %s::disclosure_class, %s,
+                      COALESCE(%s, NOW()), COALESCE(%s, NOW())
+                    )
                     ON CONFLICT (id) DO UPDATE SET
                       aliases = (
                         SELECT ARRAY(SELECT DISTINCT UNNEST(entities.aliases || EXCLUDED.aliases))
@@ -915,11 +935,14 @@ def upsert_entities(
                       ),
                       -- re-corroboration can only RAISE salience, never lower it
                       salience = GREATEST(entities.salience, EXCLUDED.salience),
-                      last_seen = NOW()
+                      -- widen the seen-window on SOURCE time, not NOW()
+                      last_seen = GREATEST(entities.last_seen, EXCLUDED.last_seen),
+                      first_seen = LEAST(entities.first_seen, EXCLUDED.first_seen)
                     """,
                     (
                         eid, arena, etype, name, aliases,
                         [event_id], participant_set, disclosure_class, _sal,
+                        event_time, event_time,
                     ),
                 )
             name_to_id[name] = eid
@@ -934,6 +957,8 @@ def upsert_facts(
     disclosure_class: str,
     facts: list[dict],
     name_to_id: dict[str, str],
+    event_time: datetime | None,
+    due_at: datetime | None = None,
 ) -> int:
     """Facts are content-hashed on (arena, statement). Same statement
     extracted from any event in the arena converges to the same row,
@@ -950,7 +975,16 @@ def upsert_facts(
     (see confidence.py — caps at 0.9 to reserve [0.9, 1.0] for
     `stage = 'verified'` which only a human can produce). Stage stays
     `provisional`; corroboration is a signal, not a graduation.
-    """
+    `asserted_at` is stamped from the event's SOURCE time (`event_time`,
+    parsed from `attributes.timestamp`), falling back to NOW() via
+    COALESCE — so the temporal anchor is when the fact was actually
+    asserted, not when we distilled it. On corroboration it moves
+    FORWARD with GREATEST to the newest source time across all
+    corroborating events: facts have no `last_seen`, so #92's decay uses
+    `asserted_at` as the recency clock and resets it on re-corroboration
+    — order-stable regardless of distill order. `due_at` (the source
+    event's structured deadline, if any) populates `effective_until`."""
     if not facts:
         return 0
     inserted = 0
@@ -975,10 +1009,12 @@ def upsert_facts(
                 INSERT INTO facts (
                   id, arena, category, subject_entity_id, predicate,
                   object_entity_id, statement, provenance_event_ids,
-                  stage, confidence, participant_set, disclosure_class, salience
+                  stage, confidence, participant_set, disclosure_class, salience,
+                  asserted_at, effective_until
                 ) VALUES (
                   %s, %s, %s, %s, %s, %s, %s, %s,
-                  'provisional'::extraction_stage, %s, %s, %s::disclosure_class, %s
+                  'provisional'::extraction_stage, %s, %s, %s::disclosure_class, %s,
+                  COALESCE(%s, NOW()), %s
                 )
                 ON CONFLICT (id) DO UPDATE SET
                   provenance_event_ids = (
@@ -1005,7 +1041,18 @@ def upsert_facts(
                       )
                     ),
                     0.9
-                  )
+                  ),
+                  -- `asserted_at` doubles as the decay clock for facts:
+                  -- #92's fusion_drive_decay ages off
+                  -- max(last_accessed, asserted_at) and resets that
+                  -- clock on re-corroboration (facts have no `last_seen`
+                  -- of their own). So on conflict we move it FORWARD
+                  -- with GREATEST to the newest source time across all
+                  -- corroborating events — newest evidence, not oldest.
+                  -- This also makes it order-stable (independent of
+                  -- distill order). EXCLUDED.asserted_at is the
+                  -- COALESCE(event_time, NOW()) from the INSERT above.
+                  asserted_at = GREATEST(facts.asserted_at, EXCLUDED.asserted_at)
                 """,
                 (
                     _content_id(arena, stmt),
@@ -1020,6 +1067,8 @@ def upsert_facts(
                     participant_set,
                     disclosure_class,
                     _fsal,
+                    event_time,
+                    due_at,
                 ),
             )
             inserted += 1
@@ -1034,9 +1083,14 @@ def upsert_relationships(
     disclosure_class: str,
     relationships: list[dict],
     name_to_id: dict[str, str],
+    event_time: datetime | None,
 ) -> int:
     """Edge identity is (arena, from, to, type). ON CONFLICT bumps
-    weight + last_seen rather than duplicating."""
+    weight + widens the seen-window rather than duplicating.
+    `first_seen`/`last_seen` are stamped from the event's SOURCE time
+    (`event_time`), falling back to NOW() via COALESCE; re-corroboration
+    widens with LEAST/GREATEST on the source time, not ingest time."""
     if not relationships:
         return 0
     inserted = 0
@@ -1052,21 +1106,25 @@ def upsert_relationships(
                 """
                 INSERT INTO relationships (
                   id, arena, from_entity_id, to_entity_id, relationship_type,
-                  weight, provenance_event_ids, participant_set, disclosure_class
+                  weight, provenance_event_ids, participant_set, disclosure_class,
+                  first_seen, last_seen
                 ) VALUES (
-                  %s, %s, %s, %s, %s, %s, %s, %s, %s::disclosure_class
+                  %s, %s, %s, %s, %s, %s, %s, %s, %s::disclosure_class,
+                  COALESCE(%s, NOW()), COALESCE(%s, NOW())
                 )
                 ON CONFLICT (id) DO UPDATE SET
                   weight = relationships.weight + EXCLUDED.weight,
                   provenance_event_ids = (
                     SELECT ARRAY(SELECT DISTINCT UNNEST(relationships.provenance_event_ids || EXCLUDED.provenance_event_ids))
                   ),
-                  last_seen = NOW()
+                  last_seen = GREATEST(relationships.last_seen, EXCLUDED.last_seen),
+                  first_seen = LEAST(relationships.first_seen, EXCLUDED.first_seen)
                 """,
                 (
                     rid, arena, from_id, to_id, rtype,
                     float(r.get("confidence") or 0.5),
                     [event_id], participant_set, disclosure_class,
+                    event_time, event_time,
                 ),
             )
             inserted += 1
@@ -1318,7 +1376,7 @@ def fetch_event(conn: psycopg.Connection, event_id: str) -> dict[str, Any] | Non
     with conn.cursor(row_factory=psycopg.rows.dict_row) as cur:
         cur.execute(
             "SELECT id, arena, source_kind, content, attributes, participant_set, "
-            "disclosure_class FROM events WHERE id = %s",
+            "disclosure_class, emitted_at FROM events WHERE id = %s",
             (event_id,),
         )
         return cur.fetchone()
@@ -1476,16 +1534,31 @@ async def process_batch(
             arena = event["arena"]
             participant_set = event.get("participant_set") or [arena]
             disclosure = event.get("disclosure_class") or "private"
+            # SOURCE time of this event: prefer the parsed
+            # `attributes.timestamp` (canonical), then the parsed
+            # `attributes.emitted_at`, and only then the stored `emitted_at`
+            # column (which the sync path now also stamps from source time).
+            # `None` ⇒ upserts fall back to NOW() in-SQL. NEVER crash on a bad/absent source time.
+            event_time = event_source_time(event) or event.get("emitted_at")
+            # A structured deadline on the source event, if the producer
+            # supplied one — populates facts.effective_until. Absent or
+            # unparseable ⇒ None (column stays NULL, its existing
+            # behaviour). Only `attributes.due_at` is honoured; we do NOT
+            # guess deadlines from free text here.
+            due_at = parse_source_time((event.get("attributes") or {}).get("due_at"))
             try:
                 name_to_id = upsert_entities(
-                    conn, arena, event_id, participant_set, disclosure, ents
+                    conn, arena, event_id, participant_set, disclosure, ents,
+                    event_time,
                 )
                 n_facts = upsert_facts(
                     conn, arena, event_id, participant_set, disclosure, facts, name_to_id,
+                    event_time, due_at,
                 )
                 n_rels = upsert_relationships(
                     conn, arena, event_id, participant_set, disclosure, rels, name_to_id,
+                    event_time,
                 )
                 mark_done(conn, queue_id)
                 log.info(

package/packages/memory-engine-v2/extractor-sync/Dockerfile CHANGED Viewed

@@ -5,7 +5,9 @@ WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-COPY entity_id.py server.py .
+# confidence.py and source_time.py are byte-identical copies of extractor-async's
+# (the born_salience scale must match the decay side). test_born_salience_parity guards born_salience drift.
+COPY entity_id.py confidence.py source_time.py server.py .
 EXPOSE 8101
 CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8101", "--workers", "2"]

package/packages/memory-engine-v2/extractor-sync/confidence.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""confidence — fact confidence promotion based on multi-source corroboration.
+Today every fact lands in org_model at confidence 0.5 / stage 'provisional'
+and never moves. Live-data audit (2026-05-25): EVERY fact across 200
+sampled rows in pentatonic-team is stuck at 0.5 — no signal of
+"how trustworthy is this?" reaches the read side.
+The right signal is **multi-source corroboration**: the same statement
+appearing in two emails AND a calendar event is meaningfully more
+trustworthy than a one-off mention in a Slack DM. The extractor
+already records `provenance_event_ids` (the list of source events
+that mention each fact), so the data needed for promotion is there
+— we just don't use it.
+Formula:
+    confidence = min(0.5 + 0.15 * (n_sources - 1), 0.9)
+Concretely:
+    1 source  →  0.50  (single mention, default)
+    2 sources →  0.65  (one corroboration)
+    3 sources →  0.80
+    4 sources →  0.90  (cap; "verified" remains human-only)
+    5+        →  0.90
+Cap at 0.9 reserves the [0.9, 1.0] range for human-verified facts
+(`stage = 'verified'`), which the extractor cannot produce on its
+own. We never bump the stage from `provisional` to `distilled` or
+`verified` from this code path — corroboration is a signal, not a
+promotion. Stage transitions stay deliberate / explicit.
+Pure module — no I/O, no deps. Importable from worker.py without
+pulling in psycopg / httpx.
+"""
+from __future__ import annotations
+# Bump-per-additional-source. Tuned so:
+#   1 → 0.50  (base)
+#   2 → 0.65
+#   3 → 0.80
+#   4 → 0.90 (cap reached)
+# Picked instead of a smooth log/sqrt because the read-side bucket
+# boundaries (UI badge colours) align cleanly with these steps.
+_CONF_PER_SOURCE = 0.15
+_CONF_BASE = 0.5
+_CONF_CAP = 0.9
+def corroborated_confidence(n_sources: int) -> float:
+    """Confidence score for a fact corroborated by `n_sources` events.
+    `n_sources <= 0` returns the base confidence — never negative,
+    never above the cap. Pure function for easy unit testing.
+    """
+    if n_sources <= 1:
+        return _CONF_BASE
+    bumped = _CONF_BASE + _CONF_PER_SOURCE * (n_sources - 1)
+    if bumped > _CONF_CAP:
+        return _CONF_CAP
+    return round(bumped, 2)
+# ── born salience (Fusion Drive) ─────────────────────────────────────
+# Retention priority a node is stamped with at extraction time, SEPARATE
+# from confidence (confidence = corroboration/truth; salience = how long
+# it's worth keeping). Junk — flagged by the extractor's own quality
+# detectors (noise name, numeric-ID-as-person, hallucinated email,
+# ungrounded, etc.) — is born near the floor so the Fusion Drive decay
+# pass evicts it on a short clock instead of the multi-year default.
+#
+# This MUST stay byte-identical to fusion_drive/salience.py:born_salience
+# (the decay side uses the same scale). test_born_salience_parity.py
+# guards the two against drift — same pattern as entity_id.py's parity
+# test across the sync/async build contexts.
+_SAL_BASE = 0.50
+_SAL_CORROB_PER_SOURCE = 0.10
+_SAL_CORROB_CAP = 0.30
+_SAL_FLOOR = 0.01
+_SAL_CEIL = 1.00
+_SAL_PENALTIES = {
+    "noise_name": 0.45,
+    "numeric_id_person": 0.45,
+    "hallucinated_email": 0.40,
+    "ungrounded": 0.35,
+    "subject_undeclared": 0.25,
+    "low_signal": 0.15,
+}
+def born_salience(n_sources: int = 1, quality_flags: list[str] | None = None) -> float:
+    """Salience to stamp on a freshly extracted node. See the module note."""
+    s = _SAL_BASE
+    if n_sources > 1:
+        s += min(_SAL_CORROB_CAP, _SAL_CORROB_PER_SOURCE * (n_sources - 1))
+    for flag in quality_flags or []:
+        s -= _SAL_PENALTIES.get(flag, 0.0)
+    return round(max(_SAL_FLOOR, min(_SAL_CEIL, s)), 4)