messagefoundry 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. messagefoundry/__init__.py +108 -0
  2. messagefoundry/__main__.py +1155 -0
  3. messagefoundry/api/__init__.py +27 -0
  4. messagefoundry/api/app.py +1581 -0
  5. messagefoundry/api/approvals.py +184 -0
  6. messagefoundry/api/auth_models.py +211 -0
  7. messagefoundry/api/auth_routes.py +655 -0
  8. messagefoundry/api/field_authz.py +96 -0
  9. messagefoundry/api/models.py +374 -0
  10. messagefoundry/api/security.py +247 -0
  11. messagefoundry/api/tls.py +47 -0
  12. messagefoundry/auth/__init__.py +39 -0
  13. messagefoundry/auth/data/common_passwords.NOTICE +13 -0
  14. messagefoundry/auth/data/common_passwords.txt +10000 -0
  15. messagefoundry/auth/identity.py +71 -0
  16. messagefoundry/auth/ldap.py +264 -0
  17. messagefoundry/auth/notifications.py +68 -0
  18. messagefoundry/auth/passwords.py +53 -0
  19. messagefoundry/auth/permissions.py +120 -0
  20. messagefoundry/auth/policy.py +153 -0
  21. messagefoundry/auth/ratelimit.py +55 -0
  22. messagefoundry/auth/service.py +1323 -0
  23. messagefoundry/auth/tokens.py +26 -0
  24. messagefoundry/auth/totp.py +174 -0
  25. messagefoundry/checks.py +174 -0
  26. messagefoundry/config/__init__.py +30 -0
  27. messagefoundry/config/active_environment.py +80 -0
  28. messagefoundry/config/ai_policy.py +140 -0
  29. messagefoundry/config/code_sets.py +260 -0
  30. messagefoundry/config/connections_edit.py +200 -0
  31. messagefoundry/config/connections_file.py +287 -0
  32. messagefoundry/config/db_lookup.py +117 -0
  33. messagefoundry/config/environments.py +116 -0
  34. messagefoundry/config/ingest_time.py +83 -0
  35. messagefoundry/config/models.py +240 -0
  36. messagefoundry/config/reference.py +158 -0
  37. messagefoundry/config/response.py +83 -0
  38. messagefoundry/config/run_context.py +153 -0
  39. messagefoundry/config/settings.py +1311 -0
  40. messagefoundry/config/state.py +99 -0
  41. messagefoundry/config/tls_policy.py +110 -0
  42. messagefoundry/config/wiring.py +1918 -0
  43. messagefoundry/console/__init__.py +20 -0
  44. messagefoundry/console/__main__.py +274 -0
  45. messagefoundry/console/_async.py +107 -0
  46. messagefoundry/console/change_password.py +111 -0
  47. messagefoundry/console/client.py +552 -0
  48. messagefoundry/console/connections.py +324 -0
  49. messagefoundry/console/login.py +107 -0
  50. messagefoundry/console/mfa.py +205 -0
  51. messagefoundry/console/reauth.py +94 -0
  52. messagefoundry/console/search.py +57 -0
  53. messagefoundry/console/service_control.py +137 -0
  54. messagefoundry/console/sessions.py +122 -0
  55. messagefoundry/console/shell.py +410 -0
  56. messagefoundry/console/status.py +377 -0
  57. messagefoundry/console/users_page.py +282 -0
  58. messagefoundry/console/widgets.py +553 -0
  59. messagefoundry/generators/README.md +27 -0
  60. messagefoundry/generators/__init__.py +15 -0
  61. messagefoundry/generators/_core.py +589 -0
  62. messagefoundry/generators/_hl7data.py +428 -0
  63. messagefoundry/generators/adt.py +286 -0
  64. messagefoundry/generators/all_types.py +24 -0
  65. messagefoundry/generators/bar.py +28 -0
  66. messagefoundry/generators/dft.py +20 -0
  67. messagefoundry/generators/mdm.py +39 -0
  68. messagefoundry/generators/mfn.py +46 -0
  69. messagefoundry/generators/oml.py +32 -0
  70. messagefoundry/generators/orl.py +30 -0
  71. messagefoundry/generators/orm.py +23 -0
  72. messagefoundry/generators/oru.py +21 -0
  73. messagefoundry/generators/ras.py +20 -0
  74. messagefoundry/generators/rde.py +54 -0
  75. messagefoundry/generators/siu.py +64 -0
  76. messagefoundry/generators/vxu.py +20 -0
  77. messagefoundry/hl7schema.py +75 -0
  78. messagefoundry/last_resort.py +55 -0
  79. messagefoundry/logging_setup.py +332 -0
  80. messagefoundry/parsing/__init__.py +64 -0
  81. messagefoundry/parsing/consistency.py +166 -0
  82. messagefoundry/parsing/groups.py +228 -0
  83. messagefoundry/parsing/message.py +453 -0
  84. messagefoundry/parsing/peek.py +237 -0
  85. messagefoundry/parsing/split.py +120 -0
  86. messagefoundry/parsing/summary.py +46 -0
  87. messagefoundry/parsing/tree.py +128 -0
  88. messagefoundry/parsing/validate.py +95 -0
  89. messagefoundry/parsing/x12/__init__.py +46 -0
  90. messagefoundry/parsing/x12/delimiters.py +140 -0
  91. messagefoundry/parsing/x12/errors.py +30 -0
  92. messagefoundry/parsing/x12/interchange.py +232 -0
  93. messagefoundry/parsing/x12/message.py +200 -0
  94. messagefoundry/parsing/x12/peek.py +207 -0
  95. messagefoundry/pipeline/__init__.py +21 -0
  96. messagefoundry/pipeline/alert_sinks.py +486 -0
  97. messagefoundry/pipeline/alerts.py +100 -0
  98. messagefoundry/pipeline/cert_expiry.py +219 -0
  99. messagefoundry/pipeline/cluster.py +955 -0
  100. messagefoundry/pipeline/cluster_sqlserver.py +444 -0
  101. messagefoundry/pipeline/config_convergence.py +137 -0
  102. messagefoundry/pipeline/dryrun.py +450 -0
  103. messagefoundry/pipeline/engine.py +756 -0
  104. messagefoundry/pipeline/leader_tasks.py +158 -0
  105. messagefoundry/pipeline/reference_sync.py +369 -0
  106. messagefoundry/pipeline/retention.py +289 -0
  107. messagefoundry/pipeline/security_notify.py +168 -0
  108. messagefoundry/pipeline/state_convergence.py +143 -0
  109. messagefoundry/pipeline/wiring_runner.py +1722 -0
  110. messagefoundry/py.typed +0 -0
  111. messagefoundry/redaction.py +71 -0
  112. messagefoundry/scaffold.py +321 -0
  113. messagefoundry/secrets_dpapi.py +129 -0
  114. messagefoundry/store/__init__.py +46 -0
  115. messagefoundry/store/audit_tee.py +67 -0
  116. messagefoundry/store/base.py +758 -0
  117. messagefoundry/store/crypto.py +166 -0
  118. messagefoundry/store/keyprovider.py +192 -0
  119. messagefoundry/store/postgres.py +3447 -0
  120. messagefoundry/store/sqlserver.py +3014 -0
  121. messagefoundry/store/store.py +3790 -0
  122. messagefoundry/timezone.py +207 -0
  123. messagefoundry/transports/__init__.py +50 -0
  124. messagefoundry/transports/base.py +269 -0
  125. messagefoundry/transports/database.py +693 -0
  126. messagefoundry/transports/file.py +551 -0
  127. messagefoundry/transports/framing.py +164 -0
  128. messagefoundry/transports/loopback.py +53 -0
  129. messagefoundry/transports/mllp.py +644 -0
  130. messagefoundry/transports/remotefile.py +664 -0
  131. messagefoundry/transports/rest.py +281 -0
  132. messagefoundry/transports/signing.py +321 -0
  133. messagefoundry/transports/soap.py +507 -0
  134. messagefoundry/transports/tcp.py +307 -0
  135. messagefoundry/transports/timer.py +146 -0
  136. messagefoundry/transports/x12.py +323 -0
  137. messagefoundry-0.1.0.dist-info/METADATA +212 -0
  138. messagefoundry-0.1.0.dist-info/RECORD +142 -0
  139. messagefoundry-0.1.0.dist-info/WHEEL +4 -0
  140. messagefoundry-0.1.0.dist-info/entry_points.txt +2 -0
  141. messagefoundry-0.1.0.dist-info/licenses/LICENSE +662 -0
  142. messagefoundry-0.1.0.dist-info/licenses/NOTICE +27 -0
@@ -0,0 +1,3447 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """PostgreSQL implementation of the :class:`~messagefoundry.store.base.Store` protocol.
4
+
5
+ A **production** server-database backend with single-node parity to the SQLite
6
+ :class:`~messagefoundry.store.store.MessageStore`: it implements the **full** staged pipeline
7
+ (ingress → routed → outbound, ADR 0001 Step B; ``supports_ingest_stage = True``), the transform-state
8
+ read-through cache (ADR 0005), reference snapshots (ADR 0006), at-rest encryption/rotation (STORE-1 /
9
+ WP-5), and retention purges. It replicates every semantic of the SQLite reference — most importantly
10
+ the finalizer's ROUTED→FILTERED collapse in :meth:`_maybe_finalize_message`.
11
+
12
+ Concurrency is handled by Postgres row-locking: the staged claims use ``FOR UPDATE SKIP LOCKED`` so
13
+ independent workers don't block or double-claim (correct for a single node today, and the foundation
14
+ for multi-node leases in Track B Step 2). This phase is single-node parity — there is **no** lease
15
+ fencing yet — but per-message finalize, the audit chain, and schema init are serialized with Postgres
16
+ **advisory locks** so the four known SQL-Server-backend concurrency bugs are fixed by construction:
17
+
18
+ * **H-6** — the pool sets ``command_timeout`` so a statement actually times out (the SQL Server
19
+ backend's per-connection timeout was inert on some drivers).
20
+ * **H-7** — ``record_audit`` and ``_backfill_audit_chain`` take ``pg_advisory_xact_lock`` on the audit
21
+ chain before read-tail + insert, so concurrent writers can't fork the chain.
22
+ * **H-8** — :meth:`_maybe_finalize_message` ports the full multi-stage finalizer (not the simpler
23
+ outbound-only one) and is serialized per ``message_id`` with a per-message advisory lock, so it
24
+ re-counts on a fresh snapshot — no double-finalize; different ids never contend. A finalizer that
25
+ spans **more than one** message (``cancel_queued`` and the dead-letter sweeps) pre-acquires every
26
+ per-message lock up front in a **canonical (sorted) order** via :meth:`_lock_finalize_batch`, so two
27
+ such callers with overlapping message sets can't form a lock cycle (no multi-message deadlock). The
28
+ per-message lock only mutually-excludes finalize-vs-finalize; the direct ``messages.status`` writers
29
+ (``handoff``/``route_handoff``/``replay``/``replay_dead``) are pipeline-ordered to never overlap a
30
+ finalize for the same id (the router produces the routed rows a later transform consumes+finalizes),
31
+ so they don't take it — narrower than SQLite's global single-writer lock, but safe by ordering.
32
+ * **M-6** — every multi-statement write runs inside ``async with conn.transaction():`` (asyncpg
33
+ auto-rolls-back on exception), so a failed statement can't strand a half-open txn on a pooled
34
+ connection.
35
+
36
+ ``asyncpg`` is an **optional extra** (``pip install 'messagefoundry[postgres]'``); it's imported
37
+ lazily in :meth:`PostgresStore.open` so SQLite-only installs never touch it. Placeholders are
38
+ ``$1,$2,…`` and variable-length IN-lists use ``= ANY($n)`` (never a dynamically-built ``IN (?,?)``).
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ import json
44
+ import logging
45
+ import os
46
+ import socket
47
+ import time
48
+ from collections.abc import Iterable, Mapping, Sequence
49
+ from types import MappingProxyType
50
+ from typing import Any
51
+ from uuid import uuid4
52
+
53
+ from messagefoundry.config.models import RetryPolicy
54
+ from messagefoundry.config.settings import (
55
+ INSECURE_TLS_ESCAPE_ENV,
56
+ StoreSettings,
57
+ insecure_tls_allowed,
58
+ )
59
+ from messagefoundry.redaction import safe_text
60
+ from messagefoundry.store.audit_tee import emit_audit_tee
61
+ from messagefoundry.store.base import Row
62
+ from messagefoundry.store.crypto import PREFIX as _ENC_PREFIX
63
+ from messagefoundry.store.crypto import AesGcmCipher, Cipher, CipherError, IdentityCipher
64
+ from messagefoundry.store.store import (
65
+ ConnectionMetrics,
66
+ DbStatus,
67
+ DestinationMetrics,
68
+ InboundMetrics,
69
+ CapturedResponse,
70
+ MessageStatus,
71
+ MessageStore,
72
+ OutboxItem,
73
+ OutboxStatus,
74
+ SessionRecord,
75
+ Stage,
76
+ UserRecord,
77
+ audit_row_hash,
78
+ )
79
+
80
+ log = logging.getLogger(__name__)
81
+
82
+ # Advisory-lock keys passed to the TWO-key pg_advisory_xact_lock(classid, hashtext($key)). They
83
+ # serialize the audit-chain append (H-7) and schema init across concurrent opens; the finalize lock is
84
+ # per-message (its key is built from the message id) so different messages never contend (H-8). A
85
+ # distinct integer ``classid`` per family partitions the 32-bit hashtext space, so an audit/schema key
86
+ # can never hash-collide with a finalize key (only same-family same-hash keys can still collide, which
87
+ # is acceptable). The key strings are namespaced by db_schema at runtime (see ``_lock_key``) so two
88
+ # deployments sharing one database via different schemas don't share lock identity.
89
# Lock-family class ids: the first (integer) argument of the two-key advisory lock. One distinct
# value per family keeps audit, schema-init and finalize keys in separate key spaces.
_LOCK_CLASS_AUDIT = 1
_LOCK_CLASS_SCHEMA = 2
_LOCK_CLASS_FINALIZE = 3
# Key strings that are hashed into the second lock argument. The finalize entry is only a prefix:
# the per-message key is built from it plus the message id.
_AUDIT_LOCK = "mefor_audit_chain"
_SCHEMA_LOCK = "mefor_schema_init"
_FINALIZE_LOCK_PREFIX = "mefor_finalize:"
95
+
96
+ # Schema (PostgreSQL). All DDL is `IF NOT EXISTS`, run once under the schema advisory lock so
97
+ # concurrent opens don't race on CREATE. Epoch timestamps are DOUBLE PRECISION; ids TEXT (uuid4 hex);
98
+ # bodies/PHI columns TEXT; booleans BOOLEAN; auto-ids BIGSERIAL. The `queue.seq` BIGSERIAL is the FIFO
99
+ # tiebreak that replaces SQLite's implicit rowid (same-transaction inserts get increasing seq, so
100
+ # handler-list order survives ORDER BY created_at, seq).
101
+ _SCHEMA: list[str] = [
102
+ """CREATE TABLE IF NOT EXISTS messages (
103
+ id TEXT PRIMARY KEY,
104
+ channel_id TEXT NOT NULL,
105
+ received_at DOUBLE PRECISION NOT NULL,
106
+ source_type TEXT,
107
+ control_id TEXT,
108
+ message_type TEXT,
109
+ raw TEXT NOT NULL,
110
+ status TEXT NOT NULL,
111
+ error TEXT,
112
+ summary TEXT,
113
+ metadata TEXT
114
+ )""",
115
+ "CREATE INDEX IF NOT EXISTS ix_messages_channel ON messages(channel_id, received_at)",
116
+ "CREATE INDEX IF NOT EXISTS ix_messages_control ON messages(channel_id, control_id)",
117
+ """CREATE TABLE IF NOT EXISTS queue (
118
+ id TEXT PRIMARY KEY,
119
+ message_id TEXT NOT NULL REFERENCES messages(id),
120
+ stage TEXT NOT NULL,
121
+ channel_id TEXT NOT NULL,
122
+ destination_name TEXT,
123
+ handler_name TEXT,
124
+ payload TEXT NOT NULL,
125
+ status TEXT NOT NULL,
126
+ attempts INTEGER NOT NULL DEFAULT 0,
127
+ next_attempt_at DOUBLE PRECISION NOT NULL,
128
+ last_error TEXT,
129
+ created_at DOUBLE PRECISION NOT NULL,
130
+ updated_at DOUBLE PRECISION NOT NULL,
131
+ seq BIGSERIAL,
132
+ owner TEXT,
133
+ lease_expires_at DOUBLE PRECISION
134
+ )""",
135
+ # Track B Step 2: additive multi-node row-lease columns. NULL while a row is pending/terminal; set
136
+ # only while inflight (owner = the claiming store instance, lease_expires_at = claim time + TTL).
137
+ # CREATE TABLE above declares them on a fresh DB; a pre-existing Step-1 `queue` table is migrated
138
+ # by the guarded one-shot ADD COLUMN in _migrate_lease_columns (NOT a per-open ALTER, which would
139
+ # take ACCESS EXCLUSIVE on `queue` every startup).
140
+ "CREATE INDEX IF NOT EXISTS ix_queue_ready ON queue(stage, status, next_attempt_at)",
141
+ "CREATE INDEX IF NOT EXISTS ix_queue_fifo_out"
142
+ " ON queue(stage, destination_name, status, created_at, seq)",
143
+ "CREATE INDEX IF NOT EXISTS ix_queue_fifo_in"
144
+ " ON queue(stage, channel_id, status, created_at, seq)",
145
+ "CREATE INDEX IF NOT EXISTS ix_queue_message ON queue(message_id)",
146
+ # ix_queue_lease (the reclaim sweep's index) is created in _migrate_lease_columns, AFTER the lease
147
+ # columns are guaranteed present — on a Step-1 table the index references a not-yet-added column.
148
+ """CREATE TABLE IF NOT EXISTS message_events (
149
+ id BIGSERIAL PRIMARY KEY,
150
+ message_id TEXT NOT NULL REFERENCES messages(id),
151
+ ts DOUBLE PRECISION NOT NULL,
152
+ event TEXT NOT NULL,
153
+ destination TEXT,
154
+ detail TEXT
155
+ )""",
156
+ "CREATE INDEX IF NOT EXISTS ix_events_message ON message_events(message_id, ts)",
157
+ """CREATE TABLE IF NOT EXISTS state (
158
+ namespace TEXT NOT NULL,
159
+ key TEXT NOT NULL,
160
+ value TEXT NOT NULL,
161
+ set_at DOUBLE PRECISION NOT NULL,
162
+ message_id TEXT,
163
+ PRIMARY KEY (namespace, key)
164
+ )""",
165
+ "CREATE INDEX IF NOT EXISTS ix_state_set_at ON state(set_at)",
166
+ """CREATE TABLE IF NOT EXISTS reference (
167
+ name TEXT NOT NULL,
168
+ version TEXT NOT NULL,
169
+ key TEXT NOT NULL,
170
+ value TEXT NOT NULL,
171
+ PRIMARY KEY (name, version, key)
172
+ )""",
173
+ "CREATE INDEX IF NOT EXISTS ix_reference_name ON reference(name)",
174
+ """CREATE TABLE IF NOT EXISTS reference_version (
175
+ name TEXT PRIMARY KEY,
176
+ version TEXT NOT NULL,
177
+ synced_at DOUBLE PRECISION NOT NULL,
178
+ row_count INTEGER NOT NULL
179
+ )""",
180
+ # Captured request/response replies (ADR 0013) — immutable artifact, NOT a queue stage, so it is
181
+ # invisible to _maybe_finalize_message's `FROM queue` scan. response_seq is replay-stable (1+MAX,
182
+ # not queue.attempts which replay resets). body/detail are cipher-encrypted at rest (PHI).
183
+ """CREATE TABLE IF NOT EXISTS response (
184
+ message_id TEXT NOT NULL REFERENCES messages(id),
185
+ destination_name TEXT NOT NULL,
186
+ response_seq INTEGER NOT NULL,
187
+ body TEXT,
188
+ outcome TEXT NOT NULL,
189
+ detail TEXT,
190
+ captured_at DOUBLE PRECISION NOT NULL,
191
+ PRIMARY KEY (message_id, destination_name, response_seq)
192
+ )""",
193
+ "CREATE INDEX IF NOT EXISTS ix_response_message ON response(message_id)",
194
+ """CREATE TABLE IF NOT EXISTS audit_log (
195
+ id BIGSERIAL PRIMARY KEY,
196
+ ts DOUBLE PRECISION NOT NULL,
197
+ actor TEXT,
198
+ action TEXT NOT NULL,
199
+ channel_id TEXT,
200
+ detail TEXT,
201
+ row_hash TEXT
202
+ )""",
203
+ "CREATE INDEX IF NOT EXISTS ix_audit_ts ON audit_log(ts)",
204
+ """CREATE TABLE IF NOT EXISTS pending_approvals (
205
+ id TEXT PRIMARY KEY,
206
+ operation TEXT NOT NULL,
207
+ params TEXT NOT NULL,
208
+ requester TEXT NOT NULL,
209
+ requested_at DOUBLE PRECISION NOT NULL,
210
+ status TEXT NOT NULL DEFAULT 'pending',
211
+ approver TEXT,
212
+ decided_at DOUBLE PRECISION,
213
+ expires_at DOUBLE PRECISION
214
+ )""",
215
+ "CREATE INDEX IF NOT EXISTS ix_pending_approvals_status"
216
+ " ON pending_approvals(status, requested_at)",
217
+ """CREATE TABLE IF NOT EXISTS users (
218
+ id TEXT PRIMARY KEY,
219
+ username TEXT NOT NULL UNIQUE,
220
+ auth_provider TEXT NOT NULL,
221
+ display_name TEXT,
222
+ email TEXT,
223
+ disabled BOOLEAN NOT NULL DEFAULT FALSE,
224
+ created_at DOUBLE PRECISION NOT NULL,
225
+ updated_at DOUBLE PRECISION NOT NULL,
226
+ last_login_at DOUBLE PRECISION,
227
+ password_hash TEXT,
228
+ password_changed_at DOUBLE PRECISION,
229
+ must_change_password BOOLEAN NOT NULL DEFAULT FALSE,
230
+ failed_attempts INTEGER NOT NULL DEFAULT 0,
231
+ locked_until DOUBLE PRECISION,
232
+ channel_scope TEXT,
233
+ totp_secret TEXT,
234
+ totp_enabled BOOLEAN NOT NULL DEFAULT FALSE,
235
+ totp_enrolled_at DOUBLE PRECISION,
236
+ totp_recovery_codes TEXT,
237
+ last_totp_step INTEGER
238
+ )""",
239
+ """CREATE TABLE IF NOT EXISTS roles (
240
+ id TEXT PRIMARY KEY,
241
+ display_name TEXT NOT NULL,
242
+ description TEXT,
243
+ builtin BOOLEAN NOT NULL DEFAULT TRUE
244
+ )""",
245
+ """CREATE TABLE IF NOT EXISTS user_roles (
246
+ user_id TEXT NOT NULL REFERENCES users(id),
247
+ role_id TEXT NOT NULL REFERENCES roles(id),
248
+ assigned_at DOUBLE PRECISION NOT NULL,
249
+ assigned_by TEXT,
250
+ PRIMARY KEY (user_id, role_id)
251
+ )""",
252
+ """CREATE TABLE IF NOT EXISTS ad_group_role_map (
253
+ ad_group TEXT NOT NULL,
254
+ role_id TEXT NOT NULL REFERENCES roles(id),
255
+ PRIMARY KEY (ad_group, role_id)
256
+ )""",
257
+ """CREATE TABLE IF NOT EXISTS ad_group_scope_map (
258
+ ad_group TEXT NOT NULL,
259
+ channel TEXT NOT NULL,
260
+ PRIMARY KEY (ad_group, channel)
261
+ )""",
262
+ """CREATE TABLE IF NOT EXISTS sessions (
263
+ token_hash TEXT PRIMARY KEY,
264
+ user_id TEXT NOT NULL REFERENCES users(id),
265
+ created_at DOUBLE PRECISION NOT NULL,
266
+ expires_at DOUBLE PRECISION NOT NULL,
267
+ last_used_at DOUBLE PRECISION NOT NULL,
268
+ revoked_at DOUBLE PRECISION,
269
+ client TEXT,
270
+ reauth_at DOUBLE PRECISION,
271
+ mfa_verified_at DOUBLE PRECISION
272
+ )""",
273
+ "CREATE INDEX IF NOT EXISTS ix_sessions_user ON sessions(user_id)",
274
+ "CREATE INDEX IF NOT EXISTS ix_sessions_expires ON sessions(expires_at)",
275
+ # Track B Step 5: per-lane FIFO ownership. A FIFO lane (one stage+name) is owned by exactly ONE
276
+ # node at a time; only the owner may claim that lane's rows, so head-of-line blocking restores
277
+ # strict per-lane order ACROSS nodes (without ownership the FOR UPDATE SKIP LOCKED head claim lets a
278
+ # second node skip a sibling's locked head and claim row 2 ahead of row 1). The lane key is
279
+ # f"{stage}:{name}" (stage + destination_name|channel_id); the PK on `lane` covers every lookup.
280
+ # Acquire/renew/take-over-on-expiry happens ATOMICALLY in the same txn as the head claim
281
+ # (claim_next_fifo, owner != None), so there is ZERO reorder window. CREATE TABLE IF NOT EXISTS is
282
+ # safe on a fresh OR existing DB (no ALTER/migration) and lands in db_schema via the pool's
283
+ # search_path like every other table.
284
+ """CREATE TABLE IF NOT EXISTS lane_leases (
285
+ lane TEXT PRIMARY KEY,
286
+ owner TEXT NOT NULL,
287
+ lease_expires_at DOUBLE PRECISION NOT NULL
288
+ )""",
289
+ # Track B Step 6: the cluster-wide CONFIG-RELOAD version token. A single-row table (id always 1)
290
+ # holding a monotonically-increasing config_version: an operator reload on one node bumps it, and
291
+ # every other node's config-convergence loop sees the higher version and reloads its OWN (identically
292
+ # deployed) config dir to converge. The DbCoordinator owns it (read/cache/bump); it lands in
293
+ # db_schema via the pool's search_path like every other table. CREATE TABLE IF NOT EXISTS is safe on
294
+ # a fresh OR existing DB (no ALTER/migration).
295
+ """CREATE TABLE IF NOT EXISTS cluster_config (
296
+ id INTEGER PRIMARY KEY,
297
+ config_version BIGINT NOT NULL,
298
+ updated_at DOUBLE PRECISION NOT NULL
299
+ )""",
300
+ # Track B Step 6b: the per-namespace transform-STATE version token (mirrors reference_version's role
301
+ # for reference sets). A clustered node's transform_handoff bumps a namespace's version in the SAME
302
+ # txn as its state writes; every other node's state-convergence loop sees the higher version and
303
+ # read-throughs that whole namespace's rows from the shared state table into its OWN _state_cache. So
304
+ # a sibling's transform-state write reaches every node without each node re-reading the state table.
305
+ # Single-node never bumps (the gate is off), so this table stays empty and behaviour is byte-identical.
306
+ # CREATE TABLE IF NOT EXISTS is safe on a fresh OR existing DB (no ALTER/migration) and lands in
307
+ # db_schema via the pool's search_path like every other table.
308
+ """CREATE TABLE IF NOT EXISTS state_version (
309
+ namespace TEXT PRIMARY KEY,
310
+ version BIGINT NOT NULL,
311
+ updated_at DOUBLE PRECISION NOT NULL
312
+ )""",
313
+ ]
314
+
315
+
316
def _build_ssl(settings: StoreSettings) -> Any:
    """Translate the store's TLS settings into the ``ssl`` argument for ``asyncpg.create_pool``.

    A weakened posture (``trust_server_certificate=true`` or ``encrypt=false``) is MITM-able, so
    it is refused unless the explicit insecure-TLS dev escape is enabled (ASVS 12.3.2).

    Returns:
        ``True`` for verifying TLS (the secure default); ``False`` for a plaintext connection
        (``encrypt=false``, escape enabled); or an ``SSLContext`` with hostname checking and
        certificate verification switched off (``trust_server_certificate=true``, escape enabled).

    Raises:
        ValueError: the posture is weakened and the escape is not enabled.
    """
    weakened = settings.trust_server_certificate or not settings.encrypt
    if weakened and not insecure_tls_allowed():
        raise ValueError(
            "Postgres TLS is weakened (trust_server_certificate=true or encrypt=false), which is "
            f"MITM-able. Use a trusted server certificate, or set {INSECURE_TLS_ESCAPE_ENV}=1 to "
            "explicitly allow it for a trusted-network dev/test bind."
        )

    # Past this point any weakened posture has been explicitly allowed by the escape.
    if not settings.encrypt:
        return False  # plaintext connection, dev/test only
    if not settings.trust_server_certificate:
        return True  # verifying TLS (the secure default)

    # Encrypt the channel but skip server-certificate verification (trusted-network dev).
    import ssl as _ssl

    unverified = _ssl.create_default_context()
    unverified.check_hostname = False
    unverified.verify_mode = _ssl.CERT_NONE
    return unverified
343
+
344
+
345
+ class PostgresStore:
346
+ """PostgreSQL-backed durable queue (the :class:`Store` protocol). Open with :meth:`open`."""
347
+
348
+ # Postgres implements the full staged ingress pipeline (enqueue_ingress/route_handoff/
349
+ # transform_handoff), so the engine starts the staged runner on this backend.
350
+ supports_ingest_stage = True
351
+
352
+ # Postgres implements request/response capture (ADR 0013: the `response` table +
353
+ # complete_with_response), with the same single-transaction atomicity as SQLite.
354
+ supports_response_capture = True
355
+
356
+ #: Every (table, column) the store cipher covers — raw bodies plus the PHI-bearing nullable text
357
+ #: columns (error/last_error/detail). Used by the on-open migration and rotate-key (mirrors
358
+ #: MessageStore._CIPHER_COLUMNS).
359
+ _CIPHER_COLUMNS = (
360
+ ("messages", "raw"),
361
+ ("queue", "payload"),
362
+ ("messages", "error"),
363
+ ("queue", "last_error"),
364
+ ("message_events", "detail"),
365
+ ("users", "totp_secret"), # MFA secret (WP-14) — id-keyed, rides the migration + rotation
366
+ # NB: the `response` table (ADR 0013) is cipher-covered (body, detail) but has a COMPOSITE PK,
367
+ # so it rides the composite helpers below, not this id-keyed list (like state/reference).
368
+ )
369
+
370
def __init__(self, pool: Any, settings: StoreSettings, *, cipher: Cipher | None = None) -> None:
    """Bind the store to an existing connection pool; performs no database I/O itself.

    Args:
        pool: The connection pool the store's methods acquire connections from.
        settings: The store configuration (kept for later use).
        cipher: At-rest cipher; when omitted (or falsy) an ``IdentityCipher()`` is used.
    """
    self._pool = pool
    self._settings = settings
    self._cipher: Cipher = cipher if cipher else IdentityCipher()

    # "server/database" descriptor for db_status.
    server, database = settings.server, settings.database
    self.path = f"{server}/{database}"

    # Track B Step 2: lease identity stamped on rows THIS instance claims — host, pid, and a
    # short random suffix so two stores inside one process still differ. Lease renewal/reclaim
    # rely on it to touch only our own leases and never steal a live sibling's in-flight row.
    host, pid, suffix = socket.gethostname(), os.getpid(), uuid4().hex[:8]
    self._owner = f"{host}:{pid}:{suffix}"

    # In-memory read-through caches (filled at open, updated only after the owning txn commits)
    # so a Handler's synchronous state_get(...) / reference("name").get(key) can resolve.
    self._state_cache: dict[tuple[str, str], Any] = {}
    self._reference_cache: dict[str, dict[str, Any]] = {}

    # Track B Step 6: per reference set, the active VERSION mirrored in _reference_cache.
    # converge_reference_cache() compares it with the shared store's authoritative version to
    # pick which sets a follower must re-load.
    self._reference_versions: dict[str, str] = {}

    # Track B Step 6b: per namespace, the version mirrored in _state_cache; the state analogue
    # of _reference_versions, consulted by converge_state_cache().
    self._state_versions: dict[str, int] = {}

    # Gate for the in-transaction state-version bump. Off by default; the engine enables it only
    # when clustered, so a single node never writes state_version rows.
    self._cluster_state_convergence: bool = False
400
+
401
@classmethod
async def open(
    cls, settings: StoreSettings, *, cipher: Cipher | None = None
) -> "PostgresStore":
    """Create the connection pool, prepare the database, and return a ready-to-use store.

    ``asyncpg`` is imported lazily here so installs without the ``postgres`` extra never touch
    it. After the pool is created the new store runs, in order: schema creation/migration, the
    at-rest encryption pass, the audit-chain backfill, and the state / reference cache loads.

    Args:
        settings: Connection, TLS, pool-size and timeout configuration.
        cipher: Optional at-rest cipher, passed through to the constructor.

    Returns:
        The initialised store.

    Raises:
        RuntimeError: ``asyncpg`` is not installed.
        ValueError: raised by ``_build_ssl`` when the TLS configuration is refused.

    If any initialisation step fails after the pool exists, the pool is terminated before the
    error propagates, so a failed open does not leak open connections.
    """
    try:
        import asyncpg
    except ImportError as exc:  # pragma: no cover - exercised only without the extra
        raise RuntimeError(
            "Postgres backend requires the 'postgres' extra: "
            "pip install 'messagefoundry[postgres]'"
        ) from exc
    server_settings = {"application_name": settings.application_name}
    if settings.db_schema:
        # Resolve unqualified table names against the configured schema (it must already exist).
        server_settings["search_path"] = settings.db_schema
    pool = await asyncpg.create_pool(
        host=settings.server,
        port=settings.port,
        database=settings.database,
        user=settings.username,
        password=settings.password,
        ssl=_build_ssl(settings),
        min_size=1,
        max_size=max(1, settings.pool_size),
        timeout=settings.connect_timeout,  # connection-acquire/connect timeout (seconds)
        # H-6: a real per-statement timeout (>0) so a hung statement actually times out, unlike
        # the SQL Server backend's inert per-connection attribute. 0 = no limit (asyncpg: None).
        command_timeout=(settings.command_timeout or None),
        server_settings=server_settings,
    )
    try:
        store = cls(pool, settings, cipher=cipher)
        await store._ensure_schema()
        await store._encrypt_existing_rows()  # one-time PHI-at-rest migration when a key is set
        await store._backfill_audit_chain()  # chain any pre-existing (unhashed) audit rows
        await store._load_state_cache()  # in-memory state read-through cache (ADR 0005)
        await store._load_reference_cache()  # reference-snapshot read cache (ADR 0006)
    except BaseException:
        # The caller never receives a store to close, so release the pool here. terminate() is
        # synchronous, so it cannot block (or be cancelled) while the original error propagates.
        pool.terminate()
        raise
    return store
440
+
441
async def _ensure_schema(self) -> None:
    """Apply the schema DDL and the column migrations inside a single transaction.

    The schema advisory lock is taken first so concurrent opens cannot race on the DDL; it is
    released automatically when the transaction ends.
    """
    async with self._pool.acquire() as conn, conn.transaction():
        await self._advisory_lock(conn, _LOCK_CLASS_SCHEMA, _SCHEMA_LOCK)
        for ddl in _SCHEMA:
            await conn.execute(ddl)
        await self._migrate_lease_columns(conn)
450
+
451
async def _migrate_lease_columns(self, conn: Any) -> None:
    """Add columns introduced after the first schema version, but only where they are missing.

    Covers the Track B Step 2 lease columns on ``queue`` (plus the lease index), the step-up
    ``sessions.reauth_at`` column (ASVS 7.5.3), and the MFA columns (WP-14) on ``users`` and
    ``sessions``. Each ``ALTER TABLE`` is gated on an ``information_schema`` lookup rather than
    issued unconditionally, because even a no-op ``ADD COLUMN IF NOT EXISTS`` takes an ACCESS
    EXCLUSIVE lock on the table on every open. Runs under the schema advisory lock, on the
    caller's connection and transaction.

    The lookups are restricted to ``current_schema()``: ``information_schema.columns`` spans
    every schema in the database, so a same-named table belonging to another deployment in a
    different schema must not make a column look present here and suppress the migration.

    Args:
        conn: Connection to run the lookups and DDL on; only its ``fetch`` and ``execute``
            coroutines are used.
    """

    async def existing_columns(table: str) -> set[str]:
        # Column names of `table` as it exists in the schema unqualified DDL resolves to.
        rows = await conn.fetch(
            "SELECT column_name FROM information_schema.columns"
            " WHERE table_schema = current_schema() AND table_name = $1::text",
            table,
        )
        return {row["column_name"] for row in rows}

    queue_cols = await existing_columns("queue")
    if "owner" not in queue_cols:
        await conn.execute("ALTER TABLE queue ADD COLUMN owner TEXT")
    if "lease_expires_at" not in queue_cols:
        await conn.execute("ALTER TABLE queue ADD COLUMN lease_expires_at DOUBLE PRECISION")
    # The reclaim sweep scans inflight rows by lease expiry. Partial index: only inflight rows
    # carry a lease. Created here, after the lease columns are guaranteed present, because on a
    # Step-1 table the index would otherwise reference a not-yet-added column.
    await conn.execute(
        "CREATE INDEX IF NOT EXISTS ix_queue_lease ON queue(lease_expires_at)"
        " WHERE status='inflight'"
    )

    # One lookup serves both sessions columns; adding reauth_at cannot affect the
    # mfa_verified_at check.
    session_cols = await existing_columns("sessions")
    if "reauth_at" not in session_cols:
        # Pre-existing session rows get NULL.
        await conn.execute("ALTER TABLE sessions ADD COLUMN reauth_at DOUBLE PRECISION")

    # Column names/declarations are static literals (not user input), so formatting them into
    # the DDL is safe.
    user_cols = await existing_columns("users")
    for column, decl in (
        ("totp_secret", "TEXT"),
        ("totp_enabled", "BOOLEAN NOT NULL DEFAULT FALSE"),
        ("totp_enrolled_at", "DOUBLE PRECISION"),
        ("totp_recovery_codes", "TEXT"),
        ("last_totp_step", "INTEGER"),
    ):
        if column not in user_cols:
            await conn.execute(f"ALTER TABLE users ADD COLUMN {column} {decl}")

    if "mfa_verified_at" not in session_cols:
        await conn.execute("ALTER TABLE sessions ADD COLUMN mfa_verified_at DOUBLE PRECISION")
508
+
509
async def close(self) -> None:
    """Close the connection pool backing this store."""
    pool = self._pool
    await pool.close()
511
+
512
+ # --- PHI-at-rest cipher seam for nullable text columns (WP-5) -------------
513
+
514
+ def _enc(self, value: str | None) -> str | None:
515
+ if not value: # None or "" → leave blank (covers purged/empty values)
516
+ return value
517
+ return self._cipher.encrypt(value)
518
+
519
+ def _dec(self, value: str | None) -> str | None:
520
+ if value is None:
521
+ return value
522
+ return self._cipher.decrypt(value) # '' and legacy plaintext pass through unchanged
523
+
524
+ def _decode_record(self, record: Any, *columns: str) -> dict[str, Any]:
525
+ """Materialize an ``asyncpg.Record`` as a dict and decrypt the named cipher-covered text
526
+ columns (mirrors MessageStore._decode_row)."""
527
+ d = dict(record)
528
+ for col in columns:
529
+ if col in d:
530
+ d[col] = self._dec(d[col])
531
+ return d
532
+
533
+ # --- advisory-lock helpers -----------------------------------------------
534
+
535
+ def _lock_key(self, key: str) -> str:
536
+ """Namespace an advisory-lock key by the configured schema so two deployments sharing one
537
+ database via different ``db_schema`` values don't share lock identity (advisory locks are
538
+ database-scoped, not schema-scoped)."""
539
+ return f"{self._settings.db_schema or 'public'}:{key}"
540
+
541
+ async def _advisory_lock(self, conn: Any, classid: int, key: str) -> None:
542
+ """Take a transaction-scoped advisory lock in the ``classid`` family (auto-released at commit).
543
+ Uses the two-key form so each family has its own 32-bit hashtext namespace (no cross-family
544
+ collisions)."""
545
+ await conn.execute(
546
+ "SELECT pg_advisory_xact_lock($1, hashtext($2))", classid, self._lock_key(key)
547
+ )
548
+
549
async def _lock_finalize_batch(self, conn: Any, message_ids: Iterable[str]) -> None:
    """Take the per-message finalize advisory lock for every id, de-duplicated and sorted.

    Multi-message finalizers hold all their per-message transaction locks until commit.
    Acquiring them up front in one canonical (sorted) order means no two such callers can form
    a lock cycle, so concurrent multi-message finalizes cannot deadlock. The per-message lock
    is re-entrant, so re-taking it later in the same transaction is a no-op.
    """
    ordered_ids = sorted(set(message_ids))
    for message_id in ordered_ids:
        lock_name = f"{_FINALIZE_LOCK_PREFIX}{message_id}"
        await self._advisory_lock(conn, _LOCK_CLASS_FINALIZE, lock_name)
558
+
559
+ # --- pooled-statement helpers --------------------------------------------
560
+
561
+ async def _fetchall(self, sql: str, *params: Any) -> list[Any]:
562
+ return list(await self._pool.fetch(sql, *params))
563
+
564
+ async def _fetchone(self, sql: str, *params: Any) -> Any:
565
+ return await self._pool.fetchrow(sql, *params)
566
+
567
+ async def _execute(self, sql: str, *params: Any) -> None:
568
+ await self._pool.execute(sql, *params)
569
+
570
+ async def _count(self, table: str) -> int:
571
+ row = await self._pool.fetchrow(f"SELECT COUNT(*) AS n FROM {table}") # table is a constant
572
+ return int(row["n"]) if row else 0
573
+
574
+ # --- open-time loaders / migrations --------------------------------------
575
+
576
+ async def _load_state_cache(self) -> None:
577
+ """Populate the in-memory transform-state cache from the ``state`` table (ADR 0005).
578
+
579
+ Also seeds :attr:`_state_versions` from ``state_version`` (Track B Step 6b): a fresh clustered node
580
+ loads the WHOLE ``state`` table here (so it starts fully converged), and recording the per-namespace
581
+ versions means its first convergence tick won't needlessly re-read every namespace it already holds."""
582
+ rows = await self._fetchall("SELECT namespace, key, value FROM state")
583
+ cache: dict[tuple[str, str], Any] = {}
584
+ for r in rows:
585
+ cache[(r["namespace"], r["key"])] = json.loads(self._cipher.decrypt(r["value"]))
586
+ self._state_cache = cache
587
+ vrows = await self._fetchall("SELECT namespace, version FROM state_version")
588
+ self._state_versions = {r["namespace"]: int(r["version"]) for r in vrows}
589
+
590
+ async def _load_reference_cache(self) -> None:
591
+ """Populate the in-memory reference cache from the ACTIVE snapshot of each set (ADR 0006).
592
+
593
+ Drives from ``reference_version`` (the authoritative active-version list) with a LEFT JOIN so a
594
+ set synced to ZERO rows still loads as a present empty ``{}`` after a reopen. Also records each
595
+ set's active version in :attr:`_reference_versions` (Track B Step 6) so a later
596
+ :meth:`converge_reference_cache` knows which sets a follower must read-through."""
597
+ cache, versions = await self._read_active_reference_snapshots()
598
+ self._reference_cache = cache
599
+ self._reference_versions = versions
600
+
601
+ async def _read_active_reference_snapshots(
602
+ self,
603
+ ) -> tuple[dict[str, dict[str, Any]], dict[str, str]]:
604
+ """Read every set's ACTIVE snapshot (rows + version) from the shared store, decrypting values.
605
+
606
+ The shared JOIN/decrypt logic behind both the open-time :meth:`_load_reference_cache` and the
607
+ follower :meth:`converge_reference_cache`. Drives from ``reference_version`` (the authoritative
608
+ active-version list) LEFT JOIN ``reference`` so a set synced to ZERO rows is still a present
609
+ empty ``{}``. Returns ``({name: {key: value}}, {name: version})``."""
610
+ rows = await self._fetchall(
611
+ "SELECT v.name AS name, v.version AS version, r.key AS key, r.value AS value "
612
+ "FROM reference_version v "
613
+ "LEFT JOIN reference r ON r.name = v.name AND r.version = v.version"
614
+ )
615
+ cache: dict[str, dict[str, Any]] = {}
616
+ versions: dict[str, str] = {}
617
+ for r in rows:
618
+ entry = cache.setdefault(r["name"], {})
619
+ versions[r["name"]] = r["version"]
620
+ if r["key"] is not None: # NULL key = the LEFT-JOIN miss of an empty snapshot
621
+ entry[r["key"]] = json.loads(self._cipher.decrypt(r["value"]))
622
+ return cache, versions
623
+
624
async def converge_reference_cache(self) -> list[str]:
    """Pull any newer shared reference snapshot into this node's local cache.

    The follower read-through: reads every set's active snapshot from the shared store (a real
    query plus per-row decrypt on each call, never the external source) and, for each set whose
    active version differs from the one recorded in :attr:`_reference_versions`, replaces that
    set's entry in :attr:`_reference_cache` and records the new version. Nothing is mutated
    when all versions already match.

    Returns:
        The names of the sets that were refreshed (``[]`` when none advanced).
    """
    snapshots, active_versions = await self._read_active_reference_snapshots()
    stale = [
        name
        for name, version in active_versions.items()
        if self._reference_versions.get(name) != version
    ]
    for name in stale:
        self._reference_cache[name] = snapshots[name]
        self._reference_versions[name] = active_versions[name]
    return stale
643
+
644
def enable_state_convergence(self) -> None:
    """Switch on per-namespace state-version bumping for clustered operation.

    Only sets the ``_cluster_state_convergence`` flag. Intended to be called in a cluster
    BEFORE workers start, so a sibling's :meth:`converge_state_cache` sees every write; a
    single-node deployment never calls it and therefore writes no ``state_version`` rows.
    """
    self._cluster_state_convergence = True
649
+
650
async def converge_state_cache(self) -> list[str]:
    """Pull newer shared transform-state writes into this node's local cache.

    Follower read-through in two phases:

    1. Read phase — scan ``state_version`` FIRST, then, for each namespace whose version differs
       from the one in :attr:`_state_versions`, read and decrypt that WHOLE namespace into a
       local snapshot. Reading versions before rows keeps the recorded version ≤ the data
       freshness (worst case: one harmless extra re-converge, never a skipped write), and
       decrypting everything before touching the cache means a decrypt failure raises before
       any partial mutation.
    2. Apply phase — for each snapshot, compare the reflected version now with the one observed
       when the rows were read. If unchanged, reseed destructively (drop the namespace's old
       entries, which also applies a sibling's deletes, then load the snapshot). If a local
       write advanced it meanwhile, the snapshot is stale, so merge it non-destructively with
       ``setdefault`` (never clobbering a newer local value).

    In both cases the version from the scan is recorded. After a merge this deliberately leaves
    the reflected version below the database's, so the next tick performs a clean reseed that
    reconciles any sibling deletes the merge skipped.

    Returns:
        The namespaces that were refreshed.
    """
    version_rows = await self._fetchall("SELECT namespace, version FROM state_version")
    db_versions = {row["namespace"]: int(row["version"]) for row in version_rows}

    # Read phase: (namespace, scanned version, reflected version at read time, decrypted rows).
    staged: list[tuple[str, int, int | None, dict[str, Any]]] = []
    for namespace, db_version in db_versions.items():
        reflected = self._state_versions.get(namespace)
        if reflected == db_version:
            continue
        rows = await self._fetchall("SELECT key, value FROM state WHERE namespace=$1", namespace)
        snapshot = {row["key"]: json.loads(self._cipher.decrypt(row["value"])) for row in rows}
        staged.append((namespace, db_version, reflected, snapshot))

    # Apply phase: no awaits below, so each namespace is updated without interleaving.
    refreshed: list[str] = []
    for namespace, db_version, reflected, snapshot in staged:
        if self._state_versions.get(namespace) != reflected:
            # A local write landed in this namespace during the read window: keep local values.
            for key, value in snapshot.items():
                self._state_cache.setdefault((namespace, key), value)
        else:
            # Nothing intervened: drop the namespace's old entries, then load the snapshot.
            for cache_key in [ck for ck in self._state_cache if ck[0] == namespace]:
                del self._state_cache[cache_key]
            for key, value in snapshot.items():
                self._state_cache[(namespace, key)] = value
        self._state_versions[namespace] = db_version
        refreshed.append(namespace)
    return refreshed
702
+
703
async def _backfill_audit_chain(self) -> None:
    """Fill in ``row_hash`` for audit rows that were written before hash-chaining existed.

    Walks ``audit_log`` in ``id`` order under the audit-chain advisory lock, so a concurrent
    writer cannot fork the chain meanwhile. A row that already has a hash just becomes the new
    chain tip; a row without one gets ``audit_row_hash`` of the current tip plus its own
    fields. Idempotent: only rows with an empty ``row_hash`` are updated.
    """
    async with self._pool.acquire() as conn, conn.transaction():
        await self._advisory_lock(conn, _LOCK_CLASS_AUDIT, _AUDIT_LOCK)
        audit_rows = await conn.fetch(
            "SELECT id, ts, actor, action, channel_id, detail, row_hash FROM audit_log"
            " ORDER BY id"
        )
        chain_tip = ""
        missing: list[tuple[str, int]] = []
        for row in audit_rows:
            stored = row["row_hash"]
            if stored:
                chain_tip = stored
            else:
                chain_tip = audit_row_hash(
                    chain_tip,
                    ts=row["ts"],
                    actor=row["actor"],
                    action=row["action"],
                    channel_id=row["channel_id"],
                    detail=row["detail"],
                )
                missing.append((chain_tip, row["id"]))
        # All hashes are computed before the first UPDATE is issued.
        for digest, row_id in missing:
            await conn.execute("UPDATE audit_log SET row_hash=$1 WHERE id=$2", digest, row_id)
733
+
734
async def _encrypt_existing_rows(self) -> None:
    """Encrypt legacy plaintext in the cipher-covered columns, in place.

    Does nothing unless the cipher reports ``encrypts``. For every id-keyed ``(table, column)``
    in ``_CIPHER_COLUMNS`` it repeatedly selects up to 500 rows whose value is neither blank
    nor already carrying the ciphertext prefix (NULLs never match the filter) and rewrites
    them in one transaction per chunk. The composite-key tables (``state``, ``reference`` and
    the two cipher columns of ``response``) go through :meth:`_encrypt_existing_composite`.
    Logs the total when anything was rewritten. Idempotent and bounded in memory.
    """
    if not self._cipher.encrypts:
        return
    like = f"{_ENC_PREFIX}%"
    migrated = 0
    for table, column in self._CIPHER_COLUMNS:
        select_sql = (
            f"SELECT id, {column} AS v FROM {table}"
            f" WHERE {column} NOT LIKE $1 AND {column} <> '' LIMIT 500"
        )
        update_sql = f"UPDATE {table} SET {column}=$1 WHERE id=$2"
        while chunk := await self._fetchall(select_sql, like):
            async with self._pool.acquire() as conn, conn.transaction():
                for row in chunk:
                    await conn.execute(update_sql, self._cipher.encrypt(row["v"]), row["id"])
            migrated += len(chunk)
    migrated += await self._encrypt_existing_composite(
        "state", ("namespace", "key"), like, encrypt=True
    )
    migrated += await self._encrypt_existing_composite(
        "reference", ("name", "version", "key"), like, encrypt=True
    )
    # The `response` table has a composite PK and TWO cipher columns: migrate each in turn.
    response_pk = ("message_id", "destination_name", "response_seq")
    for value_col in ("body", "detail"):
        migrated += await self._encrypt_existing_composite(
            "response", response_pk, like, encrypt=True, value_col=value_col
        )
    if migrated:
        log.info("encrypted %d existing value(s) at rest", migrated)
777
+
778
+ async def _encrypt_existing_composite(
779
+ self,
780
+ table: str,
781
+ pk_cols: tuple[str, ...],
782
+ like: str,
783
+ *,
784
+ encrypt: bool,
785
+ value_col: str = "value",
786
+ ) -> int:
787
+ """Encrypt the ``value_col`` of a composite-PK table (``state``/``reference``/``response``) in
788
+ place — the migration loop for tables that can't ride the id-keyed loop. ``encrypt=True`` is the
789
+ on-open plaintext→active migration (this method's only caller; rotation uses
790
+ :meth:`_reencrypt_composite`). ``value_col`` defaults to ``value`` (state/reference);
791
+ ``response`` passes ``body``/``detail``."""
792
+ rotated = 0
793
+ pk_select = ", ".join(pk_cols)
794
+ while True:
795
+ rows = await self._fetchall(
796
+ f"SELECT {pk_select}, {value_col} AS v FROM {table}"
797
+ f" WHERE {value_col} NOT LIKE $1 AND {value_col} <> '' LIMIT 500",
798
+ like,
799
+ )
800
+ if not rows:
801
+ break
802
+ where = " AND ".join(f"{c}=${i + 2}" for i, c in enumerate(pk_cols))
803
+ async with self._pool.acquire() as conn:
804
+ async with conn.transaction():
805
+ for r in rows:
806
+ await conn.execute(
807
+ f"UPDATE {table} SET {value_col}=$1 WHERE {where}",
808
+ self._cipher.encrypt(r["v"]),
809
+ *[r[c] for c in pk_cols],
810
+ )
811
+ rotated += len(rows)
812
+ return rotated
813
+
814
+ # --- at-rest key rotation (PHI.md §3, ASVS 11.2.2) -----------------------
815
+
816
async def reencrypt_to_active(self, *, batch: int = 500) -> int:
    """Re-encrypt every cipher-covered value under the **active** key (key rotation).

    Rewrites plaintext or retired-key values; values already under the active key and
    NULL/blank ones are skipped, so the operation is idempotent. Each chunk is decrypted and
    re-encrypted in memory before its first UPDATE, so a value that no configured key can
    decrypt raises before that chunk is touched (PHI is never dropped); chunks committed
    earlier stay rewritten.

    Args:
        batch: maximum rows per chunk / transaction; must be at least 1.

    Returns:
        The number of values rewritten (0 for the identity cipher, which has no key).

    Raises:
        ValueError: if ``batch`` is less than 1. A zero batch would make every
            ``LIMIT``-bounded select return no rows, so the rotation would report success
            while leaving retired-key values in place.
    """
    if batch < 1:
        raise ValueError(f"batch must be >= 1, got {batch!r}")
    cipher = self._cipher
    if not isinstance(cipher, AesGcmCipher):
        return 0  # identity cipher (no key) — nothing to rotate
    active_like = f"{_ENC_PREFIX}{cipher.active_key_id}:%"
    total = 0
    for table, column in self._CIPHER_COLUMNS:
        select_sql = (
            f"SELECT id, {column} AS v FROM {table}"
            f" WHERE {column} NOT LIKE $1 AND {column} <> '' LIMIT $2"
        )
        update_sql = f"UPDATE {table} SET {column}=$1 WHERE id=$2"
        while rows := await self._fetchall(select_sql, active_like, batch):
            # decrypt (via the keyring) → encrypt (active) up front so a cipher error (a prior
            # key not supplied) propagates before any UPDATE — the batch is all-or-nothing.
            updates = [(cipher.encrypt(cipher.decrypt(r["v"])), r["id"]) for r in rows]
            async with self._pool.acquire() as conn, conn.transaction():
                for new_value, rid in updates:
                    await conn.execute(update_sql, new_value, rid)
            total += len(rows)
    total += await self._reencrypt_composite(
        cipher, "state", ("namespace", "key"), active_like, batch
    )
    total += await self._reencrypt_composite(
        cipher, "reference", ("name", "version", "key"), active_like, batch
    )
    # The `response` table has a composite PK and two cipher columns: rotate each in turn.
    for col in ("body", "detail"):
        total += await self._reencrypt_composite(
            cipher,
            "response",
            ("message_id", "destination_name", "response_seq"),
            active_like,
            batch,
            value_col=col,
        )
    if total:
        log.info("re-encrypted %d value(s) under the active key (rotation)", total)
    return total
866
+
867
+ async def _reencrypt_composite(
868
+ self,
869
+ cipher: AesGcmCipher,
870
+ table: str,
871
+ pk_cols: tuple[str, ...],
872
+ active_like: str,
873
+ batch: int,
874
+ value_col: str = "value",
875
+ ) -> int:
876
+ """Re-encrypt the ``value_col`` of a composite-PK table under the active key (the rotation
877
+ parallel of :meth:`_encrypt_existing_composite`). Decrypt→encrypt up front; a value no key can
878
+ decrypt raises before any UPDATE. ``value_col`` defaults to ``value``; ``response`` rotates
879
+ ``body``/``detail``."""
880
+ rotated = 0
881
+ pk_select = ", ".join(pk_cols)
882
+ where = " AND ".join(f"{c}=${i + 2}" for i, c in enumerate(pk_cols))
883
+ while True:
884
+ rows = await self._fetchall(
885
+ f"SELECT {pk_select}, {value_col} AS v FROM {table}"
886
+ f" WHERE {value_col} NOT LIKE $1 AND {value_col} <> '' LIMIT $2",
887
+ active_like,
888
+ batch,
889
+ )
890
+ if not rows:
891
+ break
892
+ updates = [
893
+ (cipher.encrypt(cipher.decrypt(r["v"])), [r[c] for c in pk_cols]) for r in rows
894
+ ]
895
+ async with self._pool.acquire() as conn:
896
+ async with conn.transaction():
897
+ for new_value, pk_vals in updates:
898
+ await conn.execute(
899
+ f"UPDATE {table} SET {value_col}=$1 WHERE {where}", new_value, *pk_vals
900
+ )
901
+ rotated += len(rows)
902
+ return rotated
903
+
904
+ # --- internal write helpers ----------------------------------------------
905
+
906
async def _event(
    self,
    conn: Any,
    message_id: str,
    event: str,
    destination: str | None,
    detail: str | None,
    now: float,
) -> None:
    """Append one ``message_events`` row on ``conn``.

    ``detail`` is passed through ``safe_text`` (when non-empty) and then encrypted here, so the
    cipher boundary for this column lives in ONE place. Callers pass plaintext and must never
    pre-wrap it with ``_enc``.
    """
    if detail:
        detail = safe_text(detail)  # PHI chokepoint (#120)
    stored_detail = self._enc(detail)
    await conn.execute(
        "INSERT INTO message_events (message_id, ts, event, destination, detail)"
        " VALUES ($1,$2,$3,$4,$5)",
        message_id,
        now,
        event,
        destination,
        stored_detail,
    )
928
+
929
+ async def _insert_message(
930
+ self,
931
+ conn: Any,
932
+ mid: str,
933
+ *,
934
+ channel_id: str,
935
+ raw: str,
936
+ status: str,
937
+ control_id: str | None,
938
+ message_type: str | None,
939
+ source_type: str | None,
940
+ summary: str | None,
941
+ metadata: str | None,
942
+ error: str | None,
943
+ now: float,
944
+ ) -> None:
945
+ await conn.execute(
946
+ "INSERT INTO messages"
947
+ " (id, channel_id, received_at, source_type, control_id,"
948
+ " message_type, raw, status, error, summary, metadata)"
949
+ " VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)",
950
+ mid,
951
+ channel_id,
952
+ now,
953
+ source_type,
954
+ control_id,
955
+ message_type,
956
+ self._cipher.encrypt(raw),
957
+ status,
958
+ self._enc(error),
959
+ summary,
960
+ metadata,
961
+ )
962
+
963
+ async def _fifo_created_at(
964
+ self, conn: Any, stage: str, lane_col: str, lane_val: str, now: float
965
+ ) -> float:
966
+ """The ``created_at`` to stamp on a new ``stage`` row so per-lane FIFO order (``ORDER BY
967
+ created_at, seq``) survives a backward wall-clock step — clamps up to the lane's current max
968
+ (mirrors MessageStore._fifo_created_at). ``lane_col`` is a code-controlled literal."""
969
+ row = await conn.fetchrow(
970
+ f"SELECT MAX(created_at) AS m FROM queue WHERE stage=$1 AND {lane_col}=$2",
971
+ stage,
972
+ lane_val,
973
+ )
974
+ last = None if row is None else row["m"]
975
+ if last is not None and now < last:
976
+ log.warning(
977
+ "clock regression on the %s lane %r: created_at %.6f < lane max %.6f; clamping to "
978
+ "preserve FIFO order",
979
+ stage,
980
+ lane_val,
981
+ now,
982
+ last,
983
+ )
984
+ return float(last)
985
+ return now
986
+
987
async def _insert_outbound_row(
    self, conn: Any, mid: str, channel_id: str, dest_name: str, payload: str, now: float
) -> None:
    """Insert one ``stage='outbound'`` queue row: a single message→destination delivery.

    The row gets a fresh id, status ``PENDING``, zero attempts, ``next_attempt_at`` and
    ``updated_at`` set to ``now``, an encrypted payload, and a ``created_at`` clamped per
    destination lane by :meth:`_fifo_created_at`.
    """
    outbound = Stage.OUTBOUND.value
    created_at = await self._fifo_created_at(conn, outbound, "destination_name", dest_name, now)
    insert_sql = (
        "INSERT INTO queue"
        " (id, message_id, stage, channel_id, destination_name, payload,"
        " status, attempts, next_attempt_at, created_at, updated_at)"
        " VALUES ($1,$2,$3,$4,$5,$6,$7,0,$8,$9,$10)"
    )
    await conn.execute(
        insert_sql,
        uuid4().hex,
        mid,
        outbound,
        channel_id,
        dest_name,
        self._cipher.encrypt(payload),
        OutboxStatus.PENDING.value,
        now,
        created_at,
        now,
    )
1010
+
1011
async def _insert_routed_row(
    self, conn: Any, mid: str, channel_id: str, handler_name: str, payload: str, now: float
) -> None:
    """Insert one ``stage='routed'`` queue row: a handler assignment awaiting transform.

    The row gets a fresh id, a NULL ``destination_name``, status ``PENDING``, zero attempts,
    ``next_attempt_at`` and ``updated_at`` set to ``now``, an encrypted payload, and a
    ``created_at`` clamped per channel lane by :meth:`_fifo_created_at`.
    """
    routed = Stage.ROUTED.value
    created_at = await self._fifo_created_at(conn, routed, "channel_id", channel_id, now)
    insert_sql = (
        "INSERT INTO queue"
        " (id, message_id, stage, channel_id, destination_name, handler_name, payload,"
        " status, attempts, next_attempt_at, created_at, updated_at)"
        " VALUES ($1,$2,$3,$4,NULL,$5,$6,$7,0,$8,$9,$10)"
    )
    await conn.execute(
        insert_sql,
        uuid4().hex,
        mid,
        routed,
        channel_id,
        handler_name,
        self._cipher.encrypt(payload),
        OutboxStatus.PENDING.value,
        now,
        created_at,
        now,
    )
1034
+
1035
+ async def _apply_state_op(
1036
+ self, conn: Any, namespace: str, key: str, value_json: str, message_id: str, now: float
1037
+ ) -> None:
1038
+ """Upsert one state entry within the current transaction (ON CONFLICT (namespace,key) — the
1039
+ Postgres equivalent of SQLite's INSERT OR REPLACE). ``value_json`` is JSON-encoded then
1040
+ cipher-encrypted so PHI never hits disk in the clear."""
1041
+ await conn.execute(
1042
+ "INSERT INTO state (namespace, key, value, set_at, message_id)"
1043
+ " VALUES ($1,$2,$3,$4,$5)"
1044
+ " ON CONFLICT (namespace, key) DO UPDATE SET"
1045
+ " value=excluded.value, set_at=excluded.set_at, message_id=excluded.message_id",
1046
+ namespace,
1047
+ key,
1048
+ self._cipher.encrypt(value_json),
1049
+ now,
1050
+ message_id,
1051
+ )
1052
+
1053
@staticmethod
def _lane_col(stage: str) -> str:
    """Return the FIFO/depth lane column for ``stage`` (a code-controlled literal).

    ``channel_id`` for the ingress, routed and response stages; ``destination_name`` for
    anything else (i.e. outbound).
    """
    channel_lanes = (Stage.INGRESS.value, Stage.ROUTED.value, Stage.RESPONSE.value)
    if stage in channel_lanes:
        return "channel_id"
    return "destination_name"
1062
+
1063
+ # --- write path ----------------------------------------------------------
1064
+
1065
async def enqueue_message(
    self,
    *,
    channel_id: str,
    raw: str,
    deliveries: Sequence[tuple[str, str]],
    control_id: str | None = None,
    message_type: str | None = None,
    source_type: str | None = None,
    summary: str | None = None,
    metadata: str | None = None,
    now: float | None = None,
) -> str:
    """Atomically persist an inbound message together with its outbound rows.

    The single-step write that predates the staged pipeline. In one transaction it inserts
    the message, one outbound queue row per ``(destination, payload)`` in ``deliveries`` and a
    ``received`` event. The message status is ``ROUTED`` when there are deliveries and
    ``UNROUTED`` when there are none. ``now`` defaults to the current time.

    Returns:
        The new message id.
    """
    if now is None:
        now = time.time()
    new_id = uuid4().hex
    if deliveries:
        status = MessageStatus.ROUTED.value
    else:
        status = MessageStatus.UNROUTED.value
    async with self._pool.acquire() as conn, conn.transaction():
        await self._insert_message(
            conn,
            new_id,
            channel_id=channel_id,
            raw=raw,
            status=status,
            control_id=control_id,
            message_type=message_type,
            source_type=source_type,
            summary=summary,
            metadata=metadata,
            error=None,
            now=now,
        )
        for dest_name, payload in deliveries:
            await self._insert_outbound_row(conn, new_id, channel_id, dest_name, payload, now)
        detail = f"{len(deliveries)} destination(s)"
        await self._event(conn, new_id, "received", None, detail, now)
    return new_id
1106
+
1107
async def record_received(
    self,
    *,
    channel_id: str,
    raw: str,
    status: MessageStatus,
    error: str | None = None,
    control_id: str | None = None,
    message_type: str | None = None,
    source_type: str | None = None,
    summary: str | None = None,
    metadata: str | None = None,
    now: float | None = None,
) -> str:
    """Log an inbound message that is not routed, with no queue rows.

    Used for ``FILTERED`` messages and parse/validation ``ERROR``s so an operator still sees
    exactly what arrived. ``error`` is passed through ``safe_text`` (when non-empty) before it
    is stored on the message and on its event. The event is ``error`` when ``status`` is
    ``MessageStatus.ERROR`` and ``filtered`` for every other status.

    Returns:
        The new message id.
    """
    if error:
        error = safe_text(error)  # PHI chokepoint (#120)
    if now is None:
        now = time.time()
    new_id = uuid4().hex
    event_name = "error" if status is MessageStatus.ERROR else "filtered"
    async with self._pool.acquire() as conn, conn.transaction():
        await self._insert_message(
            conn,
            new_id,
            channel_id=channel_id,
            raw=raw,
            status=status.value,
            control_id=control_id,
            message_type=message_type,
            source_type=source_type,
            summary=summary,
            metadata=metadata,
            error=error,
            now=now,
        )
        await self._event(conn, new_id, event_name, None, error, now)
    return new_id
1145
+
1146
async def enqueue_ingress(
    self,
    *,
    channel_id: str,
    raw: str,
    control_id: str | None = None,
    message_type: str | None = None,
    source_type: str | None = None,
    summary: str | None = None,
    metadata: str | None = None,
    now: float | None = None,
) -> str:
    """Durably persist a freshly received raw message to the **ingress stage**.

    This is the staged pipeline's ACK-on-receipt boundary. One transaction inserts the message
    (status ``RECEIVED``), a single ``stage='ingress'`` queue row holding the encrypted raw
    body (``PENDING``, zero attempts, per-channel FIFO ``created_at``) and a ``received``
    event. Once this returns the message is durable and the inbound may be ACKed.

    Returns:
        The new message id.
    """
    if now is None:
        now = time.time()
    new_id = uuid4().hex
    ingress = Stage.INGRESS.value
    async with self._pool.acquire() as conn, conn.transaction():
        await self._insert_message(
            conn,
            new_id,
            channel_id=channel_id,
            raw=raw,
            status=MessageStatus.RECEIVED.value,
            control_id=control_id,
            message_type=message_type,
            source_type=source_type,
            summary=summary,
            metadata=metadata,
            error=None,
            now=now,
        )
        created_at = await self._fifo_created_at(conn, ingress, "channel_id", channel_id, now)
        await conn.execute(
            "INSERT INTO queue"
            " (id, message_id, stage, channel_id, destination_name, payload,"
            " status, attempts, next_attempt_at, created_at, updated_at)"
            " VALUES ($1,$2,$3,$4,NULL,$5,$6,0,$7,$8,$9)",
            uuid4().hex,
            new_id,
            ingress,
            channel_id,
            self._cipher.encrypt(raw),
            OutboxStatus.PENDING.value,
            now,
            created_at,
            now,
        )
        await self._event(conn, new_id, "received", None, "ingress", now)
    return new_id
1200
+
1201
async def handoff(
    self,
    *,
    ingress_id: str,
    message_id: str,
    channel_id: str,
    deliveries: Sequence[tuple[str, str]],
    disposition: MessageStatus,
    now: float | None = None,
) -> bool:
    """Advance a message from ingress to outbound in one transaction.

    Consumes the in-flight ingress row, inserts one outbound row per delivery, sets the
    message status to ``disposition`` and appends an event (``routed``, ``filtered`` or
    ``unrouted``; any other disposition is recorded as ``routed``).

    Returns:
        ``True`` when the handoff was performed; ``False`` (nothing written) when the ingress
        row was no longer there in-flight, i.e. a prior run already consumed it.
    """
    if now is None:
        now = time.time()
    event_names = {
        MessageStatus.ROUTED: "routed",
        MessageStatus.FILTERED: "filtered",
        MessageStatus.UNROUTED: "unrouted",
    }
    async with self._pool.acquire() as conn, conn.transaction():
        # Deleting the row is the claim: only the run that removes it proceeds.
        consumed = await conn.fetchval(
            "DELETE FROM queue WHERE id=$1 AND stage=$2 AND status=$3 RETURNING id",
            ingress_id,
            Stage.INGRESS.value,
            OutboxStatus.INFLIGHT.value,
        )
        if consumed is None:
            return False  # already handed off (crash-restart) — idempotent no-op
        for dest_name, payload in deliveries:
            await self._insert_outbound_row(conn, message_id, channel_id, dest_name, payload, now)
        await conn.execute(
            "UPDATE messages SET status=$1 WHERE id=$2", disposition.value, message_id
        )
        event_name = event_names.get(disposition, "routed")
        detail = f"{len(deliveries)} destination(s)"
        await self._event(conn, message_id, event_name, None, detail, now)
    return True
1247
+
1248
async def route_handoff(
    self,
    *,
    ingress_id: str,
    message_id: str,
    channel_id: str,
    handlers: Sequence[tuple[str, str]],
    disposition: MessageStatus,
    now: float | None = None,
) -> bool:
    """Router half of the split pipeline: move a message from ingress to the routed stage.

    Within one transaction the in-flight ingress row ``ingress_id`` is deleted, one routed
    row is inserted per ``(handler_name, payload)`` pair — in the order given by
    ``handlers`` — and ``messages.status`` is set to the intermediate ``disposition``. The
    recorded event is ``"routed"`` when ``disposition`` is ``MessageStatus.ROUTED`` and
    ``"unrouted"`` otherwise.

    Returns ``True`` on success and ``False`` (no writes) when the ingress row had already
    been consumed, so a re-run is an idempotent no-op. ``now`` defaults to ``time.time()``.
    """
    if now is None:
        now = time.time()
    async with self._pool.acquire() as conn, conn.transaction():
        consumed = await conn.fetchval(
            "DELETE FROM queue WHERE id=$1 AND stage=$2 AND status=$3 RETURNING id",
            ingress_id,
            Stage.INGRESS.value,
            OutboxStatus.INFLIGHT.value,
        )
        if consumed is None:
            # Already handed off (crash-restart) — idempotent no-op.
            return False
        for handler, body in handlers:
            await self._insert_routed_row(conn, message_id, channel_id, handler, body, now)
        await conn.execute(
            "UPDATE messages SET status=$1 WHERE id=$2", disposition.value, message_id
        )
        if disposition is MessageStatus.ROUTED:
            label = "routed"
        else:
            label = "unrouted"
        detail = f"{len(handlers)} handler(s)"
        await self._event(conn, message_id, label, None, detail, now)
    return True
1284
+
1285
async def transform_handoff(
    self,
    *,
    routed_id: str,
    message_id: str,
    channel_id: str,
    deliveries: Sequence[tuple[str, str]],
    state_ops: Sequence[tuple[str, str, Any]] = (),
    now: float | None = None,
) -> bool:
    """Transform half of the split pipeline: move one handler assignment from routed to outbound.

    In one transaction: delete the in-flight routed row ``routed_id``, insert an outbound
    row per ``(destination_name, payload)`` delivery, apply each ``(namespace, key, value)``
    state write (the value is JSON-encoded before being handed to ``_apply_state_op``),
    optionally bump the per-namespace ``state_version`` counters, record a ``"transformed"``
    event and run the per-message finalizer. ``messages.status`` is not written directly
    here.

    The in-memory state cache and version map are only touched after the transaction block
    has exited successfully. Returns ``False`` without writing anything when the routed row
    was already consumed (idempotent re-run), ``True`` otherwise.
    """
    if now is None:
        now = time.time()
    committed_writes: list[tuple[tuple[str, str], Any]] = []
    new_versions: list[tuple[str, int]] = []
    async with self._pool.acquire() as conn:
        async with conn.transaction():
            consumed = await conn.fetchval(
                "DELETE FROM queue WHERE id=$1 AND stage=$2 AND status=$3 RETURNING id",
                routed_id,
                Stage.ROUTED.value,
                OutboxStatus.INFLIGHT.value,
            )
            if consumed is None:
                # Already handed off (crash-restart) — idempotent no-op.
                return False
            for destination, body in deliveries:
                await self._insert_outbound_row(
                    conn, message_id, channel_id, destination, body, now
                )
            for namespace, key, value in state_ops:
                encoded = json.dumps(value)
                await self._apply_state_op(conn, namespace, key, encoded, message_id, now)
                committed_writes.append(((namespace, key), value))
            # Track B Step 6b: bump every DISTINCT namespace's version in the same txn as
            # its writes, so a reader that observes the new version also sees the rows.
            # Only done when cluster state convergence is enabled. A crash-restart re-run
            # returns False above before reaching this point, so the bump cannot repeat.
            if self._cluster_state_convergence and state_ops:
                distinct_namespaces = dict.fromkeys(op[0] for op in state_ops)  # order-stable
                for ns in distinct_namespaces:
                    row = await conn.fetchrow(
                        "INSERT INTO state_version (namespace, version, updated_at) "
                        "VALUES ($1, 1, $2) "
                        "ON CONFLICT (namespace) DO UPDATE SET "
                        "version = state_version.version + 1, updated_at = excluded.updated_at "
                        "RETURNING version",
                        ns,
                        now,
                    )
                    assert row is not None, "state_version upsert returned no row"
                    new_versions.append((ns, int(row["version"])))
            detail = f"{len(deliveries)} destination(s)"
            await self._event(conn, message_id, "transformed", None, detail, now)
            # H-8: the finalizer serializes per message and recomputes the disposition;
            # it runs inside this transaction.
            await self._maybe_finalize_message(conn, message_id, now)
    # Commit succeeded → publish the committed state writes to the read-through cache.
    for cache_key, cache_value in committed_writes:
        self._state_cache[cache_key] = cache_value
    # Remember the versions this writer just produced so its own convergence pass can skip
    # re-reading those namespaces (Track B Step 6b).
    for ns, version in new_versions:
        self._state_versions[ns] = version
    return True
1359
+
1360
+ # --- transform-state / reference views (ADR 0005 / 0006) -----------------
1361
+
1362
def state_view(self) -> Mapping[tuple[str, str], Any]:
    """Return a read-only proxy over the transform-state read-through cache (ADR 0005).

    The proxy is live: later changes to the underlying cache are visible through it, but
    the proxy itself cannot be mutated.
    """
    live_view = MappingProxyType(self._state_cache)
    return live_view
1365
+
1366
def reference_view(self) -> Mapping[str, Mapping[str, Any]]:
    """Return a read-only proxy over the active reference snapshots (ADR 0006).

    The proxy is live: it reflects later changes to the underlying cache, but cannot be
    mutated through the returned object.
    """
    live_view = MappingProxyType(self._reference_cache)
    return live_view
1369
+
1370
async def write_reference_snapshot(
    self, *, name: str, version: str, rows: Mapping[str, Any]
) -> None:
    """Persist a new reference snapshot for ``name`` and make it the active one (ADR 0006).

    Every value in ``rows`` is JSON-encoded and then encrypted before the transaction
    starts. Inside one transaction the set's existing ``reference`` rows are deleted, the
    new rows are inserted under ``version``, and the ``reference_version`` pointer is
    upserted with the sync time and row count. The in-memory snapshot and version are
    replaced only after the transaction block exits, so a failed write leaves the previous
    snapshot in the cache.
    """
    sealed_rows = [
        (key, self._cipher.encrypt(json.dumps(value))) for key, value in rows.items()
    ]
    async with self._pool.acquire() as conn, conn.transaction():
        await conn.execute("DELETE FROM reference WHERE name=$1", name)
        for key, sealed_value in sealed_rows:
            await conn.execute(
                "INSERT INTO reference (name, version, key, value) VALUES ($1,$2,$3,$4)",
                name,
                version,
                key,
                sealed_value,
            )
        await conn.execute(
            "INSERT INTO reference_version (name, version, synced_at, row_count)"
            " VALUES ($1,$2,$3,$4)"
            " ON CONFLICT (name) DO UPDATE SET"
            " version=excluded.version, synced_at=excluded.synced_at,"
            " row_count=excluded.row_count",
            name,
            version,
            time.time(),
            len(sealed_rows),
        )
    # Commit succeeded → swap the plaintext snapshot into the read cache and record which
    # version this node now holds.
    self._reference_cache[name] = dict(rows)
    self._reference_versions[name] = version
1407
+
1408
+ # --- delivery worker path ------------------------------------------------
1409
+
1410
async def claim_ready(
    self,
    limit: int = 10,
    now: float | None = None,
    *,
    stage: str = Stage.OUTBOUND.value,
    channel_id: str | None = None,
    destination_name: str | None = None,
) -> list[OutboxItem]:
    """Claim up to ``limit`` due pending rows at ``stage`` in one statement (unordered mode).

    A single ``FOR UPDATE SKIP LOCKED`` CTE selects pending rows whose ``next_attempt_at``
    is not in the future (optionally restricted to ``channel_id`` and/or
    ``destination_name``), then flips them to in-flight, increments ``attempts`` and stamps
    this store's owner id plus a lease expiring ``lease_ttl_seconds`` after ``now``.

    Rows whose payload cannot be decrypted are logged, dead-lettered and left out of the
    returned list instead of raising. ``now`` defaults to ``time.time()``.
    """
    if now is None:
        now = time.time()
    lease_expiry = now + self._settings.lease_ttl_seconds  # Track B Step 2: stamp the lease
    # Every filter is a bound parameter; the ::text casts let the driver type the
    # "parameter IS NULL OR column = parameter" optional-filter idiom.
    claim_sql = (
        "WITH due AS ("
        " SELECT id FROM queue"
        " WHERE stage=$1 AND status=$2 AND next_attempt_at<=$3"
        " AND ($4::text IS NULL OR channel_id=$4)"
        " AND ($5::text IS NULL OR destination_name=$5)"
        " ORDER BY next_attempt_at LIMIT $6 FOR UPDATE SKIP LOCKED"
        ")"
        " UPDATE queue q SET status=$7, attempts=attempts+1, updated_at=$3,"
        " owner=$8, lease_expires_at=$9"
        " FROM due WHERE q.id=due.id RETURNING q.*"
    )
    params = (
        stage,
        OutboxStatus.PENDING.value,
        now,
        channel_id,
        destination_name,
        limit,
        OutboxStatus.INFLIGHT.value,
        self._owner,
        lease_expiry,
    )
    claimed_rows = await self._fetchall(claim_sql, *params)
    claimed: list[OutboxItem] = []
    for record in claimed_rows:
        try:
            item = self._outbox_item(record)
        except CipherError as exc:
            # Poison-row containment: drop the row from the result and dead-letter it.
            log.warning("dead-lettering undecryptable queue row %s: %s", record["id"], exc)
            await self.dead_letter_now(record["id"], f"undecryptable payload: {exc}")
        else:
            claimed.append(item)
    return claimed
1458
+
1459
async def claim_next_fifo(
    self,
    name: str,
    now: float | None = None,
    *,
    stage: str = Stage.OUTBOUND.value,
    owner: str | None = None,
) -> OutboxItem | None:
    """Claim the oldest pending row of lane ``name`` at ``stage``, if it is due (strict FIFO).

    The head is the first pending row ordered by ``created_at, seq`` in the lane column
    chosen by ``_lane_col(stage)``; it is locked with ``FOR UPDATE SKIP LOCKED``. The row
    is flipped to in-flight (attempts incremented, owner and lease stamped) only when its
    ``next_attempt_at`` is not after ``now``; a head that is still backing off is left
    alone and ``None`` is returned, so the lane stays blocked behind it.

    When ``owner`` is given the call is delegated to :meth:`_claim_next_fifo_owned`, which
    additionally gates the claim on a per-lane lease. With ``owner=None`` no lane lease is
    touched. An undecryptable head is dead-lettered and ``None`` is returned.
    """
    if now is None:
        now = time.time()
    if owner is not None:
        return await self._claim_next_fifo_owned(name, now, stage, owner)
    lease_expiry = now + self._settings.lease_ttl_seconds  # Track B Step 2: stamp the lease
    lane_col = self._lane_col(stage)  # code-controlled literal
    claim_sql = (
        "WITH head AS ("
        f" SELECT id, next_attempt_at FROM queue WHERE stage=$1 AND {lane_col}=$2 AND status=$3"
        " ORDER BY created_at, seq LIMIT 1 FOR UPDATE SKIP LOCKED"
        ")"
        " UPDATE queue q SET status=$4, attempts=attempts+1, updated_at=$5,"
        " owner=$6, lease_expires_at=$7"
        " FROM head WHERE q.id=head.id AND head.next_attempt_at<=$5 RETURNING q.*"
    )
    params = (
        stage,
        name,
        OutboxStatus.PENDING.value,
        OutboxStatus.INFLIGHT.value,
        now,
        self._owner,
        lease_expiry,
    )
    head_row = await self._fetchone(claim_sql, *params)
    return await self._fifo_item_or_dead_letter(head_row)
1503
+
1504
async def _claim_next_fifo_owned(
    self, name: str, now: float, stage: str, owner: str
) -> OutboxItem | None:
    """The clustered FIFO claim (Track B Step 5): acquire-or-verify the lane lease, recover the
    crashed predecessor's stranded head, and claim the head — all in ONE transaction so ownership +
    head recovery + claim commit atomically.

    Claim-time DB-authoritative ownership means two nodes can NEVER claim the same lane
    concurrently. Even an alive-but-partitioned node whose lease expired fails the ``ON CONFLICT``
    ``WHERE`` (the DB row shows another live owner / a not-yet-expired lease), so there is ZERO
    reorder window — unlike a cached gate, which could be stale at the instant of a claim. A lane
    whose active processor goes idle simply lets its lease expire and the next node with work for
    that lane re-acquires it (atomically, one at a time), so head-of-line blocking is preserved
    across the handoff.

    Crash-during-delivery recovery: a node that dies holding the lane leaves its claimed head
    ``inflight`` under an expired ROW lease, and the lane lease + that row lease expire on the SAME
    TTL but are recovered by INDEPENDENT mechanisms — the lane lease is taken over instantly here,
    while the inflight row is otherwise only returned to ``pending`` by the LEADER's periodic
    ``reclaim_expired_leases`` sweep (a separate, later cadence). In the window between the lane
    handoff and that sweep, the head SELECT (status=PENDING) would skip the still-inflight head N
    and claim N+1 → N+1 delivered before N → FIFO broken across a crash. So the new owner reclaims
    THIS lane's expired-lease inflight rows back to ``pending`` in the SAME txn, BEFORE the head
    SELECT, restoring head-of-line blocking: the recovered N is reconsidered as the (due) head and
    blocks the lane until it is delivered. Scoped to ``lease_expires_at < now`` so it never disturbs
    OUR own actively-processed rows (their leases are kept in the future by the worker's renew
    timer) — only a crashed predecessor's stranded rows. The wall-clock lease shares Track B
    Step 2's NTP assumption: set ``lease_ttl_seconds`` comfortably above clock skew + the claim
    cadence.

    Args:
        name: The lane value matched against the stage's lane column (``_lane_col(stage)``).
        now: Reference timestamp used for lease expiry checks and for stamping the rows.
        stage: Queue stage to claim from; together with ``name`` it forms the lane-lease key
            ``"{stage}:{name}"``.
        owner: Identity recorded in ``lane_leases``; the claimed queue row itself is stamped
            with ``self._owner``.

    Returns:
        The claimed head as an :class:`OutboxItem`, or ``None`` when the lane lease is held by
        someone else, nothing is pending, the head is not yet due, or the head was
        undecryptable (in which case it is dead-lettered).
    """
    lane_key = f"{stage}:{name}"
    # The lane lease and the queue ROW lease (Step 2) deliberately share ONE TTL
    # (lease_ttl_seconds): they are renewed together (the worker's renew timer keeps the row lease
    # alive while the lane lease is renewed on each claim), and the crash-recovery reclaim below
    # relies on both expiring together so a freed lane's stranded head is recoverable as soon as the
    # lane is re-acquired. Two distinct names, identical expression — not a copy-paste bug.
    lane_until = now + self._settings.lease_ttl_seconds  # the per-LANE lease (Step 5)
    lease_until = now + self._settings.lease_ttl_seconds  # the queue ROW lease (Step 2)
    lane_col = self._lane_col(stage)  # code-controlled literal
    head_sql = (
        "WITH head AS ("
        f" SELECT id, next_attempt_at FROM queue WHERE stage=$1 AND {lane_col}=$2 AND status=$3"
        " ORDER BY created_at, seq LIMIT 1 FOR UPDATE SKIP LOCKED"
        ")"
        " UPDATE queue q SET status=$4, attempts=attempts+1, updated_at=$5,"
        " owner=$6, lease_expires_at=$7"
        " FROM head WHERE q.id=head.id AND head.next_attempt_at<=$5 RETURNING q.*"
    )
    async with self._pool.acquire() as conn:
        async with conn.transaction():
            # FIRST atomically acquire-or-verify the lane lease. The ON CONFLICT WHERE only takes the
            # lane when WE already hold it (renew) or its lease has EXPIRED (take over a freed lane).
            held = await conn.fetchval(
                "INSERT INTO lane_leases (lane, owner, lease_expires_at) VALUES ($1,$2,$3)"
                " ON CONFLICT (lane) DO UPDATE SET owner=$2, lease_expires_at=$3"
                " WHERE lane_leases.owner=$2 OR lane_leases.lease_expires_at < $4"
                " RETURNING owner",
                lane_key,
                owner,
                lane_until,
                now,
            )
            # held is None when the ON CONFLICT WHERE was false → another LIVE node owns the lane.
            # Back off exactly like an empty lane (the worker waits / re-polls).
            if held != owner:
                return None
            # THEN recover this lane's stranded head: a crashed predecessor's claimed rows are still
            # inflight under an expired ROW lease, and the PENDING-only head SELECT would skip them
            # and reorder past N. Return them to pending IN THIS TXN before the head SELECT so the
            # oldest recovered row is reconsidered as the (now-due) head and blocks the lane. Bounded
            # to this single lane and to already-expired leases, so it never steals our own live rows.
            await conn.execute(
                f"UPDATE queue SET status=$3, owner=NULL, lease_expires_at=NULL,"
                f" next_attempt_at=$4, updated_at=$4"
                f" WHERE stage=$1 AND {lane_col}=$2 AND status=$5"
                f" AND lease_expires_at IS NOT NULL AND lease_expires_at < $4",
                stage,
                name,
                OutboxStatus.PENDING.value,
                now,
                OutboxStatus.INFLIGHT.value,
            )
            # THEN claim the head in the SAME txn, stamping the queue row's own owner + row lease.
            # The lane lease is keyed by the passed `owner` (= coordinator node_id) while the row's
            # owner column is `self._owner` (the store-instance id). These are INDEPENDENT identities
            # — single-owner-per-lane is enforced entirely inside lane_leases via `owner`, so the row
            # owner need not equal it (they coincide by build_coordinator's default node_id=store._owner,
            # but pinning [cluster].node_id to a different value stays correct and harmless).
            row = await conn.fetchrow(
                head_sql,
                stage,
                name,
                OutboxStatus.PENDING.value,
                OutboxStatus.INFLIGHT.value,
                now,
                self._owner,
                lease_until,
            )
    # Decode (or dead-letter) only after the claim transaction has ended.
    return await self._fifo_item_or_dead_letter(row)
1602
+
1603
async def _fifo_item_or_dead_letter(self, row: Any) -> OutboxItem | None:
    """Turn a claimed FIFO head record into an :class:`OutboxItem`.

    ``None`` in means ``None`` out (nothing was claimed — the lane is empty or its head is
    backing off). If the payload cannot be decrypted, the row is logged and dead-lettered
    via :meth:`dead_letter_now` and ``None`` is returned, so the failure never propagates
    into the worker and the lane can move on at the next poll.
    """
    if row is None:
        return None
    try:
        item = self._outbox_item(row)
    except CipherError as exc:
        # An undecryptable head must not stall the lane: dead-letter it rather than raise.
        log.warning("dead-lettering undecryptable queue row %s: %s", row["id"], exc)
        await self.dead_letter_now(row["id"], f"undecryptable payload: {exc}")
        return None
    return item
1617
+
1618
def _outbox_item(self, row: Any) -> OutboxItem:
    """Map a claimed ``queue`` record onto an :class:`OutboxItem`.

    The ``payload`` column is decrypted with the store cipher; a decryption failure raises
    :class:`CipherError`, which callers are expected to handle. All other fields are copied
    from the record unchanged.
    """
    fields = {
        "id": row["id"],
        "message_id": row["message_id"],
        "channel_id": row["channel_id"],
        "destination_name": row["destination_name"],
        "payload": self._cipher.decrypt(row["payload"]),
        "attempts": row["attempts"],
        "stage": row["stage"],
        "handler_name": row["handler_name"],
        # The claim's RETURNING q.* includes created_at (ingest-time, ADR 0009).
        "created_at": row["created_at"],
    }
    return OutboxItem(**fields)
1632
+
1633
async def dead_letter_now(self, outbox_id: str, error: str, now: float | None = None) -> None:
    """Mark queue row ``outbox_id`` as ``DEAD`` right away, without consuming a retry.

    ``error`` is first passed through ``safe_text`` (the PHI chokepoint, #120 — this also
    covers the ``"undecryptable payload: ..."`` callers) and stored encrypted as the row's
    ``last_error``. The row's owner and lease are cleared, a ``"dead"`` event is recorded
    and the per-message finalizer runs in the same transaction (H-8). A missing row is a
    silent no-op. ``now`` defaults to ``time.time()``.
    """
    error = safe_text(error)
    if now is None:
        now = time.time()
    async with self._pool.acquire() as conn, conn.transaction():
        record = await conn.fetchrow("SELECT * FROM queue WHERE id=$1", outbox_id)
        if record is None:
            return
        message_id = record["message_id"]
        await conn.execute(
            "UPDATE queue SET status=$1, next_attempt_at=$2, last_error=$3, updated_at=$4,"
            " owner=NULL, lease_expires_at=NULL WHERE id=$5",
            OutboxStatus.DEAD.value,
            now,
            self._enc(error),
            now,
            outbox_id,
        )
        await self._event(conn, message_id, "dead", record["destination_name"], error, now)
        await self._maybe_finalize_message(conn, message_id, now)
1658
+
1659
async def mark_done(self, outbox_id: str, now: float | None = None) -> None:
    """Mark queue row ``outbox_id`` as delivered (``DONE``).

    In one transaction the row's ``last_error``, owner and lease are cleared, a
    ``"delivered"`` event noting the attempt count is recorded, and the per-message
    finalizer runs. A missing row is a silent no-op. ``now`` defaults to ``time.time()``.
    """
    if now is None:
        now = time.time()
    async with self._pool.acquire() as conn, conn.transaction():
        record = await conn.fetchrow("SELECT * FROM queue WHERE id=$1", outbox_id)
        if record is None:
            return
        message_id = record["message_id"]
        await conn.execute(
            "UPDATE queue SET status=$1, last_error=NULL, updated_at=$2,"
            " owner=NULL, lease_expires_at=NULL WHERE id=$3",
            OutboxStatus.DONE.value,
            now,
            outbox_id,
        )
        detail = f"attempt {record['attempts']}"
        await self._event(
            conn, message_id, "delivered", record["destination_name"], detail, now
        )
        await self._maybe_finalize_message(conn, message_id, now)
1682
+
1683
async def complete_with_response(
    self,
    outbox_id: str,
    *,
    body: str,
    outcome: str,
    detail: str | None = None,
    reingress_to: str | None = None,
    now: float | None = None,
) -> None:
    """Mark an outbound row delivered and store the partner's reply atomically (ADR 0013).

    One transaction performs all of the following: set the queue row to ``DONE`` (clearing
    error, owner and lease), insert a ``response`` row whose ``response_seq`` is one more
    than the current maximum for the same ``(message_id, destination_name)``, and — when
    ``reingress_to`` is given (Increment 2) — insert a pending ``Stage.RESPONSE`` work-row
    on that channel whose encrypted payload is a reference to the stored artifact
    (``message_id``, ``destination_name`` and sequence joined by ``\\x1f``). A
    ``"delivered"`` event is then recorded and the per-message finalizer runs.

    ``body`` and ``detail`` are stored encrypted. A missing queue row is a silent no-op.
    ``now`` defaults to ``time.time()``.
    """
    if now is None:
        now = time.time()
    async with self._pool.acquire() as conn, conn.transaction():
        record = await conn.fetchrow("SELECT * FROM queue WHERE id=$1", outbox_id)
        if record is None:
            return
        message_id = record["message_id"]
        destination_name = record["destination_name"]
        await conn.execute(
            "UPDATE queue SET status=$1, last_error=NULL, updated_at=$2,"
            " owner=NULL, lease_expires_at=NULL WHERE id=$3",
            OutboxStatus.DONE.value,
            now,
            outbox_id,
        )
        next_seq = await conn.fetchval(
            "SELECT COALESCE(MAX(response_seq), 0) + 1 FROM response"
            " WHERE message_id=$1 AND destination_name=$2",
            message_id,
            destination_name,
        )
        await conn.execute(
            "INSERT INTO response"
            " (message_id, destination_name, response_seq, body, outcome, detail, captured_at)"
            " VALUES ($1,$2,$3,$4,$5,$6,$7)",
            message_id,
            destination_name,
            next_seq,
            self._enc(body),
            outcome,
            self._enc(detail),
            now,
        )
        if reingress_to is not None:
            # ADR 0013 Increment 2: the drainable Stage.RESPONSE work-row is written in the
            # SAME txn as the artifact it points to, so neither can exist without the other.
            artifact_ref = "\x1f".join((f"{message_id}", f"{destination_name}", f"{next_seq}"))
            work_created_at = await self._fifo_created_at(
                conn, Stage.RESPONSE.value, "channel_id", reingress_to, now
            )
            await conn.execute(
                "INSERT INTO queue"
                " (id, message_id, stage, channel_id, destination_name, handler_name, payload,"
                " status, attempts, next_attempt_at, created_at, updated_at)"
                " VALUES ($1,$2,$3,$4,NULL,NULL,$5,$6,0,$7,$8,$9)",
                uuid4().hex,
                message_id,
                Stage.RESPONSE.value,
                reingress_to,
                self._enc(artifact_ref),
                OutboxStatus.PENDING.value,
                now,
                work_created_at,
                now,
            )
        event_detail = f"attempt {record['attempts']} (response {outcome})"
        await self._event(conn, message_id, "delivered", destination_name, event_detail, now)
        await self._maybe_finalize_message(conn, message_id, now)
1764
+
1765
async def ingress_handoff(
    self,
    *,
    response_row_id: str,
    loopback_channel_id: str,
    correlation_depth_cap: int,
    control_id: str | None,
    message_type: str | None,
    summary: str | None,
    peek_failed: bool = False,
    now: float | None = None,
) -> bool:
    """Postgres twin of :meth:`MessageStore.ingress_handoff` (ADR 0013 Increment 2): turn one
    in-flight ``Stage.RESPONSE`` work-row into a new re-ingressed message, in one transaction.

    Steps, all under a single ``conn.transaction()``:

    1. Load the in-flight work-row; if it is gone the call is a no-op.
    2. Decrypt and parse its reference (``message_id``, destination, sequence separated by
       ``\\x1f``). If that fails, the work-row is dead-lettered, a ``"dead"`` event is logged
       on the origin message and the finalizer runs — the token is consumed, not retried.
    3. Load the referenced response body (empty string when absent) and the origin's
       correlation metadata; the child depth is the origin depth plus one. If it exceeds
       ``correlation_depth_cap`` the work-row is dead-lettered the same way.
    4. Otherwise create the child message on ``loopback_channel_id`` unless a message with
       the same derived id already exists. With ``peek_failed`` the child is stored with
       ``ERROR`` status and no ingress row; without it the child is ``RECEIVED`` and gets a
       pending ingress row.
    5. Delete the work-row with a guarded DELETE and run the origin's finalizer.

    Both dead-letter paths clear the row's ``owner`` and ``lease_expires_at``, matching how
    the other terminal transitions in this store (``dead_letter_now``, ``mark_done``,
    ``complete_with_response``) release the claim, so a terminal row never keeps a lease.

    Returns:
        ``True`` when the work-row was consumed (re-ingressed or dead-lettered); ``False``
        when it had already been consumed and the transaction was rolled back.
    """

    class _Noop(Exception):
        """Internal sentinel: abort (roll back) the transaction and report a no-op."""

    # Terminal transition for the work-row; releases the claim (owner + lease) as well.
    dead_letter_sql = (
        "UPDATE queue SET status=$1, last_error=$2, next_attempt_at=$3,"
        " updated_at=$4, owner=NULL, lease_expires_at=NULL WHERE id=$5"
    )

    now = time.time() if now is None else now
    try:
        async with self._pool.acquire() as conn:
            async with conn.transaction():
                wr = await conn.fetchrow(
                    "SELECT message_id, payload FROM queue WHERE id=$1 AND stage=$2 AND status=$3",
                    response_row_id,
                    Stage.RESPONSE.value,
                    OutboxStatus.INFLIGHT.value,
                )
                if wr is None:
                    raise _Noop()  # already consumed by a committed prior run
                origin_id = wr["message_id"]
                try:
                    ref = self._dec(wr["payload"]) or ""
                    origin_msg_id, dest, seq_s = ref.split("\x1f")
                    seq = int(seq_s)
                except Exception:  # noqa: BLE001 - any decrypt/parse failure = an unrecoverable ref
                    # A corrupt/undecryptable reference can never be re-ingressed: dead-letter
                    # the token and finalize the origin in THIS transaction, then CONSUME it
                    # (returning True lets the transaction commit). Raising _Noop here would
                    # roll back and leave the token live, so it would loop forever.
                    await conn.execute(
                        dead_letter_sql,
                        OutboxStatus.DEAD.value,
                        self._enc("re-ingress work-row reference is corrupt/unparseable"),
                        now,
                        now,
                        response_row_id,
                    )
                    await self._event(
                        conn, origin_id, "dead", None, "re-ingress ref corrupt", now
                    )
                    await self._maybe_finalize_message(conn, origin_id, now)
                    return True
                art = await conn.fetchrow(
                    "SELECT body FROM response"
                    " WHERE message_id=$1 AND destination_name=$2 AND response_seq=$3",
                    origin_msg_id,
                    dest,
                    seq,
                )
                # A missing artifact or NULL body is treated as an empty body.
                body = self._dec(art["body"]) if (art and art["body"] is not None) else ""
                body = body or ""
                mrow = await conn.fetchrow(
                    "SELECT metadata FROM messages WHERE id=$1", origin_id
                )
                origin_meta: dict[str, Any] = {}
                if mrow and mrow["metadata"]:
                    loaded = json.loads(mrow["metadata"])
                    if isinstance(loaded, dict):
                        origin_meta = loaded
                child_depth = int(origin_meta.get("correlation_depth", 0) or 0) + 1
                root = origin_meta.get("correlation_root_id") or origin_id
                if child_depth > correlation_depth_cap:
                    # Loop guard: too many chained re-ingresses — dead-letter and consume.
                    await conn.execute(
                        dead_letter_sql,
                        OutboxStatus.DEAD.value,
                        self._enc(
                            f"re-ingress correlation depth exceeded "
                            f"({child_depth} > {correlation_depth_cap})"
                        ),
                        now,
                        now,
                        response_row_id,
                    )
                    await self._event(
                        conn,
                        origin_id,
                        "dead",
                        dest,
                        f"re-ingress depth cap ({child_depth})",
                        now,
                    )
                    await self._maybe_finalize_message(conn, origin_id, now)
                    return True
                new_mid = MessageStore._reingress_message_id(origin_id, dest, seq, body)
                exists = await conn.fetchval("SELECT 1 FROM messages WHERE id=$1", new_mid)
                if exists is None:
                    child_meta = json.dumps(
                        {
                            "correlation_id": origin_id,
                            "correlation_root_id": root,
                            "correlation_depth": child_depth,
                            "reingress_of_seq": seq,
                        }
                    )
                    await self._insert_message(
                        conn,
                        new_mid,
                        channel_id=loopback_channel_id,
                        raw=body,
                        status=(
                            MessageStatus.ERROR.value
                            if peek_failed
                            else MessageStatus.RECEIVED.value
                        ),
                        control_id=control_id,
                        message_type=message_type,
                        source_type="reingress",
                        summary=summary,
                        metadata=child_meta,
                        error="re-ingress body failed HL7 peek" if peek_failed else None,
                        now=now,
                    )
                    if not peek_failed:
                        # Only a child that passed the peek is queued for ingress processing.
                        ingress_created = await self._fifo_created_at(
                            conn, Stage.INGRESS.value, "channel_id", loopback_channel_id, now
                        )
                        await conn.execute(
                            "INSERT INTO queue (id, message_id, stage, channel_id,"
                            " destination_name, handler_name, payload, status, attempts,"
                            " next_attempt_at, created_at, updated_at)"
                            " VALUES ($1,$2,$3,$4,NULL,NULL,$5,$6,0,$7,$8,$9)",
                            uuid4().hex,
                            new_mid,
                            Stage.INGRESS.value,
                            loopback_channel_id,
                            self._cipher.encrypt(body),
                            OutboxStatus.PENDING.value,
                            now,
                            ingress_created,
                            now,
                        )
                    await self._event(
                        conn,
                        new_mid,
                        "received",
                        None,
                        f"reingress from {origin_id}/{dest}/seq{seq}",
                        now,
                    )
                await self._event(
                    conn,
                    origin_id,
                    "reingressed",
                    dest,
                    f"-> {new_mid} depth {child_depth}",
                    now,
                )
                deleted = await conn.fetchval(
                    "DELETE FROM queue WHERE id=$1 AND stage=$2 AND status=$3 RETURNING id",
                    response_row_id,
                    Stage.RESPONSE.value,
                    OutboxStatus.INFLIGHT.value,
                )
                if deleted is None:
                    raise _Noop()  # unreachable under single-owner claim; roll back defensively
                await self._maybe_finalize_message(conn, origin_id, now)
    except _Noop:
        return False
    return True
1938
+
1939
async def response_body_for_work_row(self, response_row_id: str) -> str | None:
    """The decrypted artifact body a ``Stage.RESPONSE`` work-row references (ADR 0013) — the
    Postgres twin of :meth:`MessageStore.response_body_for_work_row`.

    The work-row payload is a reference of the form ``message_id``, destination name and
    response sequence separated by ``\\x1f``.

    Returns:
        ``None`` when the work-row does not exist or its reference is malformed (wrong
        number of parts, or a sequence that is not an integer); ``""`` when the referenced
        response row is missing or has a NULL body; otherwise the decrypted body.
    """
    row = await self._pool.fetchrow(
        "SELECT payload FROM queue WHERE id=$1 AND stage=$2",
        response_row_id,
        Stage.RESPONSE.value,
    )
    if row is None:
        return None
    ref = self._dec(row["payload"]) or ""
    try:
        mid, dest, seq_s = ref.split("\x1f")
        # Parse the sequence inside the guard too: a non-numeric sequence is just another
        # malformed reference and must yield None, not an uncaught ValueError.
        seq = int(seq_s)
    except ValueError:
        return None
    art = await self._pool.fetchrow(
        "SELECT body FROM response WHERE message_id=$1 AND destination_name=$2 AND response_seq=$3",
        mid,
        dest,
        seq,
    )
    return self._dec(art["body"]) if (art and art["body"] is not None) else ""
1961
+
1962
+ async def correlate_response(self, message_id: str) -> list[CapturedResponse]:
1963
+ """Every captured reply for ``message_id`` (ADR 0013), ordered by destination then
1964
+ ``response_seq``; ``body``/``detail`` decrypted. The PHI read surface behind the audited,
1965
+ body-gated ``GET /messages/{id}/responses`` route."""
1966
+ rows = await self._pool.fetch(
1967
+ "SELECT message_id, destination_name, response_seq, body, outcome, detail, captured_at"
1968
+ " FROM response WHERE message_id=$1 ORDER BY destination_name, response_seq",
1969
+ message_id,
1970
+ )
1971
+ return [
1972
+ CapturedResponse(
1973
+ message_id=r["message_id"],
1974
+ destination_name=r["destination_name"],
1975
+ response_seq=r["response_seq"],
1976
+ outcome=r["outcome"],
1977
+ detail=self._dec(r["detail"]),
1978
+ captured_at=r["captured_at"],
1979
+ body=self._dec(r["body"]),
1980
+ )
1981
+ for r in rows
1982
+ ]
1983
+
1984
+ async def mark_failed(
1985
+ self, outbox_id: str, error: str, retry: RetryPolicy, now: float | None = None
1986
+ ) -> None:
1987
+ """Reschedule with exponential backoff, or dead-letter if retries are exhausted."""
1988
+ error = safe_text(error) # PHI chokepoint (#120)
1989
+ now = time.time() if now is None else now
1990
+ async with self._pool.acquire() as conn:
1991
+ async with conn.transaction():
1992
+ row = await conn.fetchrow("SELECT * FROM queue WHERE id=$1", outbox_id)
1993
+ if row is None:
1994
+ return
1995
+ attempts = row["attempts"]
1996
+ # max_attempts None = retry forever; a finite cap dead-letters once exhausted.
1997
+ if retry.max_attempts is not None and attempts >= retry.max_attempts:
1998
+ status, next_at, event = OutboxStatus.DEAD.value, now, "dead"
1999
+ else:
2000
+ backoff = min(
2001
+ retry.max_backoff_seconds,
2002
+ retry.backoff_seconds * (retry.backoff_multiplier ** (attempts - 1)),
2003
+ )
2004
+ status, next_at, event = OutboxStatus.PENDING.value, now + backoff, "failed"
2005
+ await conn.execute(
2006
+ "UPDATE queue SET status=$1, next_attempt_at=$2, last_error=$3, updated_at=$4,"
2007
+ " owner=NULL, lease_expires_at=NULL WHERE id=$5",
2008
+ status,
2009
+ next_at,
2010
+ self._enc(error),
2011
+ now,
2012
+ outbox_id,
2013
+ )
2014
+ await self._event(
2015
+ conn,
2016
+ row["message_id"],
2017
+ event,
2018
+ row["destination_name"],
2019
+ f"attempt {attempts}: {error}",
2020
+ now,
2021
+ )
2022
+ if status == OutboxStatus.DEAD.value:
2023
+ await self._maybe_finalize_message(conn, row["message_id"], now)
2024
+
2025
+ async def pending_depth(
2026
+ self, name: str, *, stage: str = Stage.OUTBOUND.value
2027
+ ) -> tuple[int, float | None]:
2028
+ """``(pending_count, oldest_created_at)`` for one lane at ``stage`` (lane key stage-aware)."""
2029
+ lane_col = self._lane_col(stage)
2030
+ row = await self._fetchone(
2031
+ f"SELECT COUNT(*) AS n, MIN(created_at) AS oldest FROM queue"
2032
+ f" WHERE stage=$1 AND {lane_col}=$2 AND status=$3",
2033
+ stage,
2034
+ name,
2035
+ OutboxStatus.PENDING.value,
2036
+ )
2037
+ count = int(row["n"]) if row is not None else 0
2038
+ oldest = row["oldest"] if row is not None else None
2039
+ return count, (float(oldest) if oldest is not None else None)
2040
+
2041
+ # --- recovery / replay ---------------------------------------------------
2042
+
2043
+ async def reset_stale_inflight(
2044
+ self, now: float | None = None, *, stage: str | None = None
2045
+ ) -> int:
2046
+ """Return ``inflight`` rows (claimed before a crash) to ``pending``. ``stage=None`` recovers
2047
+ every stage in one pass (the right startup behavior).
2048
+
2049
+ This is the **unconditional** single-node recovery: it reclaims *every* inflight row, ignoring
2050
+ the lease columns, which is correct today (one node, so any inflight row at startup is this
2051
+ node's own crash residue). The additive multi-node mechanism is the lease columns + the
2052
+ owner-aware :meth:`renew_leases` / :meth:`reclaim_expired_leases`: a later coordination step
2053
+ will, in clustered mode, STOP calling this unconditional startup reset (which would steal a live
2054
+ sibling's in-flight rows) and instead run :meth:`reclaim_expired_leases` periodically on the
2055
+ leader, recovering only rows whose lease has actually expired. Expiry-gating this startup reset
2056
+ now — without that periodic sweep wired up — would strand a just-crashed single node's in-flight
2057
+ rows until their leases expire, so the gating is deferred on purpose."""
2058
+ now = time.time() if now is None else now
2059
+ sql = (
2060
+ "UPDATE queue SET status=$1, next_attempt_at=$2, updated_at=$2,"
2061
+ " owner=NULL, lease_expires_at=NULL"
2062
+ " WHERE status=$3 AND ($4::text IS NULL OR stage=$4)"
2063
+ )
2064
+ result = await self._pool.execute(
2065
+ sql, OutboxStatus.PENDING.value, now, OutboxStatus.INFLIGHT.value, stage
2066
+ )
2067
+ return _rowcount(result)
2068
+
2069
+ # --- multi-node row leases (Track B Step 2; additive, Postgres-only) ------
2070
+ # These are NOT on the Store protocol and NOT on the SQLite backend: SQLite is single-node, so its
2071
+ # unconditional reset_stale_inflight remains correct. A later coordination step wires a worker timer
2072
+ # onto renew_leases and a leader sweep onto reclaim_expired_leases (see reset_stale_inflight).
2073
+
2074
+ async def renew_leases(self, ids: Sequence[str], *, now: float | None = None) -> int:
2075
+ """Extend the lease on THIS owner's still-inflight rows (a worker calls this on a timer to keep
2076
+ rows it is actively processing from being reclaimed). Only touches rows this instance owns and
2077
+ that are still ``inflight`` — a row already completed, dead-lettered, or reclaimed by another
2078
+ node is left alone. Returns the number of leases extended."""
2079
+ now = time.time() if now is None else now
2080
+ lease_until = now + self._settings.lease_ttl_seconds
2081
+ result = await self._pool.execute(
2082
+ "UPDATE queue SET lease_expires_at=$2, updated_at=$3"
2083
+ " WHERE id = ANY($1::text[]) AND owner=$4 AND status=$5",
2084
+ list(ids),
2085
+ lease_until,
2086
+ now,
2087
+ self._owner,
2088
+ OutboxStatus.INFLIGHT.value,
2089
+ )
2090
+ return _rowcount(result)
2091
+
2092
+ async def reclaim_expired_leases(
2093
+ self, now: float | None = None, *, stage: str | None = None
2094
+ ) -> int:
2095
+ """Multi-node-safe reclaim: return to ``pending`` ONLY inflight rows whose lease has **expired**
2096
+ (``lease_expires_at < now``), clearing ``owner``/``lease_expires_at`` and making them due now.
2097
+ This is what a future leader periodic sweep calls; it must NEVER reclaim a row whose lease is
2098
+ still in the future, since that would steal a live sibling node's in-flight row. ``stage=None``
2099
+ sweeps every stage; pass a stage to scope it. Returns the number reclaimed.
2100
+
2101
+ Clock assumption: the no-theft guarantee is a wall-clock lease — the reclaiming node compares
2102
+ its own ``now`` against a ``lease_expires_at`` stamped by the (possibly different) holder node's
2103
+ clock. It holds only when node clocks are synchronized (NTP) to well within ``lease_ttl_seconds``;
2104
+ set the TTL comfortably larger than expected skew + the renew interval so a skewed reclaimer
2105
+ can't beat a live holder's lease."""
2106
+ now = time.time() if now is None else now
2107
+ result = await self._pool.execute(
2108
+ "UPDATE queue SET status=$3, owner=NULL, lease_expires_at=NULL,"
2109
+ " next_attempt_at=$1, updated_at=$1"
2110
+ " WHERE status=$4 AND lease_expires_at IS NOT NULL AND lease_expires_at < $1"
2111
+ " AND ($2::text IS NULL OR stage=$2)",
2112
+ now,
2113
+ stage,
2114
+ OutboxStatus.PENDING.value,
2115
+ OutboxStatus.INFLIGHT.value,
2116
+ )
2117
+ return _rowcount(result)
2118
+
2119
+ async def recover_inflight_on_promotion(
2120
+ self, *, lane_owner: str | None, now: float | None = None
2121
+ ) -> int:
2122
+ """On active-passive promotion: recover the PRIOR leader's stranded work IMMEDIATELY, without
2123
+ waiting out the per-row/lane lease TTL — the dominant ~``[store].lease_ttl_seconds`` failover-
2124
+ recovery delay on Postgres (#293; SQL Server already recovers at once via its on-promotion
2125
+ ``reset_stale_inflight``). Two parts, ONE transaction:
2126
+
2127
+ 1. **Owner-scoped queue-row reclaim** — return INFLIGHT rows owned by ANY OTHER store instance
2128
+ (the prior leader) to PENDING, **ignoring** lease expiry. Scoped to ``owner IS DISTINCT FROM
2129
+ self._owner``, so it is STRUCTURALLY incapable of re-pending THIS node's own freshly-claimed
2130
+ rows (no self-theft); at promotion this node has not claimed anything yet, so the set is
2131
+ exactly the prior leader's residue. Re-pending the stranded lane HEAD restores per-lane
2132
+ head-of-line blocking (no N+1-before-N reorder).
2133
+ 2. **Lane-lease takeover** — delete the prior coordinator's ``lane_leases`` (``owner <>
2134
+ lane_owner``) so the new leader can claim those lanes at once instead of waiting
2135
+ ~``lease_ttl_seconds`` for them to expire. ``reset_stale_inflight`` / ``reclaim_expired_leases``
2136
+ touch only the ``queue`` table, so without this the LANE lease independently blocks the new
2137
+ leader for the full TTL — both gates must be cleared.
2138
+
2139
+ **Safe ONLY in active-passive** (the wired graph runs on the leader ONLY): the prior leader
2140
+ self-fenced and its LEADERSHIP lease expired on the DB clock before this node could acquire it
2141
+ (``heartbeat < fence < leader_lease_ttl`` is validator-enforced), so there is no live processor
2142
+ whose rows/lanes this could steal — the SAME interlock the shipping SQL Server on-promotion
2143
+ ``reset_stale_inflight`` relies on. A future **active-active** build (parked for 0.2) MUST NOT
2144
+ call this — with multiple live processors it would steal a live sibling's rows; that model uses
2145
+ only the lease-gated :meth:`reclaim_expired_leases`. Returns the number of queue rows re-pended."""
2146
+ now = time.time() if now is None else now
2147
+ async with self._pool.acquire() as conn:
2148
+ async with conn.transaction():
2149
+ result = await conn.execute(
2150
+ "UPDATE queue SET status=$1, owner=NULL, lease_expires_at=NULL,"
2151
+ " next_attempt_at=$2, updated_at=$2"
2152
+ " WHERE status=$3 AND owner IS DISTINCT FROM $4",
2153
+ OutboxStatus.PENDING.value,
2154
+ now,
2155
+ OutboxStatus.INFLIGHT.value,
2156
+ self._owner,
2157
+ )
2158
+ # lane_owner is the coordinator node_id (always set in clustered mode, where this runs);
2159
+ # None defensively clears all lanes (single-node never calls this).
2160
+ if lane_owner is None:
2161
+ await conn.execute("DELETE FROM lane_leases")
2162
+ else:
2163
+ await conn.execute("DELETE FROM lane_leases WHERE owner <> $1", lane_owner)
2164
+ return _rowcount(result)
2165
+
2166
+ async def dead_letter_missing_destinations(
2167
+ self, valid_names: set[str], now: float | None = None
2168
+ ) -> int:
2169
+ """Dead-letter every non-terminal **outbound** row whose ``destination_name`` left the
2170
+ registry. Scoped to ``stage='outbound'``. Returns the rows killed."""
2171
+ now = time.time() if now is None else now
2172
+ async with self._pool.acquire() as conn:
2173
+ async with conn.transaction():
2174
+ rows = await conn.fetch(
2175
+ "SELECT id, message_id, destination_name FROM queue"
2176
+ " WHERE stage=$1 AND status = ANY($2::text[])",
2177
+ Stage.OUTBOUND.value,
2178
+ [OutboxStatus.PENDING.value, OutboxStatus.INFLIGHT.value],
2179
+ )
2180
+ orphans = [r for r in rows if r["destination_name"] not in valid_names]
2181
+ if not orphans:
2182
+ return 0
2183
+ error = "destination removed from outbound registry"
2184
+ # Pre-lock all affected messages' finalize locks in canonical order before the loop
2185
+ # finalizes any, so concurrent multi-message sweeps/cancels can't deadlock.
2186
+ await self._lock_finalize_batch(conn, (r["message_id"] for r in orphans))
2187
+ for row in orphans:
2188
+ await conn.execute(
2189
+ "UPDATE queue SET status=$1, next_attempt_at=$2, last_error=$3, updated_at=$4,"
2190
+ " owner=NULL, lease_expires_at=NULL WHERE id=$5",
2191
+ OutboxStatus.DEAD.value,
2192
+ now,
2193
+ self._enc(error),
2194
+ now,
2195
+ row["id"],
2196
+ )
2197
+ await self._event(
2198
+ conn,
2199
+ row["message_id"],
2200
+ "dead",
2201
+ row["destination_name"],
2202
+ error,
2203
+ now,
2204
+ )
2205
+ await self._maybe_finalize_message(conn, row["message_id"], now)
2206
+ log.warning(
2207
+ "dead-lettered %d orphaned outbox row(s) at startup for missing destination(s): %s",
2208
+ len(orphans),
2209
+ ", ".join(sorted({r["destination_name"] for r in orphans})),
2210
+ )
2211
+ return len(orphans)
2212
+
2213
+ async def dead_letter_missing_handlers(
2214
+ self, valid_names: set[str], now: float | None = None
2215
+ ) -> int:
2216
+ """Dead-letter every non-terminal **routed** row whose ``handler_name`` left the registry
2217
+ (no transform worker can run it). Scoped to ``stage='routed'``. Returns the rows killed."""
2218
+ now = time.time() if now is None else now
2219
+ async with self._pool.acquire() as conn:
2220
+ async with conn.transaction():
2221
+ rows = await conn.fetch(
2222
+ "SELECT id, message_id, handler_name FROM queue"
2223
+ " WHERE stage=$1 AND status = ANY($2::text[])",
2224
+ Stage.ROUTED.value,
2225
+ [OutboxStatus.PENDING.value, OutboxStatus.INFLIGHT.value],
2226
+ )
2227
+ orphans = [r for r in rows if r["handler_name"] not in valid_names]
2228
+ if not orphans:
2229
+ return 0
2230
+ error = "handler removed from registry"
2231
+ # Pre-lock all affected messages' finalize locks in canonical order before the loop
2232
+ # finalizes any, so concurrent multi-message sweeps/cancels can't deadlock.
2233
+ await self._lock_finalize_batch(conn, (r["message_id"] for r in orphans))
2234
+ for row in orphans:
2235
+ await conn.execute(
2236
+ "UPDATE queue SET status=$1, next_attempt_at=$2, last_error=$3, updated_at=$4,"
2237
+ " owner=NULL, lease_expires_at=NULL WHERE id=$5",
2238
+ OutboxStatus.DEAD.value,
2239
+ now,
2240
+ self._enc(error),
2241
+ now,
2242
+ row["id"],
2243
+ )
2244
+ await self._event(conn, row["message_id"], "dead", None, error, now)
2245
+ await self._maybe_finalize_message(conn, row["message_id"], now)
2246
+ log.warning(
2247
+ "dead-lettered %d orphaned routed row(s) at startup for missing handler(s): %s",
2248
+ len(orphans),
2249
+ ", ".join(sorted({r["handler_name"] for r in orphans})),
2250
+ )
2251
+ return len(orphans)
2252
+
2253
+ async def replay(self, message_id: str, now: float | None = None) -> int:
2254
+ """Re-queue a message for re-processing/re-delivery (attempts reset). Two modes: **recover**
2255
+ any ``dead``/``pending`` row (never a ``done`` sibling — the M-2 hazard), else **re-send** the
2256
+ ``done`` rows. ``cancelled`` rows are never touched. Returns rows requeued."""
2257
+ now = time.time() if now is None else now
2258
+ async with self._pool.acquire() as conn:
2259
+ async with conn.transaction():
2260
+ stuck_row = await conn.fetchrow(
2261
+ "SELECT COUNT(*) AS n FROM queue WHERE message_id=$1 AND status = ANY($2::text[])",
2262
+ message_id,
2263
+ [OutboxStatus.DEAD.value, OutboxStatus.PENDING.value],
2264
+ )
2265
+ stuck = int(stuck_row["n"]) if stuck_row else 0
2266
+ replay_from = (
2267
+ [OutboxStatus.DEAD.value, OutboxStatus.PENDING.value]
2268
+ if stuck
2269
+ else [OutboxStatus.DONE.value]
2270
+ )
2271
+ result = await conn.execute(
2272
+ "UPDATE queue SET status=$1, attempts=0, next_attempt_at=$2, last_error=NULL,"
2273
+ " updated_at=$2 WHERE message_id=$3 AND status = ANY($4::text[])",
2274
+ OutboxStatus.PENDING.value,
2275
+ now,
2276
+ message_id,
2277
+ replay_from,
2278
+ )
2279
+ count = _rowcount(result)
2280
+ if count:
2281
+ pre = await conn.fetchrow(
2282
+ "SELECT 1 FROM queue WHERE message_id=$1 AND stage = ANY($2::text[])"
2283
+ " AND status=$3 LIMIT 1",
2284
+ message_id,
2285
+ [Stage.INGRESS.value, Stage.ROUTED.value],
2286
+ OutboxStatus.PENDING.value,
2287
+ )
2288
+ status = MessageStatus.RECEIVED.value if pre else MessageStatus.ROUTED.value
2289
+ await conn.execute(
2290
+ "UPDATE messages SET status=$1, error=NULL WHERE id=$2", status, message_id
2291
+ )
2292
+ await self._event(conn, message_id, "replayed", None, f"{count} row(s)", now)
2293
+ return count
2294
+
2295
+ async def replay_dead(
2296
+ self,
2297
+ *,
2298
+ channel_id: str | None = None,
2299
+ destination_name: str | None = None,
2300
+ now: float | None = None,
2301
+ ) -> int:
2302
+ """Re-queue dead-lettered **outbound** deliveries only (optionally scoped): set them back to
2303
+ ``pending`` with attempts reset, revert each affected message from ``error`` to ``routed``.
2304
+ Scoped to ``stage='outbound'`` to match the dead-letter view. Returns rows requeued."""
2305
+ now = time.time() if now is None else now
2306
+ async with self._pool.acquire() as conn:
2307
+ async with conn.transaction():
2308
+ ids = await conn.fetch(
2309
+ "SELECT DISTINCT message_id FROM queue WHERE stage=$1 AND status=$2"
2310
+ " AND ($3::text IS NULL OR channel_id=$3)"
2311
+ " AND ($4::text IS NULL OR destination_name=$4)",
2312
+ Stage.OUTBOUND.value,
2313
+ OutboxStatus.DEAD.value,
2314
+ channel_id,
2315
+ destination_name,
2316
+ )
2317
+ message_ids = [r["message_id"] for r in ids]
2318
+ if not message_ids:
2319
+ return 0
2320
+ result = await conn.execute(
2321
+ "UPDATE queue SET status=$1, attempts=0, next_attempt_at=$2, last_error=NULL,"
2322
+ " updated_at=$2 WHERE stage=$3 AND status=$4"
2323
+ " AND ($5::text IS NULL OR channel_id=$5)"
2324
+ " AND ($6::text IS NULL OR destination_name=$6)",
2325
+ OutboxStatus.PENDING.value,
2326
+ now,
2327
+ Stage.OUTBOUND.value,
2328
+ OutboxStatus.DEAD.value,
2329
+ channel_id,
2330
+ destination_name,
2331
+ )
2332
+ count = _rowcount(result)
2333
+ for message_id in message_ids:
2334
+ await conn.execute(
2335
+ "UPDATE messages SET status=$1, error=NULL WHERE id=$2 AND status=$3",
2336
+ MessageStatus.ROUTED.value,
2337
+ message_id,
2338
+ MessageStatus.ERROR.value,
2339
+ )
2340
+ await self._event(conn, message_id, "replayed", None, "dead-letter replay", now)
2341
+ return count
2342
+
2343
+ async def cancel_queued(
2344
+ self,
2345
+ channel_id: str | None,
2346
+ destination_name: str,
2347
+ *,
2348
+ top_only: bool = False,
2349
+ now: float | None = None,
2350
+ ) -> int:
2351
+ """Soft-cancel **pending** deliveries for a destination: mark them ``cancelled``, log a
2352
+ ``cancelled`` event each, and finalize any message whose deliveries are now all terminal.
2353
+ ``channel_id=None`` cancels across all producers; ``top_only`` cancels just the head. Returns
2354
+ the number cancelled."""
2355
+ now = time.time() if now is None else now
2356
+ query = (
2357
+ "SELECT id, message_id FROM queue"
2358
+ " WHERE destination_name=$1 AND status=$2 AND ($3::text IS NULL OR channel_id=$3)"
2359
+ " ORDER BY next_attempt_at, created_at"
2360
+ )
2361
+ if top_only:
2362
+ query += " LIMIT 1"
2363
+ async with self._pool.acquire() as conn:
2364
+ async with conn.transaction():
2365
+ rows = await conn.fetch(
2366
+ query, destination_name, OutboxStatus.PENDING.value, channel_id
2367
+ )
2368
+ if not rows:
2369
+ return 0
2370
+ ids = [r["id"] for r in rows]
2371
+ await conn.execute(
2372
+ "UPDATE queue SET status=$1, updated_at=$2 WHERE id = ANY($3::text[])",
2373
+ OutboxStatus.CANCELLED.value,
2374
+ now,
2375
+ ids,
2376
+ )
2377
+ for r in rows:
2378
+ await self._event(
2379
+ conn,
2380
+ r["message_id"],
2381
+ "cancelled",
2382
+ destination_name,
2383
+ "manual purge",
2384
+ now,
2385
+ )
2386
+ # Pre-lock every affected message's finalize lock in canonical order before finalizing
2387
+ # any, so two concurrent multi-message cancels can't form a lock cycle (deadlock).
2388
+ await self._lock_finalize_batch(conn, (r["message_id"] for r in rows))
2389
+ for message_id in {r["message_id"] for r in rows}:
2390
+ await self._maybe_finalize_message(conn, message_id, now)
2391
+ return len(ids)
2392
+
2393
+ # --- read helpers (API / console) ----------------------------------------
2394
+
2395
+ async def get_message(self, message_id: str) -> dict[str, Any] | None:
2396
+ record = await self._fetchone("SELECT * FROM messages WHERE id=$1", message_id)
2397
+ if record is None:
2398
+ return None
2399
+ d = dict(record)
2400
+ d["raw"] = self._cipher.decrypt(d["raw"]) # decrypt the body for display
2401
+ d["error"] = self._dec(d["error"]) # error may embed raw HL7 fragments (WP-5)
2402
+ return d
2403
+
2404
+ async def list_messages(
2405
+ self,
2406
+ *,
2407
+ channel_id: str | None = None,
2408
+ status: str | None = None,
2409
+ message_type: str | None = None,
2410
+ control_id: str | None = None,
2411
+ limit: int = 50,
2412
+ offset: int = 0,
2413
+ allowed_channels: Sequence[str] | None = None,
2414
+ ) -> list[dict[str, Any]]:
2415
+ """Most-recent-first message listing (metadata only — bodies omitted until a message is
2416
+ opened + audited). ``allowed_channels`` restricts to a per-channel RBAC scope."""
2417
+ where, params = self._message_filter(
2418
+ channel_id, status, message_type, control_id, allowed_channels
2419
+ )
2420
+ n = len(params)
2421
+ rows = await self._fetchall(
2422
+ "SELECT id, channel_id, received_at, source_type, control_id, message_type,"
2423
+ " status, error, summary, metadata,"
2424
+ " (SELECT event FROM message_events e WHERE e.message_id = messages.id"
2425
+ " ORDER BY e.id DESC LIMIT 1) AS last_event"
2426
+ f" FROM messages{where}"
2427
+ f" ORDER BY received_at DESC, id DESC LIMIT ${n + 1} OFFSET ${n + 2}",
2428
+ *params,
2429
+ limit,
2430
+ offset,
2431
+ )
2432
+ return [self._decode_record(r, "error") for r in rows]
2433
+
2434
+ async def count_messages(
2435
+ self,
2436
+ *,
2437
+ channel_id: str | None = None,
2438
+ status: str | None = None,
2439
+ message_type: str | None = None,
2440
+ control_id: str | None = None,
2441
+ allowed_channels: Sequence[str] | None = None,
2442
+ ) -> int:
2443
+ where, params = self._message_filter(
2444
+ channel_id, status, message_type, control_id, allowed_channels
2445
+ )
2446
+ row = await self._fetchone(f"SELECT COUNT(*) AS n FROM messages{where}", *params)
2447
+ return int(row["n"]) if row else 0
2448
+
2449
+ async def list_dead(
2450
+ self,
2451
+ *,
2452
+ channel_id: str | None = None,
2453
+ destination_name: str | None = None,
2454
+ limit: int = 50,
2455
+ offset: int = 0,
2456
+ allowed_channels: Sequence[str] | None = None,
2457
+ ) -> list[dict[str, Any]]:
2458
+ """Dead-lettered deliveries (one row per failed message→destination), newest first, joined
2459
+ with message metadata. Bodies omitted. ``allowed_channels`` restricts to a per-channel scope."""
2460
+ where, params = self._dead_filter(channel_id, destination_name, allowed_channels)
2461
+ n = len(params)
2462
+ rows = await self._fetchall(
2463
+ "SELECT o.id AS outbox_id, o.message_id, o.channel_id, o.destination_name,"
2464
+ " o.attempts, o.last_error, o.updated_at,"
2465
+ " m.control_id, m.message_type, m.received_at, m.summary"
2466
+ f" FROM queue o JOIN messages m ON m.id = o.message_id{where}"
2467
+ f" ORDER BY o.updated_at DESC, o.id DESC LIMIT ${n + 1} OFFSET ${n + 2}",
2468
+ *params,
2469
+ limit,
2470
+ offset,
2471
+ )
2472
+ return [self._decode_record(r, "last_error") for r in rows]
2473
+
2474
+ async def count_dead(
2475
+ self,
2476
+ *,
2477
+ channel_id: str | None = None,
2478
+ destination_name: str | None = None,
2479
+ allowed_channels: Sequence[str] | None = None,
2480
+ ) -> int:
2481
+ where, params = self._dead_filter(channel_id, destination_name, allowed_channels)
2482
+ row = await self._fetchone(f"SELECT COUNT(*) AS n FROM queue o{where}", *params)
2483
+ return int(row["n"]) if row else 0
2484
+
2485
+ @staticmethod
2486
+ def _message_filter(
2487
+ channel_id: str | None,
2488
+ status: str | None,
2489
+ message_type: str | None,
2490
+ control_id: str | None,
2491
+ allowed_channels: Sequence[str] | None = None,
2492
+ ) -> tuple[str, list[Any]]:
2493
+ clauses: list[str] = []
2494
+ params: list[Any] = []
2495
+ for column, value in (
2496
+ ("channel_id", channel_id),
2497
+ ("status", status),
2498
+ ("message_type", message_type),
2499
+ ("control_id", control_id),
2500
+ ):
2501
+ if value is not None:
2502
+ params.append(value)
2503
+ clauses.append(f"{column}=${len(params)}")
2504
+ _append_channel_scope_pg(clauses, params, "channel_id", allowed_channels)
2505
+ where = f" WHERE {' AND '.join(clauses)}" if clauses else ""
2506
+ return where, params
2507
+
2508
+ @staticmethod
2509
+ def _dead_filter(
2510
+ channel_id: str | None,
2511
+ destination_name: str | None,
2512
+ allowed_channels: Sequence[str] | None = None,
2513
+ ) -> tuple[str, list[Any]]:
2514
+ # Scoped to outbound DEAD rows — the per-destination delivery DLQ.
2515
+ params: list[Any] = [Stage.OUTBOUND.value, OutboxStatus.DEAD.value]
2516
+ clauses = ["o.stage=$1", "o.status=$2"]
2517
+ if channel_id is not None:
2518
+ params.append(channel_id)
2519
+ clauses.append(f"o.channel_id=${len(params)}")
2520
+ if destination_name is not None:
2521
+ params.append(destination_name)
2522
+ clauses.append(f"o.destination_name=${len(params)}")
2523
+ _append_channel_scope_pg(clauses, params, "o.channel_id", allowed_channels)
2524
+ return f" WHERE {' AND '.join(clauses)}", params
2525
+
2526
+ async def outbox_for(self, message_id: str) -> list[dict[str, Any]]:
2527
+ """The outbound deliveries for a message (one row per destination). Scoped to
2528
+ ``stage='outbound'`` — the transient ingress/routed rows aren't deliveries."""
2529
+ rows = await self._fetchall(
2530
+ "SELECT * FROM queue WHERE message_id=$1 AND stage=$2 ORDER BY destination_name",
2531
+ message_id,
2532
+ Stage.OUTBOUND.value,
2533
+ )
2534
+ # Only last_error is decrypted here; the encrypted `payload` body is left as-is on purpose —
2535
+ # bodies come through get_message (audited). Don't add `payload` to this projection's decrypt.
2536
+ return [self._decode_record(r, "last_error") for r in rows]
2537
+
2538
+ async def outbox_payloads_for(self, message_id: str) -> list[dict[str, Any]]:
2539
+ """Like :meth:`outbox_for`, but also decrypts the transformed ``payload`` (PHI body) for the
2540
+ parity-comparison read path (#14). A separate method so ``outbox_for`` (message-detail
2541
+ metadata) never decrypts bodies; the API gates this on ``MESSAGES_VIEW_RAW`` and audits it."""
2542
+ rows = await self._fetchall(
2543
+ "SELECT * FROM queue WHERE message_id=$1 AND stage=$2 ORDER BY destination_name",
2544
+ message_id,
2545
+ Stage.OUTBOUND.value,
2546
+ )
2547
+ return [self._decode_record(r, "last_error", "payload") for r in rows]
2548
+
2549
+ async def events_for(self, message_id: str) -> list[dict[str, Any]]:
2550
+ rows = await self._fetchall(
2551
+ "SELECT * FROM message_events WHERE message_id=$1 ORDER BY id", message_id
2552
+ )
2553
+ return [self._decode_record(r, "detail") for r in rows]
2554
+
2555
+ async def stats(self) -> dict[str, int]:
2556
+ """Outbound-queue depth by status (scoped to outbound rows — the delivery backlog)."""
2557
+ rows = await self._fetchall(
2558
+ "SELECT status, COUNT(*) AS n FROM queue WHERE stage=$1 GROUP BY status",
2559
+ Stage.OUTBOUND.value,
2560
+ )
2561
+ return {r["status"]: int(r["n"]) for r in rows}
2562
+
2563
+ async def in_pipeline_depth(self) -> int:
2564
+ """NOT-DONE rows (``pending``|``inflight``) across every stage — the whole-pipeline drain gauge."""
2565
+ rows = await self._fetchall(
2566
+ "SELECT COUNT(*) AS n FROM queue WHERE stage IN ($1,$2,$3) AND status IN ($4,$5)",
2567
+ Stage.INGRESS.value,
2568
+ Stage.ROUTED.value,
2569
+ Stage.OUTBOUND.value,
2570
+ OutboxStatus.PENDING.value,
2571
+ OutboxStatus.INFLIGHT.value,
2572
+ )
2573
+ return int(rows[0]["n"]) if rows else 0
2574
+
2575
+ # --- audit log -----------------------------------------------------------
2576
+
2577
+ async def record_view(
2578
+ self, message_id: str, *, actor: str | None = None, now: float | None = None
2579
+ ) -> None:
2580
+ """Append a ``viewed`` audit event (called whenever a message body / PHI is opened)."""
2581
+ now = time.time() if now is None else now
2582
+ async with self._pool.acquire() as conn:
2583
+ async with conn.transaction():
2584
+ await self._event(conn, message_id, "viewed", None, actor or "", now)
2585
+
2586
+ async def record_audit(
2587
+ self,
2588
+ action: str,
2589
+ *,
2590
+ actor: str | None = None,
2591
+ channel_id: str | None = None,
2592
+ detail: str | None = None,
2593
+ now: float | None = None,
2594
+ ) -> None:
2595
+ """Append a row to the audit hash chain. H-7: takes the audit-chain advisory lock first, so
2596
+ concurrent writers serialize on the read-tail + insert and can't fork the chain.
2597
+
2598
+ After the row commits, a **PHI-safe metadata copy** is teed off-box via the shared
2599
+ :func:`~messagefoundry.store.audit_tee.emit_audit_tee` (sec-offbox-log) — the same redaction
2600
+ path the SQLite and SQL Server backends use."""
2601
+ now = time.time() if now is None else now
2602
+ async with self._pool.acquire() as conn:
2603
+ async with conn.transaction():
2604
+ await self._advisory_lock(conn, _LOCK_CLASS_AUDIT, _AUDIT_LOCK)
2605
+ last = await conn.fetchrow(
2606
+ "SELECT row_hash FROM audit_log ORDER BY id DESC LIMIT 1"
2607
+ )
2608
+ prev = last["row_hash"] if last and last["row_hash"] else ""
2609
+ row_hash = audit_row_hash(
2610
+ prev, ts=now, actor=actor, action=action, channel_id=channel_id, detail=detail
2611
+ )
2612
+ await conn.execute(
2613
+ "INSERT INTO audit_log (ts, actor, action, channel_id, detail, row_hash)"
2614
+ " VALUES ($1,$2,$3,$4,$5,$6)",
2615
+ now,
2616
+ actor,
2617
+ action,
2618
+ channel_id,
2619
+ detail,
2620
+ row_hash,
2621
+ )
2622
+ # Tee off-box AFTER the transaction commits + the connection is released (only forward what
2623
+ # truly persisted; never hold the advisory lock / a pooled connection across a syslog send).
2624
+ emit_audit_tee(action=action, actor=actor, channel_id=channel_id, detail=detail, ts=now)
2625
+
2626
+ async def list_audit(self, *, limit: int = 50) -> Sequence[Row]:
2627
+ """Most-recent-first audit entries (for review tooling / tests)."""
2628
+ return await self._fetchall("SELECT * FROM audit_log ORDER BY id DESC LIMIT $1", limit)
2629
+
2630
+ async def security_events_for_user(self, username: str, *, limit: int = 100) -> Sequence[Row]:
2631
+ """A user's own security events (``auth.*``), most-recent-first — for ``GET
2632
+ /me/security-events`` (ASVS 6.3.5/6.3.7); admin-initiated changes go out-of-band by email."""
2633
+ return await self._fetchall(
2634
+ "SELECT ts, action, detail FROM audit_log "
2635
+ "WHERE actor = $1 AND action LIKE 'auth.%' ORDER BY id DESC LIMIT $2",
2636
+ username,
2637
+ limit,
2638
+ )
2639
+
2640
+ # --- dual-control approvals (ASVS 2.3.5) ---------------------------------
2641
+
2642
+ async def create_pending_approval(
2643
+ self,
2644
+ *,
2645
+ approval_id: str,
2646
+ operation: str,
2647
+ params: str,
2648
+ requester: str,
2649
+ requested_at: float,
2650
+ expires_at: float | None,
2651
+ ) -> None:
2652
+ """Persist a high-value action awaiting a distinct second approver (dual-control, 2.3.5)."""
2653
+ await self._execute(
2654
+ "INSERT INTO pending_approvals "
2655
+ "(id, operation, params, requester, requested_at, status, expires_at) "
2656
+ "VALUES ($1,$2,$3,$4,$5,'pending',$6)",
2657
+ approval_id,
2658
+ operation,
2659
+ params,
2660
+ requester,
2661
+ requested_at,
2662
+ expires_at,
2663
+ )
2664
+
2665
async def get_pending_approval(self, approval_id: str) -> Row | None:
    """Look up one approval request by id, in any status; returns what ``_fetchone`` yields."""
    lookup_sql = (
        "SELECT id, operation, params, requester, requested_at, status, approver, decided_at,"
        " expires_at FROM pending_approvals WHERE id = $1"
    )
    found: Row | None = await self._fetchone(lookup_sql, approval_id)
    return found
2672
+
2673
async def list_pending_approvals(self, *, now: float, limit: int = 100) -> Sequence[Row]:
    """List open approval requests, newest ``requested_at`` first.

    "Open" means ``status = 'pending'`` and either no expiry or an ``expires_at`` later than ``now``.
    """
    open_sql = (
        "SELECT id, operation, params, requester, requested_at, status, approver, decided_at,"
        " expires_at FROM pending_approvals"
        " WHERE status = 'pending' AND (expires_at IS NULL OR expires_at > $1)"
        " ORDER BY requested_at DESC LIMIT $2"
    )
    return await self._fetchall(open_sql, now, limit)
2683
+
2684
async def decide_pending_approval(
    self, approval_id: str, *, status: str, approver: str | None, decided_at: float
) -> bool:
    """Move a still-``pending`` request to ``status`` (approved/rejected/expired) in one UPDATE.

    The ``status = 'pending'`` predicate makes the transition conditional, so only one caller can
    win it. Returns ``True`` iff this call changed a row — the guard against a double decision.
    """
    transition_sql = (
        "UPDATE pending_approvals SET status = $1, approver = $2, decided_at = $3"
        " WHERE id = $4 AND status = 'pending'"
    )
    tag = await self._pool.execute(transition_sql, status, approver, decided_at, approval_id)
    affected = _rowcount(tag)
    return affected > 0
2698
+
2699
async def audit_anchor(self) -> tuple[int, str]:
    """Return the audit log's external anchor as ``(row_count, head_hash)``.

    ``head_hash`` is the ``row_hash`` of the highest-``id`` row, or ``""`` for an empty log.
    """
    anchor = await self._fetchone(
        "SELECT COUNT(*) AS n, "
        "(SELECT row_hash FROM audit_log ORDER BY id DESC LIMIT 1) AS head FROM audit_log"
    )
    if anchor is None:
        return 0, ""
    head = anchor["head"]
    total = int(anchor["n"])
    return total, (head if head else "")
2708
+
2709
async def verify_audit_chain(
    self, *, expected_anchor: tuple[int, str] | None = None
) -> tuple[bool, str | None]:
    """Recompute the audit hash-chain in ``id`` order; returns ``(ok, message)``.

    Every row's stored ``row_hash`` must equal ``audit_row_hash`` of the previous row's hash and
    the row's own fields; the first mismatch is reported by row id.

    Pass ``expected_anchor`` — a ``(row_count, head_hash)`` pair from :meth:`audit_anchor`, held
    out-of-band — to also detect tail-truncation or a rewrite. The anchor is compared against the
    hash of row number ``row_count``, not against the current head, so rows legitimately appended
    after the anchor was recorded do not produce a false alarm. The log still fails when it is
    shorter than the anchor or when its hash at the anchored position differs.
    """
    rows = await self._fetchall(
        "SELECT id, ts, actor, action, channel_id, detail, row_hash FROM audit_log ORDER BY id"
    )
    anchor_count = None if expected_anchor is None else expected_anchor[0]
    prev = ""
    count = 0
    # Hash of the chain as it stood after ``anchor_count`` rows; an anchor taken on an empty log
    # has the empty head by definition.
    head_at_anchor: str | None = "" if anchor_count == 0 else None
    for r in rows:
        expected = audit_row_hash(
            prev,
            ts=r["ts"],
            actor=r["actor"],
            action=r["action"],
            channel_id=r["channel_id"],
            detail=r["detail"],
        )
        if r["row_hash"] != expected:
            return False, f"audit chain broken at row id={r['id']}"
        prev = r["row_hash"]
        count += 1
        if count == anchor_count:
            head_at_anchor = prev
    if expected_anchor is not None:
        exp_count, exp_head = expected_anchor
        if count < exp_count or head_at_anchor != exp_head:
            return (
                False,
                f"audit log diverges from recorded anchor (have {count} row(s) head {prev[:12]!r}, "
                f"expected {exp_count} head {exp_head[:12]!r}) — truncated or rewritten",
            )
    return True, f"verified {count} audit row(s)"
2741
+
2742
+ # --- auth: users / roles / sessions --------------------------------------
2743
+
2744
async def create_user(
    self,
    *,
    user_id: str,
    username: str,
    auth_provider: str,
    display_name: str | None = None,
    email: str | None = None,
    password_hash: str | None = None,
    must_change_password: bool = False,
    now: float | None = None,
) -> None:
    """Insert a new, enabled user row with zeroed lockout counters.

    ``created_at`` and ``updated_at`` are both set to ``now`` (wall clock when omitted).
    ``password_changed_at`` is stamped with ``now`` only when a ``password_hash`` is supplied.
    """
    if now is None:
        now = time.time()
    password_changed_at = None if password_hash is None else now
    insert_sql = (
        "INSERT INTO users (id, username, auth_provider, display_name, email, disabled,"
        " created_at, updated_at, last_login_at, password_hash, password_changed_at,"
        " must_change_password, failed_attempts, locked_until)"
        " VALUES ($1,$2,$3,$4,$5,FALSE,$6,$6,NULL,$7,$8,$9,0,NULL)"
    )
    await self._execute(
        insert_sql,
        user_id,
        username,
        auth_provider,
        display_name,
        email,
        now,
        password_hash,
        password_changed_at,
        must_change_password,
    )
2772
+
2773
async def get_user(self, user_id: str) -> UserRecord | None:
    """Load the user with this ``id`` as a ``UserRecord``; ``None`` when the lookup is empty."""
    found = await self._fetchone("SELECT * FROM users WHERE id=$1", user_id)
    if not found:
        return None
    return UserRecord.from_mapping(dict(found))
2776
+
2777
async def get_user_by_username(self, username: str) -> UserRecord | None:
    """Load the user with this exact ``username`` as a ``UserRecord``; ``None`` when not found."""
    found = await self._fetchone("SELECT * FROM users WHERE username=$1", username)
    if not found:
        return None
    return UserRecord.from_mapping(dict(found))
2780
+
2781
async def list_users(self) -> list[UserRecord]:
    """Return every user as a ``UserRecord``, ordered by ``username``."""
    users: list[UserRecord] = []
    for row in await self._fetchall("SELECT * FROM users ORDER BY username"):
        users.append(UserRecord.from_mapping(dict(row)))
    return users
2784
+
2785
async def count_users(self) -> int:
    """Return the row count of the ``users`` table (delegates to ``_count``)."""
    total = await self._count("users")
    return total
2787
+
2788
async def set_password(
    self,
    user_id: str,
    *,
    password_hash: str,
    must_change_password: bool = False,
    now: float | None = None,
) -> None:
    """Store a new password hash and clear the user's lockout state.

    Also stamps ``password_changed_at`` / ``updated_at`` with ``now`` (wall clock when omitted),
    sets the ``must_change_password`` flag, and resets ``failed_attempts`` and ``locked_until``.
    """
    if now is None:
        now = time.time()
    update_sql = (
        "UPDATE users SET password_hash=$1, password_changed_at=$2, must_change_password=$3,"
        " failed_attempts=0, locked_until=NULL, updated_at=$2 WHERE id=$4"
    )
    await self._execute(update_sql, password_hash, now, must_change_password, user_id)
2805
+
2806
+ # --- MFA: native TOTP second factor (local accounts, WP-14) --------------
2807
+
2808
async def set_totp_secret(
    self, user_id: str, *, secret: str | None, now: float | None = None
) -> None:
    """Stage (or clear, with ``None``) a user's base32 TOTP secret, store-cipher encrypted.

    The value is passed through ``self._enc`` before it is written. This does not enable MFA —
    ``totp_enabled`` is not touched here.
    """
    if now is None:
        now = time.time()
    sealed = self._enc(secret)
    await self._execute(
        "UPDATE users SET totp_secret=$1, updated_at=$2 WHERE id=$3", sealed, now, user_id
    )
2819
+
2820
async def get_totp_secret(self, user_id: str) -> str | None:
    """Return the user's TOTP secret after ``self._dec``, or ``None`` if no user or no secret."""
    record = await self._fetchone("SELECT totp_secret FROM users WHERE id=$1", user_id)
    if not record:
        return None
    sealed = record["totp_secret"]
    if sealed is None:
        return None
    return self._dec(sealed)
2825
+
2826
async def enable_totp(
    self, user_id: str, *, recovery_code_hashes: list[str], now: float | None = None
) -> None:
    """Turn TOTP on for a user and store the recovery-code hashes as a JSON array.

    ``totp_enrolled_at`` and ``updated_at`` are both set to ``now`` (wall clock when omitted).
    """
    if now is None:
        now = time.time()
    codes_json = json.dumps(recovery_code_hashes)
    enable_sql = (
        "UPDATE users SET totp_enabled=TRUE, totp_enrolled_at=$1, totp_recovery_codes=$2,"
        " updated_at=$1 WHERE id=$3"
    )
    await self._execute(enable_sql, now, codes_json, user_id)
2837
+
2838
async def disable_totp(self, user_id: str, *, now: float | None = None) -> None:
    """Turn TOTP off for a user, wiping the secret, enrolment time and recovery codes."""
    if now is None:
        now = time.time()
    disable_sql = (
        "UPDATE users SET totp_secret=NULL, totp_enabled=FALSE, totp_enrolled_at=NULL,"
        " totp_recovery_codes=NULL, updated_at=$1 WHERE id=$2"
    )
    await self._execute(disable_sql, now, user_id)
2846
+
2847
async def get_recovery_code_hashes(self, user_id: str) -> list[str]:
    """Return the user's stored recovery-code hashes (decoded from JSON), or ``[]`` if none."""
    record = await self._fetchone("SELECT totp_recovery_codes FROM users WHERE id=$1", user_id)
    if not record:
        return []
    encoded = record["totp_recovery_codes"]
    if encoded is None:
        return []
    return list(map(str, json.loads(encoded)))
2852
+
2853
async def consume_recovery_code_hash(
    self, user_id: str, code_hash: str, *, now: float | None = None
) -> bool:
    """Remove one recovery-code hash from the user's list; ``True`` iff it was present.

    The row is read with ``SELECT ... FOR UPDATE`` and rewritten inside the same transaction, so
    concurrent verifications (even cross-node) can't double-spend a single-use recovery code
    (WP-14).
    """
    if now is None:
        now = time.time()
    async with self._pool.acquire() as conn, conn.transaction():
        locked = await conn.fetchrow(
            "SELECT totp_recovery_codes FROM users WHERE id=$1 FOR UPDATE", user_id
        )
        if locked is None:
            return False
        encoded = locked["totp_recovery_codes"]
        if encoded is None:
            return False
        remaining = [str(h) for h in json.loads(encoded)]
        try:
            remaining.remove(code_hash)
        except ValueError:
            return False  # already consumed by a concurrent caller
        await conn.execute(
            "UPDATE users SET totp_recovery_codes=$1, updated_at=$2 WHERE id=$3",
            json.dumps(remaining),
            now,
            user_id,
        )
        return True
2878
+
2879
async def consume_totp_step(self, user_id: str, step: int) -> bool:
    """Record ``step`` as the user's highest consumed TOTP time-step; ``True`` iff newly consumed.

    A step is new only when it is strictly greater than the stored ``last_totp_step`` (or none is
    stored). A code replayed inside its ±1-step verify window resolves to a non-greater step and
    returns ``False`` — single-use per ASVS 6.5.1. The ``SELECT ... FOR UPDATE`` and the ``UPDATE``
    share one transaction (no cross-node double-spend). An unknown user returns ``False``.
    """
    async with self._pool.acquire() as conn, conn.transaction():
        locked = await conn.fetchrow(
            "SELECT last_totp_step FROM users WHERE id=$1 FOR UPDATE", user_id
        )
        if locked is None:
            return False
        previous = locked["last_totp_step"]
        replayed = previous is not None and previous >= step
        if replayed:
            return False  # already consumed (or an older step) — replay within the window
        await conn.execute("UPDATE users SET last_totp_step=$1 WHERE id=$2", step, user_id)
        return True
2896
+
2897
async def set_user_disabled(
    self, user_id: str, *, disabled: bool, now: float | None = None
) -> None:
    """Set the user's ``disabled`` flag and bump ``updated_at`` (wall clock when ``now`` omitted)."""
    if now is None:
        now = time.time()
    toggle_sql = "UPDATE users SET disabled=$1, updated_at=$2 WHERE id=$3"
    await self._execute(toggle_sql, disabled, now, user_id)
2904
+
2905
async def update_user_profile(
    self,
    user_id: str,
    *,
    display_name: str | None,
    email: str | None,
    now: float | None = None,
) -> None:
    """Overwrite the user's ``display_name`` and ``email`` (``None`` clears) and bump ``updated_at``."""
    if now is None:
        now = time.time()
    profile_sql = "UPDATE users SET display_name=$1, email=$2, updated_at=$3 WHERE id=$4"
    await self._execute(profile_sql, display_name, email, now, user_id)
2921
+
2922
async def delete_user(self, user_id: str) -> None:
    """Delete a user together with their role grants and sessions, in one transaction.

    Dependent rows (``user_roles``, ``sessions``) are removed before the ``users`` row.
    """
    statements = (
        "DELETE FROM user_roles WHERE user_id=$1",
        "DELETE FROM sessions WHERE user_id=$1",
        "DELETE FROM users WHERE id=$1",
    )
    async with self._pool.acquire() as conn, conn.transaction():
        for sql in statements:
            await conn.execute(sql, user_id)
2928
+
2929
async def record_login_success(self, user_id: str, *, now: float | None = None) -> None:
    """Stamp ``last_login_at`` / ``updated_at`` and clear the failed-attempt counter and lock."""
    if now is None:
        now = time.time()
    success_sql = (
        "UPDATE users SET last_login_at=$1, failed_attempts=0, locked_until=NULL,"
        " updated_at=$1 WHERE id=$2"
    )
    await self._execute(success_sql, now, user_id)
2937
+
2938
async def record_login_failure(
    self,
    user_id: str,
    *,
    failed_attempts: int,
    locked_until: float | None,
    now: float | None = None,
) -> None:
    """Persist the caller-computed failure count and lock deadline for a user.

    This method only stores the values it is given; it does not increment or decide anything.
    """
    if now is None:
        now = time.time()
    failure_sql = "UPDATE users SET failed_attempts=$1, locked_until=$2, updated_at=$3 WHERE id=$4"
    await self._execute(failure_sql, failed_attempts, locked_until, now, user_id)
2954
+
2955
async def upsert_role(
    self,
    *,
    role_id: str,
    display_name: str,
    description: str | None = None,
    builtin: bool = True,
) -> None:
    """Insert a role, or overwrite its display name, description and ``builtin`` flag if it exists."""
    upsert_sql = (
        "INSERT INTO roles (id, display_name, description, builtin) VALUES ($1,$2,$3,$4)"
        " ON CONFLICT (id) DO UPDATE SET display_name=excluded.display_name,"
        " description=excluded.description, builtin=excluded.builtin"
    )
    await self._execute(upsert_sql, role_id, display_name, description, builtin)
2972
+
2973
async def list_roles(self) -> Sequence[Row]:
    """Return every role row, ordered by ``id``."""
    roles = await self._fetchall("SELECT * FROM roles ORDER BY id")
    return roles
2975
+
2976
async def get_user_role_ids(self, user_id: str) -> list[str]:
    """Return the ids of the roles granted to a user, as strings, sorted by ``role_id``."""
    grants = await self._fetchall(
        "SELECT role_id FROM user_roles WHERE user_id=$1 ORDER BY role_id", user_id
    )
    role_ids: list[str] = []
    for grant in grants:
        role_ids.append(str(grant["role_id"]))
    return role_ids
2981
+
2982
async def set_user_roles(
    self,
    user_id: str,
    role_ids: Sequence[str],
    *,
    assigned_by: str | None = None,
    now: float | None = None,
) -> None:
    """Replace a user's role grants with exactly ``role_ids``, in one transaction.

    All existing ``user_roles`` rows for the user are deleted, then one row is inserted per role
    with ``assigned_at=now`` (wall clock when omitted) and ``assigned_by``. An empty ``role_ids``
    leaves the user with no roles.

    Repeated ids in ``role_ids`` are collapsed (first occurrence wins, order kept) so a duplicate
    in the input cannot produce a duplicate grant row or a uniqueness failure that would roll the
    whole replacement back.
    """
    now = time.time() if now is None else now
    unique_role_ids = list(dict.fromkeys(role_ids))  # order-preserving de-duplication
    async with self._pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute("DELETE FROM user_roles WHERE user_id=$1", user_id)
            for role_id in unique_role_ids:
                await conn.execute(
                    "INSERT INTO user_roles (user_id, role_id, assigned_at, assigned_by)"
                    " VALUES ($1,$2,$3,$4)",
                    user_id,
                    role_id,
                    now,
                    assigned_by,
                )
3003
+
3004
async def set_user_channel_scope(
    self, user_id: str, scope_json: str | None, *, now: float | None = None
) -> None:
    """Store a user's per-channel scope: a JSON list of connection names, or ``None`` for all.

    ``scope_json`` is written as given (not parsed or validated here); ``updated_at`` is bumped.
    """
    if now is None:
        now = time.time()
    scope_sql = "UPDATE users SET channel_scope=$1, updated_at=$2 WHERE id=$3"
    await self._execute(scope_sql, scope_json, now, user_id)
3012
+
3013
async def roles_for_ad_groups(self, groups: Iterable[str]) -> set[str]:
    """Return the role ids mapped to any of the given AD groups.

    Group names are stripped and lower-cased before lookup; blank names are ignored. With no
    usable names the database is not queried and an empty set is returned.
    """
    wanted: set[str] = set()
    for group in groups:
        name = group.strip()
        if name:
            wanted.add(name.lower())
    if not wanted:
        return set()
    matches = await self._fetchall(
        "SELECT DISTINCT role_id FROM ad_group_role_map WHERE ad_group = ANY($1::text[])",
        sorted(wanted),
    )
    return {str(match["role_id"]) for match in matches}
3022
+
3023
async def list_ad_group_role_map(self) -> Sequence[Row]:
    """Return every ``(ad_group, role_id)`` mapping, ordered by group then role."""
    listing_sql = "SELECT ad_group, role_id FROM ad_group_role_map ORDER BY ad_group, role_id"
    return await self._fetchall(listing_sql)
3027
+
3028
async def set_ad_group_role_map(self, entries: Iterable[tuple[str, str]]) -> None:
    """Replace the whole AD-group → role map with ``entries``, in one transaction.

    Group names are stripped and lower-cased; entries with a blank group are dropped; duplicate
    pairs are collapsed. The table is emptied first, then the pairs are inserted in sorted order.
    """
    unique_pairs: set[tuple[str, str]] = set()
    for group, role_id in entries:
        name = group.strip()
        if name:
            unique_pairs.add((name.lower(), role_id))
    ordered = sorted(unique_pairs)
    async with self._pool.acquire() as conn, conn.transaction():
        await conn.execute("DELETE FROM ad_group_role_map")
        for ad_group, role_id in ordered:
            await conn.execute(
                "INSERT INTO ad_group_role_map (ad_group, role_id) VALUES ($1,$2)",
                ad_group,
                role_id,
            )
3039
+
3040
async def channels_for_ad_groups(self, groups: Iterable[str]) -> set[str]:
    """Return the channels mapped to any of the given AD groups.

    Group names are stripped and lower-cased before lookup; blank names are ignored. With no
    usable names the database is not queried and an empty set is returned.
    """
    wanted: set[str] = set()
    for group in groups:
        name = group.strip()
        if name:
            wanted.add(name.lower())
    if not wanted:
        return set()
    matches = await self._fetchall(
        "SELECT DISTINCT channel FROM ad_group_scope_map WHERE ad_group = ANY($1::text[])",
        sorted(wanted),
    )
    return {str(match["channel"]) for match in matches}
3049
+
3050
async def list_ad_group_scope_map(self) -> Sequence[Row]:
    """Return every ``(ad_group, channel)`` mapping, ordered by group then channel."""
    listing_sql = "SELECT ad_group, channel FROM ad_group_scope_map ORDER BY ad_group, channel"
    return await self._fetchall(listing_sql)
3054
+
3055
async def set_ad_group_scope_map(self, entries: Iterable[tuple[str, str]]) -> None:
    """Replace the whole AD-group → channel scope map with ``entries``, in one transaction.

    Groups are stripped and lower-cased, channels are stripped; entries where either side is
    blank are dropped and duplicate pairs collapsed. The table is emptied first, then the pairs
    are inserted in sorted order.
    """
    unique_pairs: set[tuple[str, str]] = set()
    for group, channel_name in entries:
        group_name = group.strip()
        trimmed_channel = channel_name.strip()
        if group_name and trimmed_channel:
            unique_pairs.add((group_name.lower(), trimmed_channel))
    ordered = sorted(unique_pairs)
    async with self._pool.acquire() as conn, conn.transaction():
        await conn.execute("DELETE FROM ad_group_scope_map")
        for ad_group, channel in ordered:
            await conn.execute(
                "INSERT INTO ad_group_scope_map (ad_group, channel) VALUES ($1,$2)",
                ad_group,
                channel,
            )
3068
+
3069
async def create_session(
    self,
    *,
    token_hash: str,
    user_id: str,
    expires_at: float,
    client: str | None = None,
    seed_reauth: bool = True,
    now: float | None = None,
) -> None:
    """Insert a new, un-revoked session row keyed by ``token_hash``.

    ``created_at`` and ``last_used_at`` are both ``now`` (wall clock when omitted).
    """
    if now is None:
        now = time.time()
    # reauth_at ($6) seeds the step-up window from login (ASVS 7.5.3); seed_reauth=False leaves
    # it NULL for an MFA-PENDING session (WP-14) so a stolen pre-MFA token can't enroll/step-up.
    reauth_at = now if seed_reauth else None
    insert_sql = (
        "INSERT INTO sessions (token_hash, user_id, created_at, expires_at, last_used_at,"
        " revoked_at, client, reauth_at) VALUES ($1,$2,$3,$4,$3,NULL,$5,$6)"
    )
    await self._execute(insert_sql, token_hash, user_id, now, expires_at, client, reauth_at)
3092
+
3093
async def get_session(self, token_hash: str) -> SessionRecord | None:
    """Load the session stored under ``token_hash`` as a ``SessionRecord``; ``None`` if absent.

    No revocation or expiry filter is applied here.
    """
    found = await self._fetchone("SELECT * FROM sessions WHERE token_hash=$1", token_hash)
    if not found:
        return None
    return SessionRecord.from_mapping(dict(found))
3096
+
3097
async def list_sessions(self, user_id: str, *, now: float | None = None) -> list[SessionRecord]:
    """Return a user's active sessions, most-recently-used first (WP-10).

    Active means not revoked and with ``expires_at`` later than ``now`` (wall clock when omitted).
    """
    if now is None:
        now = time.time()
    active_sql = (
        "SELECT * FROM sessions WHERE user_id=$1 AND revoked_at IS NULL AND expires_at > $2"
        " ORDER BY last_used_at DESC"
    )
    sessions: list[SessionRecord] = []
    for row in await self._fetchall(active_sql, user_id, now):
        sessions.append(SessionRecord.from_mapping(dict(row)))
    return sessions
3107
+
3108
async def touch_session(self, token_hash: str, *, now: float | None = None) -> None:
    """Set the session's ``last_used_at`` to ``now`` (wall clock when omitted)."""
    if now is None:
        now = time.time()
    touch_sql = "UPDATE sessions SET last_used_at=$1 WHERE token_hash=$2"
    await self._execute(touch_sql, now, token_hash)
3113
+
3114
async def mark_session_reauthed(
    self, token_hash: str, *, now: float | None = None, client: str | None = None
) -> None:
    """Stamp the session's ``reauth_at`` with ``now`` and optionally re-anchor its ``client``."""
    if now is None:
        now = time.time()
    # COALESCE keeps the stored client when none is supplied; a re-verify carrying the current
    # address re-anchors the session to it (WP-L3-13 new-client-IP step-up).
    reauth_sql = "UPDATE sessions SET reauth_at=$1, client=COALESCE($2, client) WHERE token_hash=$3"
    await self._execute(reauth_sql, now, client, token_hash)
3126
+
3127
async def mark_session_mfa_verified(self, token_hash: str, *, now: float | None = None) -> None:
    """Set the session's ``mfa_verified_at`` to ``now`` (wall clock when omitted)."""
    if now is None:
        now = time.time()
    verified_sql = "UPDATE sessions SET mfa_verified_at=$1 WHERE token_hash=$2"
    await self._execute(verified_sql, now, token_hash)
3132
+
3133
async def revoke_session(self, token_hash: str, *, now: float | None = None) -> None:
    """Revoke one session by stamping ``revoked_at``.

    An already-revoked session is left alone, so its original revocation time is kept.
    """
    if now is None:
        now = time.time()
    revoke_sql = "UPDATE sessions SET revoked_at=$1 WHERE token_hash=$2 AND revoked_at IS NULL"
    await self._execute(revoke_sql, now, token_hash)
3140
+
3141
async def revoke_user_sessions(
    self, user_id: str, *, except_token_hash: str | None = None, now: float | None = None
) -> int:
    """Revoke a user's not-yet-revoked sessions; returns how many rows were updated.

    When ``except_token_hash`` is given, that one session is spared. With ``None``, every
    un-revoked session of the user is revoked.
    """
    if now is None:
        now = time.time()
    revoke_sql = (
        "UPDATE sessions SET revoked_at=$1 WHERE user_id=$2 AND revoked_at IS NULL"
        " AND ($3::text IS NULL OR token_hash != $3)"
    )
    tag = await self._pool.execute(revoke_sql, now, user_id, except_token_hash)
    return _rowcount(tag)
3154
+
3155
async def enforce_session_cap(
    self, user_id: str, *, keep: int, now: float | None = None
) -> None:
    """Revoke a user's un-revoked sessions beyond the ``keep`` most recently created (AUTH-SESS-CAP).

    Survivors are chosen by ``created_at`` descending, with ``token_hash`` descending as the
    tie-break. A non-positive ``keep`` is a no-op: nothing is revoked.
    """
    if keep <= 0:
        return
    if now is None:
        now = time.time()
    cap_sql = (
        "UPDATE sessions SET revoked_at=$1 WHERE user_id=$2 AND revoked_at IS NULL"
        " AND token_hash NOT IN ("
        " SELECT token_hash FROM sessions WHERE user_id=$2 AND revoked_at IS NULL"
        " ORDER BY created_at DESC, token_hash DESC LIMIT $3"
        ")"
    )
    await self._execute(cap_sql, now, user_id, keep)
3172
+
3173
async def purge_expired_sessions(self, *, now: float | None = None) -> int:
    """Delete sessions whose ``expires_at`` is earlier than ``now``; returns the number deleted."""
    if now is None:
        now = time.time()
    tag = await self._pool.execute("DELETE FROM sessions WHERE expires_at < $1", now)
    deleted = _rowcount(tag)
    return deleted
3177
+
3178
+ # --- retention / purge + maintenance (PHI.md §8) -------------------------
3179
+
3180
async def purge_message_bodies(self, *, older_than: float, now: float | None = None) -> int:
    """Blank the PHI **bodies** of fully-resolved messages received before ``older_than`` while
    keeping their metadata rows (the Mirth Data-Pruner pattern). Eligible only when the message has
    no queue row still ``pending``/``inflight``. Ported, not stubbed — Postgres supports retention.

    In one transaction, for every eligible message:

    * ``messages``: ``raw`` is set to ``''`` and ``summary`` / ``error`` to NULL;
    * ``queue``: outbound rows in ``done`` / ``cancelled`` get ``payload=''``, ``last_error=NULL``;
    * ``message_events``: ``detail`` is set to NULL;
    * ``response``: ``body`` and ``detail`` are set to NULL.

    ``now`` is accepted and normalised but none of the statements below reference it.

    Returns the number of ``messages`` rows whose body was blanked (the other tables' counts are
    not included)."""
    now = time.time() if now is None else now
    inflight = [OutboxStatus.PENDING.value, OutboxStatus.INFLIGHT.value]
    # A message past the cutoff with nothing still in flight. This subquery is embedded in every
    # UPDATE below (four of them); it consumes exactly $1 (older_than) and $2 (inflight[]), so each
    # outer query must keep passing those two FIRST and continue its own binds from $3. Don't
    # add/remove a bind here without re-numbering the outer queries.
    eligible = (
        "SELECT id FROM messages m WHERE m.received_at < $1"
        " AND NOT EXISTS (SELECT 1 FROM queue q WHERE q.message_id = m.id"
        " AND q.status = ANY($2::text[]))"
    )
    async with self._pool.acquire() as conn:
        async with conn.transaction():
            # `raw <> ''` skips already-purged rows, so the returned count is newly purged only.
            result = await conn.execute(
                f"UPDATE messages SET raw='', summary=NULL, error=NULL"
                f" WHERE raw <> '' AND id IN ({eligible})",
                older_than,
                inflight,
            )
            purged = _rowcount(result)
            # Outbound queue rows: $3 = stage, $4 = the terminal statuses whose payload is blanked.
            await conn.execute(
                f"UPDATE queue SET payload='', last_error=NULL"
                f" WHERE stage=$3 AND status = ANY($4::text[]) AND payload <> ''"
                f" AND message_id IN ({eligible})",
                older_than,
                inflight,
                Stage.OUTBOUND.value,
                [OutboxStatus.DONE.value, OutboxStatus.CANCELLED.value],
            )
            await conn.execute(
                f"UPDATE message_events SET detail=NULL"
                f" WHERE detail IS NOT NULL AND message_id IN ({eligible})",
                older_than,
                inflight,
            )
            # Captured replies (ADR 0013) are PHI on the same window as the body; null in place
            # (row kept, FK to messages(id) never violated — purge keeps the messages row).
            await conn.execute(
                f"UPDATE response SET body=NULL, detail=NULL"
                f" WHERE (body IS NOT NULL OR detail IS NOT NULL) AND message_id IN ({eligible})",
                older_than,
                inflight,
            )
    return purged
3229
+
3230
async def purge_dead_letters(self, *, older_than: float, now: float | None = None) -> int:
    """Blank the bodies of dead-lettered **outbound** rows last updated before ``older_than``.

    Dead letters have their own retention window. The row and its ``dead`` status are kept; only
    ``payload`` (set to ``''``) and ``last_error`` (set to NULL) are cleared. Rows whose payload is
    already empty are skipped. Ported, not stubbed. Returns the number of dead rows purged.
    """
    if now is None:
        now = time.time()  # normalised as in the sibling purges; not referenced by the SQL below
    blank_sql = (
        "UPDATE queue SET payload='', last_error=NULL"
        " WHERE stage=$1 AND status=$2 AND payload <> '' AND updated_at < $3"
    )
    tag = await self._pool.execute(
        blank_sql, Stage.OUTBOUND.value, OutboxStatus.DEAD.value, older_than
    )
    return _rowcount(tag)
3243
+
3244
async def purge_state(self, *, older_than: float, now: float | None = None) -> int:
    """Delete transform-state entries last written before ``older_than`` (ADR 0005 retention) and
    evict them from the read cache after commit. Ported, not stubbed. Returns the number purged.

    The rows are removed with a single ``DELETE ... RETURNING``, so the keys evicted from the
    cache, the namespaces bumped and the returned count describe exactly the rows this statement
    deleted. A separate SELECT followed by a DELETE reads two snapshots and can disagree when a
    row is rewritten in between.

    Track B Step 6b: when clustered, bump the version of each DISTINCT namespace a row was purged from
    (atomically with the delete) so a follower's converge_state_cache() re-reads it and drops the same
    keys (the version-scan reload re-seeds the surviving rows, leaving the purged keys gone). Gated, so
    single-node writes no state_version rows and stays byte-identical."""
    now = time.time() if now is None else now
    async with self._pool.acquire() as conn:
        async with conn.transaction():
            rows = await conn.fetch(
                "DELETE FROM state WHERE set_at < $1 RETURNING namespace, key", older_than
            )
            purged_keys = [(r["namespace"], r["key"]) for r in rows]
            if not purged_keys:
                return 0
            bumped: list[tuple[str, int]] = []
            if self._cluster_state_convergence:
                for ns in dict.fromkeys(n for n, _ in purged_keys):  # distinct, order-stable
                    vrow = await conn.fetchrow(
                        "INSERT INTO state_version (namespace, version, updated_at) "
                        "VALUES ($1, 1, $2) "
                        "ON CONFLICT (namespace) DO UPDATE SET "
                        "version = state_version.version + 1, updated_at = excluded.updated_at "
                        "RETURNING version",
                        ns,
                        now,
                    )
                    assert vrow is not None, "state_version upsert returned no row"
                    bumped.append((ns, int(vrow["version"])))
    # Commit succeeded → evict the purged keys from the read-through cache.
    for ck in purged_keys:
        self._state_cache.pop(ck, None)
    # Record this node's new per-namespace versions so its own converge skips re-reading them.
    for ns, ver in bumped:
        self._state_versions[ns] = ver
    return len(purged_keys)
3283
+
3284
async def wal_checkpoint(self) -> None:
    """Do nothing: Postgres has no SQLite WAL to checkpoint (its autovacuum/checkpointer manage
    this). The method exists only so the class satisfies the ``Store`` protocol."""
    return None
3287
+
3288
async def vacuum(self) -> None:
    """Do nothing: on Postgres autovacuum reclaims space and a manual VACUUM is a DBA operation,
    not an engine concern. The method exists only so the class satisfies the ``Store`` protocol."""
    return None
3291
+
3292
+ # --- store health / metrics ----------------------------------------------
3293
+
3294
async def db_status(self) -> DbStatus:
    """Summarise the store: database size plus row counts for messages, events and audit.

    ``size_bytes`` comes from ``pg_database_size(current_database())`` (0 when unavailable);
    ``disk_free_bytes`` is always 0 and ``journal_mode`` is the fixed label ``"postgres"``.
    """
    size_row = await self._fetchone("SELECT pg_database_size(current_database()) AS b")
    location = self.path
    size_bytes = 0
    if size_row and size_row["b"] is not None:
        size_bytes = int(size_row["b"])
    message_total = await self._count("messages")
    event_total = await self._count("message_events")
    audit_total = await self._count("audit_log")
    return DbStatus(
        path=location,
        size_bytes=size_bytes,
        disk_free_bytes=0,  # not readily available for a remote Postgres server
        journal_mode="postgres",
        messages=message_total,
        events=event_total,
        audit=audit_total,
    )
3305
+
3306
async def integrity_check(self) -> tuple[bool, str]:
    """Probe connectivity with ``SELECT 1`` and report success.

    Only a reachability check — deep checks (amcheck / pg_amcheck) are an out-of-band DBA task.
    A failing probe surfaces as whatever exception ``_fetchone`` raises.
    """
    probe_sql = "SELECT 1 AS ok"
    await self._fetchone(probe_sql)
    verdict = (True, "ok (postgres; deep checks are a DBA task)")
    return verdict
3310
+
3311
async def connection_metrics(
    self, *, since: float, now: float | None = None, rate_window: float = 60.0
) -> ConnectionMetrics:
    """Aggregate per-channel inbound and per-destination outbound metrics for the connections
    dashboard (mirrors the SQLite store; outbound rows only).

    ``since`` bounds the inbound ``read``/``errored`` counts and the outbound ``written``/``dead``
    counts. ``rate_window`` (seconds, measured back from ``now``) bounds ``recent_done``.
    ``last_at``, ``queue_depth``, ``oldest_pending_at`` and ``last_done_at`` are not time-bounded.

    Returns a ``ConnectionMetrics`` whose ``inbound`` is keyed by channel id and whose
    ``destinations`` is keyed by ``(channel_id, destination_name)``."""
    now = time.time() if now is None else now
    rate_since = now - rate_window

    # Inbound, windowed: messages received since `since`, and how many of those are in ERROR.
    count_rows = await self._fetchall(
        "SELECT channel_id, COUNT(*) AS read,"
        " SUM(CASE WHEN status=$1 THEN 1 ELSE 0 END) AS errored"
        " FROM messages WHERE received_at>=$2 GROUP BY channel_id",
        MessageStatus.ERROR.value,
        since,
    )
    counts = {r["channel_id"]: (r["read"], r["errored"]) for r in count_rows}
    # Inbound, all-time: the latest receive time per channel (includes channels idle in the window).
    last_rows = await self._fetchall(
        "SELECT channel_id, MAX(received_at) AS last_at FROM messages GROUP BY channel_id"
    )
    inbound: dict[str, InboundMetrics] = {}
    for r in last_rows:
        # pop() so that whatever is left in `counts` afterwards had no matching last_at row.
        read, errored = counts.pop(r["channel_id"], (0, 0))
        inbound[r["channel_id"]] = InboundMetrics(
            read=int(read), errored=int(errored or 0), last_at=r["last_at"]
        )
    # Channels seen by the windowed query but not by the last_at query (the two run separately).
    for cid, (read, errored) in counts.items():
        inbound[cid] = InboundMetrics(read=int(read), errored=int(errored or 0), last_at=None)

    # Outbound binds, in order: $1,$2 pending+inflight (queue_depth) · $3,$4 done since `since`
    # (written) · $5,$6 dead since `since` (dead) · $7 pending (oldest_pending_at) · $8,$9 done
    # since `rate_since` (recent_done) · $10 done (last_done_at) · $11 the outbound stage filter.
    dest_rows = await self._fetchall(
        "SELECT channel_id, destination_name,"
        " SUM(CASE WHEN status IN ($1,$2) THEN 1 ELSE 0 END) AS queue_depth,"
        " SUM(CASE WHEN status=$3 AND updated_at>=$4 THEN 1 ELSE 0 END) AS written,"
        " SUM(CASE WHEN status=$5 AND updated_at>=$6 THEN 1 ELSE 0 END) AS dead,"
        " MIN(CASE WHEN status=$7 THEN created_at END) AS oldest_pending_at,"
        " SUM(CASE WHEN status=$8 AND updated_at>=$9 THEN 1 ELSE 0 END) AS recent_done,"
        " MAX(CASE WHEN status=$10 THEN updated_at END) AS last_done_at"
        " FROM queue WHERE stage=$11 GROUP BY channel_id, destination_name",
        OutboxStatus.PENDING.value,
        OutboxStatus.INFLIGHT.value,
        OutboxStatus.DONE.value,
        since,
        OutboxStatus.DEAD.value,
        since,
        OutboxStatus.PENDING.value,
        OutboxStatus.DONE.value,
        rate_since,
        OutboxStatus.DONE.value,
        Stage.OUTBOUND.value,
    )
    destinations: dict[tuple[str, str], DestinationMetrics] = {}
    for r in dest_rows:
        # `or 0` guards the SUM columns against a NULL result.
        destinations[(r["channel_id"], r["destination_name"])] = DestinationMetrics(
            queue_depth=int(r["queue_depth"] or 0),
            written=int(r["written"] or 0),
            dead=int(r["dead"] or 0),
            oldest_pending_at=r["oldest_pending_at"],
            recent_done=int(r["recent_done"] or 0),
            last_done_at=r["last_done_at"],
        )
    return ConnectionMetrics(inbound=inbound, destinations=destinations)
3371
+
3372
+ # --- internals -----------------------------------------------------------
3373
+
3374
async def _maybe_finalize_message(self, conn: Any, message_id: str, now: float) -> None:
    """Drive a message to its terminal disposition from its queue rows across **all** stages — the
    single source of truth for the staged-pipeline count-and-log flow (ADR 0001 Step B; the FULL
    finalizer with the ROUTED→FILTERED collapse, ported from MessageStore, not the simpler
    outbound-only one).

    H-8: takes the **per-message** finalize advisory lock (before the ``messages`` UPDATE *within
    finalize*) so per-message finalize is serialized — the lock auto-releases at the enclosing
    transaction's commit — and recomputes on a fresh snapshot, so no double-finalize. Different
    message_ids never contend. The lock is re-entrant, so a caller that pre-locks a batch in
    canonical order (:meth:`_lock_finalize_batch` in cancel_queued / the dead-letter sweeps, to
    avoid a multi-message lock-ordering deadlock) re-takes it here as a no-op.

    The message is **not** finalized while ANY row at ANY stage is still pending/inflight. Once
    nothing is in flight, in strict precedence: any **dead** row anywhere → ``ERROR``; else any
    **outbound** row exists → ``PROCESSED``; else **no rows remain** and the message is still
    ``ROUTED`` → ``FILTERED`` (every selected handler ran and produced zero deliveries); else leave
    the disposition the handoff set.

    ``conn`` is the caller's connection; all statements here run on it. ``now`` is accepted but not
    referenced in this body."""
    # Lock first, then read: the queue snapshot below is taken while holding the per-message lock.
    await self._advisory_lock(
        conn, _LOCK_CLASS_FINALIZE, f"{_FINALIZE_LOCK_PREFIX}{message_id}"
    )
    # One row per (stage, status) combination present for this message.
    rows = await conn.fetch(
        "SELECT stage, status, COUNT(*) AS n FROM queue WHERE message_id=$1 GROUP BY stage, status",
        message_id,
    )
    if any(
        r["status"] in (OutboxStatus.PENDING.value, OutboxStatus.INFLIGHT.value) for r in rows
    ):
        return  # in flight at any stage → still moving; do not finalize
    # Precedence matters from here on: dead beats outbound, outbound beats "no rows".
    if any(r["status"] == OutboxStatus.DEAD.value for r in rows):
        status = MessageStatus.ERROR.value
    elif any(r["stage"] == Stage.OUTBOUND.value for r in rows):
        status = MessageStatus.PROCESSED.value  # all delivered (or operator-cancelled)
    elif not rows:
        # No queue rows remain. ROUTED here means every handler's transform produced zero
        # deliveries → collapse to FILTERED. UNROUTED / already-FILTERED keep their status.
        msg = await conn.fetchrow("SELECT status FROM messages WHERE id=$1", message_id)
        if msg is None or msg["status"] != MessageStatus.ROUTED.value:
            return
        status = MessageStatus.FILTERED.value
    else:
        return  # only terminal non-dead non-outbound rows (shouldn't occur) — leave as-is
    await conn.execute("UPDATE messages SET status=$1 WHERE id=$2", status, message_id)
3417
+
3418
+
3419
+ def _rowcount(command_tag: str) -> int:
3420
+ """Parse the affected-row count out of an asyncpg command tag (e.g. ``"UPDATE 3"`` → ``3``,
3421
+ ``"DELETE 0"`` → ``0``). asyncpg returns the tag string from ``Connection.execute``; the count is
3422
+ its last whitespace-separated token. Returns 0 when no trailing integer is present."""
3423
+ if not command_tag:
3424
+ return 0
3425
+ token = command_tag.rsplit(" ", 1)[-1]
3426
+ try:
3427
+ return int(token)
3428
+ except ValueError:
3429
+ return 0
3430
+
3431
+
3432
+ def _append_channel_scope_pg(
3433
+ clauses: list[str],
3434
+ params: list[Any],
3435
+ column: str,
3436
+ allowed_channels: Sequence[str] | None,
3437
+ ) -> None:
3438
+ """Restrict ``column`` to a per-channel RBAC scope using a Postgres ``= ANY($n::text[])`` array
3439
+ bind (the dialect-correct parallel of ``store._append_channel_scope``'s ``IN (?, …)``). ``None`` =
3440
+ no restriction; an empty set = match nothing. ``column`` is a code-controlled literal."""
3441
+ if allowed_channels is None:
3442
+ return
3443
+ if allowed_channels:
3444
+ params.append(list(allowed_channels))
3445
+ clauses.append(f"{column} = ANY(${len(params)}::text[])")
3446
+ else:
3447
+ clauses.append("1=0") # scoped to no channels