messagefoundry 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- messagefoundry/__init__.py +108 -0
- messagefoundry/__main__.py +1155 -0
- messagefoundry/api/__init__.py +27 -0
- messagefoundry/api/app.py +1581 -0
- messagefoundry/api/approvals.py +184 -0
- messagefoundry/api/auth_models.py +211 -0
- messagefoundry/api/auth_routes.py +655 -0
- messagefoundry/api/field_authz.py +96 -0
- messagefoundry/api/models.py +374 -0
- messagefoundry/api/security.py +247 -0
- messagefoundry/api/tls.py +47 -0
- messagefoundry/auth/__init__.py +39 -0
- messagefoundry/auth/data/common_passwords.NOTICE +13 -0
- messagefoundry/auth/data/common_passwords.txt +10000 -0
- messagefoundry/auth/identity.py +71 -0
- messagefoundry/auth/ldap.py +264 -0
- messagefoundry/auth/notifications.py +68 -0
- messagefoundry/auth/passwords.py +53 -0
- messagefoundry/auth/permissions.py +120 -0
- messagefoundry/auth/policy.py +153 -0
- messagefoundry/auth/ratelimit.py +55 -0
- messagefoundry/auth/service.py +1323 -0
- messagefoundry/auth/tokens.py +26 -0
- messagefoundry/auth/totp.py +174 -0
- messagefoundry/checks.py +174 -0
- messagefoundry/config/__init__.py +30 -0
- messagefoundry/config/active_environment.py +80 -0
- messagefoundry/config/ai_policy.py +140 -0
- messagefoundry/config/code_sets.py +260 -0
- messagefoundry/config/connections_edit.py +200 -0
- messagefoundry/config/connections_file.py +287 -0
- messagefoundry/config/db_lookup.py +117 -0
- messagefoundry/config/environments.py +116 -0
- messagefoundry/config/ingest_time.py +83 -0
- messagefoundry/config/models.py +240 -0
- messagefoundry/config/reference.py +158 -0
- messagefoundry/config/response.py +83 -0
- messagefoundry/config/run_context.py +153 -0
- messagefoundry/config/settings.py +1311 -0
- messagefoundry/config/state.py +99 -0
- messagefoundry/config/tls_policy.py +110 -0
- messagefoundry/config/wiring.py +1918 -0
- messagefoundry/console/__init__.py +20 -0
- messagefoundry/console/__main__.py +274 -0
- messagefoundry/console/_async.py +107 -0
- messagefoundry/console/change_password.py +111 -0
- messagefoundry/console/client.py +552 -0
- messagefoundry/console/connections.py +324 -0
- messagefoundry/console/login.py +107 -0
- messagefoundry/console/mfa.py +205 -0
- messagefoundry/console/reauth.py +94 -0
- messagefoundry/console/search.py +57 -0
- messagefoundry/console/service_control.py +137 -0
- messagefoundry/console/sessions.py +122 -0
- messagefoundry/console/shell.py +410 -0
- messagefoundry/console/status.py +377 -0
- messagefoundry/console/users_page.py +282 -0
- messagefoundry/console/widgets.py +553 -0
- messagefoundry/generators/README.md +27 -0
- messagefoundry/generators/__init__.py +15 -0
- messagefoundry/generators/_core.py +589 -0
- messagefoundry/generators/_hl7data.py +428 -0
- messagefoundry/generators/adt.py +286 -0
- messagefoundry/generators/all_types.py +24 -0
- messagefoundry/generators/bar.py +28 -0
- messagefoundry/generators/dft.py +20 -0
- messagefoundry/generators/mdm.py +39 -0
- messagefoundry/generators/mfn.py +46 -0
- messagefoundry/generators/oml.py +32 -0
- messagefoundry/generators/orl.py +30 -0
- messagefoundry/generators/orm.py +23 -0
- messagefoundry/generators/oru.py +21 -0
- messagefoundry/generators/ras.py +20 -0
- messagefoundry/generators/rde.py +54 -0
- messagefoundry/generators/siu.py +64 -0
- messagefoundry/generators/vxu.py +20 -0
- messagefoundry/hl7schema.py +75 -0
- messagefoundry/last_resort.py +55 -0
- messagefoundry/logging_setup.py +332 -0
- messagefoundry/parsing/__init__.py +64 -0
- messagefoundry/parsing/consistency.py +166 -0
- messagefoundry/parsing/groups.py +228 -0
- messagefoundry/parsing/message.py +453 -0
- messagefoundry/parsing/peek.py +237 -0
- messagefoundry/parsing/split.py +120 -0
- messagefoundry/parsing/summary.py +46 -0
- messagefoundry/parsing/tree.py +128 -0
- messagefoundry/parsing/validate.py +95 -0
- messagefoundry/parsing/x12/__init__.py +46 -0
- messagefoundry/parsing/x12/delimiters.py +140 -0
- messagefoundry/parsing/x12/errors.py +30 -0
- messagefoundry/parsing/x12/interchange.py +232 -0
- messagefoundry/parsing/x12/message.py +200 -0
- messagefoundry/parsing/x12/peek.py +207 -0
- messagefoundry/pipeline/__init__.py +21 -0
- messagefoundry/pipeline/alert_sinks.py +486 -0
- messagefoundry/pipeline/alerts.py +100 -0
- messagefoundry/pipeline/cert_expiry.py +219 -0
- messagefoundry/pipeline/cluster.py +955 -0
- messagefoundry/pipeline/cluster_sqlserver.py +444 -0
- messagefoundry/pipeline/config_convergence.py +137 -0
- messagefoundry/pipeline/dryrun.py +450 -0
- messagefoundry/pipeline/engine.py +756 -0
- messagefoundry/pipeline/leader_tasks.py +158 -0
- messagefoundry/pipeline/reference_sync.py +369 -0
- messagefoundry/pipeline/retention.py +289 -0
- messagefoundry/pipeline/security_notify.py +168 -0
- messagefoundry/pipeline/state_convergence.py +143 -0
- messagefoundry/pipeline/wiring_runner.py +1722 -0
- messagefoundry/py.typed +0 -0
- messagefoundry/redaction.py +71 -0
- messagefoundry/scaffold.py +321 -0
- messagefoundry/secrets_dpapi.py +129 -0
- messagefoundry/store/__init__.py +46 -0
- messagefoundry/store/audit_tee.py +67 -0
- messagefoundry/store/base.py +758 -0
- messagefoundry/store/crypto.py +166 -0
- messagefoundry/store/keyprovider.py +192 -0
- messagefoundry/store/postgres.py +3447 -0
- messagefoundry/store/sqlserver.py +3014 -0
- messagefoundry/store/store.py +3790 -0
- messagefoundry/timezone.py +207 -0
- messagefoundry/transports/__init__.py +50 -0
- messagefoundry/transports/base.py +269 -0
- messagefoundry/transports/database.py +693 -0
- messagefoundry/transports/file.py +551 -0
- messagefoundry/transports/framing.py +164 -0
- messagefoundry/transports/loopback.py +53 -0
- messagefoundry/transports/mllp.py +644 -0
- messagefoundry/transports/remotefile.py +664 -0
- messagefoundry/transports/rest.py +281 -0
- messagefoundry/transports/signing.py +321 -0
- messagefoundry/transports/soap.py +507 -0
- messagefoundry/transports/tcp.py +307 -0
- messagefoundry/transports/timer.py +146 -0
- messagefoundry/transports/x12.py +323 -0
- messagefoundry-0.1.0.dist-info/METADATA +212 -0
- messagefoundry-0.1.0.dist-info/RECORD +142 -0
- messagefoundry-0.1.0.dist-info/WHEEL +4 -0
- messagefoundry-0.1.0.dist-info/entry_points.txt +2 -0
- messagefoundry-0.1.0.dist-info/licenses/LICENSE +662 -0
- messagefoundry-0.1.0.dist-info/licenses/NOTICE +27 -0
|
@@ -0,0 +1,693 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
+
# Copyright (C) 2026 MessageFoundry Organization and contributors
|
|
3
|
+
"""DATABASE transport: a SQL destination that runs one parameterized statement per payload.
|
|
4
|
+
|
|
5
|
+
The **destination** executes the operator-declared ``statement`` (an INSERT/UPDATE or a stored-procedure
|
|
6
|
+
call) against an outbound database, binding the payload's fields to the statement's ``:name``
|
|
7
|
+
parameters. The first backend is **SQL Server over ``aioodbc``** (ADR 0003) — the ``[sqlserver]`` extra
|
|
8
|
+
(``pip install 'messagefoundry[sqlserver]'``) plus the Microsoft ODBC Driver 18, **lazily imported** so
|
|
9
|
+
SQLite-only installs never touch it. **Status: production / supported** — SQL Server only, via that
|
|
10
|
+
extra. The live aioodbc round-trip is exercised by the CI SQL Server service-container job
|
|
11
|
+
(``tests/test_database_connector_integration.py``); the connector logic is also unit-tested with a
|
|
12
|
+
faked driver. The SQL Server *store* backend is a **separate** (also production) layer — this
|
|
13
|
+
connector does not depend on it.
|
|
14
|
+
|
|
15
|
+
**Parameters.** The Handler produces a **JSON object** body; the connector binds its keys to the
|
|
16
|
+
``:name`` placeholders in ``statement`` (translated to positional ODBC ``?`` — always parameterized,
|
|
17
|
+
never string-built, so a value can't inject SQL). A ``:name`` must not appear inside a quoted string
|
|
18
|
+
literal in the statement (bind dynamic strings as parameters, which is the correct practice anyway).
|
|
19
|
+
|
|
20
|
+
**Error mapping.** A *transient* DB failure (connection drop / deadlock / timeout — SQLSTATE class
|
|
21
|
+
``08``/``40`` or ``HYTxx``) → :class:`DeliveryError` (the lane retries). A *permanent* failure
|
|
22
|
+
(constraint / data / syntax) and a payload that doesn't match the statement → :class:`NegativeAckError`
|
|
23
|
+
(``permanent=True``) → dead-letter, since a retry can't fix it.
|
|
24
|
+
|
|
25
|
+
**Idempotency.** Delivery is at-least-once, so a retry **re-executes** the statement. Use an idempotent
|
|
26
|
+
write (``MERGE``/upsert on a natural key, or a de-dup) so a retry doesn't double-apply. See
|
|
27
|
+
docs/CONNECTIONS.md.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import asyncio
|
|
33
|
+
import base64
|
|
34
|
+
import json
|
|
35
|
+
import logging
|
|
36
|
+
import re
|
|
37
|
+
from collections.abc import Callable, Mapping
|
|
38
|
+
from datetime import date, datetime
|
|
39
|
+
from decimal import Decimal
|
|
40
|
+
from typing import Any
|
|
41
|
+
|
|
42
|
+
from messagefoundry.config.db_lookup import DbLookupError
|
|
43
|
+
from messagefoundry.config.models import ConnectorType, Destination, Source
|
|
44
|
+
from messagefoundry.config.settings import INSECURE_TLS_ESCAPE_ENV, insecure_tls_allowed
|
|
45
|
+
from messagefoundry.transports.base import (
|
|
46
|
+
DeliveryError,
|
|
47
|
+
DeliveryResponse,
|
|
48
|
+
DestinationConnector,
|
|
49
|
+
InboundHandler,
|
|
50
|
+
NegativeAckError,
|
|
51
|
+
SourceConnector,
|
|
52
|
+
register_destination,
|
|
53
|
+
register_source,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
__all__ = ["DatabaseDestination", "DatabaseLookupExecutor", "DatabaseSource"]
|
|
57
|
+
|
|
58
|
+
logger = logging.getLogger(__name__)
|
|
59
|
+
|
|
60
|
+
# A `:name` parameter, but not `::` (a PostgreSQL-style cast) and not a `:` preceded by a word char
|
|
61
|
+
# (so a time literal like '12:30' inside the SQL is left alone). String-literal colons are otherwise
|
|
62
|
+
# the operator's responsibility — bind dynamic strings as parameters, not inline literals.
|
|
63
|
+
_PARAM_RE = re.compile(r"(?<![:\w]):(\w+)")
|
|
64
|
+
|
|
65
|
+
# SQLSTATE classes that are worth retrying: 08 = connection exception, 40 = transaction rollback /
|
|
66
|
+
# deadlock (40001); plus the ODBC connect/operation timeouts.
|
|
67
|
+
_TRANSIENT_PREFIXES = ("08", "40")
|
|
68
|
+
_TRANSIENT_STATES = frozenset({"HYT00", "HYT01"})
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _odbc_brace(value: str) -> str:
|
|
72
|
+
"""ODBC-quote a value in braces, doubling any internal ``}`` — neutralizes ``; { } =`` inside it so
|
|
73
|
+
an attacker-influenced value (e.g. a password) can't inject extra connection keywords (mirrors the
|
|
74
|
+
store's ``connection_string`` hardening)."""
|
|
75
|
+
return "{" + value.replace("}", "}}") + "}"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _build_dsn(s: dict[str, Any]) -> str:
    """Assemble the SQL Server ODBC connection string from the connection settings ``s``.

    Free-text values go through :func:`_odbc_brace` (injection guard). ``Encrypt`` and
    ``TrustServerCertificate`` are appended **last** (ODBC is last-wins, so nothing emitted
    earlier can downgrade TLS). A weakened TLS posture is **refused** unless the explicit dev
    escape is set, exactly like the store backend.

    Raises:
        ValueError: TLS is weakened without the dev escape, ``auth`` is not one of
            ``sql``/``integrated``/``entra``, or ``server`` contains a connection-string
            metacharacter.
        KeyError: ``server`` or ``database`` is missing from ``s``.
    """
    encrypt = bool(s.get("encrypt", True))
    trust = bool(s.get("trust_server_certificate", False))
    tls_weakened = trust or not encrypt
    # The escape hatch is consulted only when the posture is actually weakened.
    if tls_weakened and not insecure_tls_allowed():
        raise ValueError(
            "DATABASE destination TLS is weakened (trust_server_certificate=true or encrypt=false), "
            f"which is MITM-able. Use a trusted server certificate, or set {INSECURE_TLS_ESCAPE_ENV}=1 "
            "to explicitly allow it for a trusted-network dev/test bind."
        )

    auth = str(s.get("auth", "sql")).lower()
    if auth not in ("sql", "integrated", "entra"):
        raise ValueError(f"DATABASE destination auth must be sql|integrated|entra, got {auth!r}")

    # SERVER has to stay UNBRACED so the driver can parse the ",port" suffix and resolve the host
    # for the TLS handshake: a brace-quoted "SERVER={host},port" is malformed ODBC (content after
    # the closing brace) and breaks certificate handling against a real SQL Server. The host is
    # therefore *validated* for connection-string metacharacters rather than brace-quoted, exactly
    # like the store backend's connection_string.
    server = str(s["server"])
    if not set(";{}=\r\n").isdisjoint(server):
        raise ValueError(
            "DATABASE server must not contain ';', '{', '}', '=', or newlines (ODBC injection risk)"
        )

    driver = _odbc_brace(str(s.get("odbc_driver", "ODBC Driver 18 for SQL Server")))
    port = int(s.get("port", 1433))
    database = _odbc_brace(str(s["database"]))
    connect_timeout = int(s.get("connect_timeout", 15))
    app_name = _odbc_brace(str(s.get("app_name", "messagefoundry")))
    parts = [
        f"DRIVER={driver}",
        f"SERVER={server},{port}",
        f"DATABASE={database}",
        f"Connection Timeout={connect_timeout}",
        f"APP={app_name}",
    ]

    if auth == "sql":
        # A missing/None credential is emitted as an empty braced value.
        parts.append(f"UID={_odbc_brace(str(s.get('username') or ''))}")
        parts.append(f"PWD={_odbc_brace(str(s.get('password') or ''))}")
    elif auth == "integrated":
        parts.append("Trusted_Connection=yes")
    else:  # entra
        parts.append("Authentication=ActiveDirectoryDefault")

    # TLS keywords go last on purpose (see the docstring).
    parts.append(f"Encrypt={'yes' if encrypt else 'no'}")
    parts.append(f"TrustServerCertificate={'yes' if trust else 'no'}")
    return ";".join(parts) + ";"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _parse_named_params(statement: str) -> tuple[str, list[str]]:
    """Rewrite each ``:name`` placeholder as a positional ``?``.

    Returns ``(sql, ordered_names)``: the rewritten statement and the placeholder names in the
    order they occur (a name used twice appears twice), matching the ``?`` positions.
    """
    # Both passes use the same compiled pattern, so the names line up with the substitutions.
    ordered_names = [match.group(1) for match in _PARAM_RE.finditer(statement)]
    positional_sql = _PARAM_RE.sub("?", statement)
    return positional_sql, ordered_names
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _bind_params(payload: str, names: list[str]) -> tuple[Any, ...]:
|
|
136
|
+
"""Bind a JSON-object payload to the statement's ``names`` (positional order).
|
|
137
|
+
|
|
138
|
+
A payload that isn't a JSON object, or that's missing a parameter, is a **permanent** data error
|
|
139
|
+
(a retry can't fix it) → :class:`NegativeAckError`."""
|
|
140
|
+
try:
|
|
141
|
+
data = json.loads(payload)
|
|
142
|
+
except json.JSONDecodeError as exc:
|
|
143
|
+
raise NegativeAckError(
|
|
144
|
+
f"DATABASE payload is not valid JSON: {exc}", code="payload", permanent=True
|
|
145
|
+
) from exc
|
|
146
|
+
if not isinstance(data, dict):
|
|
147
|
+
raise NegativeAckError(
|
|
148
|
+
"DATABASE payload must be a JSON object mapping parameter names to values",
|
|
149
|
+
code="payload",
|
|
150
|
+
permanent=True,
|
|
151
|
+
)
|
|
152
|
+
try:
|
|
153
|
+
return tuple(data[n] for n in names)
|
|
154
|
+
except KeyError as exc:
|
|
155
|
+
raise NegativeAckError(
|
|
156
|
+
f"DATABASE payload is missing parameter {exc}", code="payload", permanent=True
|
|
157
|
+
) from exc
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _is_transient(sqlstate: str) -> bool:
    """Return True when ``sqlstate`` is one of the retry-worthy states or classes.

    A state qualifies either by exact match against ``_TRANSIENT_STATES`` or by its two-character
    class prefix being listed in ``_TRANSIENT_PREFIXES``.
    """
    if sqlstate in _TRANSIENT_STATES:
        return True
    state_class = sqlstate[:2]
    return state_class in _TRANSIENT_PREFIXES
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _classify_db_error(sqlstate: str, message: str) -> DeliveryError:
    """Build (not raise) the connector error that corresponds to a DB error's SQLSTATE.

    A transient state yields a :class:`DeliveryError` (the lane retries); anything else yields a
    permanent :class:`NegativeAckError` (dead-letter) whose ``code`` is the SQLSTATE, or ``"db"``
    when the SQLSTATE is empty. The caller is responsible for raising the returned exception.
    """
    if not _is_transient(sqlstate):
        return NegativeAckError(
            f"database rejected the statement [{sqlstate}]: {message}",
            code=sqlstate or "db",
            permanent=True,
        )
    return DeliveryError(f"database transient error [{sqlstate}]: {message}")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _sqlstate(exc: BaseException) -> str | None:
|
|
177
|
+
"""The 5-character SQLSTATE a DB driver error carries in ``args[0]`` (pyodbc/aioodbc), or ``None``
|
|
178
|
+
if ``exc`` isn't shaped like a DB error — letting a genuine bug propagate as an internal error
|
|
179
|
+
rather than being misread as a transport failure."""
|
|
180
|
+
args = getattr(exc, "args", ())
|
|
181
|
+
if args and isinstance(args[0], str) and len(args[0]) == 5 and args[0].isalnum():
|
|
182
|
+
return args[0]
|
|
183
|
+
return None
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _import_aioodbc() -> Any:
|
|
187
|
+
"""Import the optional ``aioodbc`` driver, raising a clear install hint if the ``[sqlserver]`` extra
|
|
188
|
+
isn't present — so a SQLite-only install never touches it until a DATABASE connector is actually used."""
|
|
189
|
+
try:
|
|
190
|
+
import aioodbc
|
|
191
|
+
except ImportError as exc: # pragma: no cover - exercised only without the extra
|
|
192
|
+
raise RuntimeError(
|
|
193
|
+
"DATABASE connector requires the 'sqlserver' extra: "
|
|
194
|
+
"pip install 'messagefoundry[sqlserver]' (plus the Microsoft ODBC Driver 18)"
|
|
195
|
+
) from exc
|
|
196
|
+
return aioodbc
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
async def _make_pool(dsn: str, pool_max: int, *, autocommit: bool) -> Any:
    """Open an aioodbc connection pool for ``dsn``, importing the driver lazily.

    The pool is created with ``minsize=1`` and a ``maxsize`` of ``pool_max`` clamped to at
    least 1. The destination wraps execute+commit itself (``autocommit=False``); the source
    marks each row in its own auto-committed statement (``autocommit=True``).
    """
    driver = _import_aioodbc()
    upper_bound = max(1, pool_max)  # never ask the driver for a zero/negative pool
    pool = await driver.create_pool(
        dsn=dsn, minsize=1, maxsize=upper_bound, autocommit=autocommit
    )
    return pool
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# WP-L3-07 (ASVS 13.1.2/13.2.6): bound a pooled-connection borrow. One delivery/poll worker per
|
|
210
|
+
# connection means pool_max is never legitimately exhausted, so an acquire that can't be satisfied
|
|
211
|
+
# within the timeout means the pool is wedged or the DB is unresponsive — fail it transiently rather
|
|
212
|
+
# than block the worker forever (which would let the queue back up unbounded). Override per connection
|
|
213
|
+
# with the ``acquire_timeout`` setting.
|
|
214
|
+
_DEFAULT_DB_ACQUIRE_TIMEOUT = 30.0
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
async def _acquire(pool: Any, timeout: float) -> Any:
|
|
218
|
+
"""Acquire a connection from ``pool`` within ``timeout`` seconds, or raise a transient
|
|
219
|
+
:class:`DeliveryError` with a clear, PHI-free message. Wraps the driver's own ``acquire`` so a
|
|
220
|
+
hung/exhausted pool surfaces as a retryable failure instead of an unbounded await."""
|
|
221
|
+
try:
|
|
222
|
+
return await asyncio.wait_for(pool.acquire(), timeout)
|
|
223
|
+
except TimeoutError as exc:
|
|
224
|
+
raise DeliveryError(
|
|
225
|
+
f"DATABASE pool acquire timed out after {timeout:g}s (pool exhausted or DB unresponsive)"
|
|
226
|
+
) from exc
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
async def _probe_db(
    get_pool: Callable[[], Any], *, timeout: float = _DEFAULT_DB_ACQUIRE_TIMEOUT
) -> None:
    """Reachability probe: open the pool, borrow a connection, and run ``SELECT 1``.

    Shared by the DATABASE source's and destination's ``test_connection``; it reads no data and
    writes nothing. Calling ``get_pool`` triggers the connector's lazy pool, which the caller
    closes afterwards with ``aclose()``.

    Args:
        get_pool: Awaitable factory returning the connector's pool.
        timeout: Seconds allowed for borrowing a connection from the pool.

    Raises:
        DeliveryError: A failure carrying a SQLSTATE is mapped through
            :func:`_classify_db_error` (transient vs permanent); a failure without one (e.g. an
            unreachable host before any SQLSTATE) becomes a transient :class:`DeliveryError`.
    """
    # Phase 1: obtain the pool and a connection. Nothing has been borrowed if this fails, so
    # there is nothing to release.
    try:
        pool = await get_pool()
        conn = await _acquire(pool, timeout)
    except Exception as exc:
        code = _sqlstate(exc)
        if code:
            failure = _classify_db_error(code, str(exc))
        else:
            failure = DeliveryError(f"DATABASE connect failed: {exc}")
        raise failure from exc

    # Phase 2: run the probe statement; the connection goes back to the pool either way.
    try:
        cursor = await conn.cursor()
        await cursor.execute("SELECT 1")
    except Exception as exc:
        code = _sqlstate(exc)
        if code:
            failure = _classify_db_error(code, str(exc))
        else:
            failure = DeliveryError(f"DATABASE probe failed: {exc}")
        raise failure from exc
    finally:
        await pool.release(conn)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _json_default(value: Any) -> Any:
|
|
262
|
+
"""JSON-serialize DB column types ``json.dumps`` can't handle natively (dates, ``Decimal``, bytes),
|
|
263
|
+
so a polled row becomes a JSON-object body. An unknown type raises ``TypeError`` (surfaced as a
|
|
264
|
+
poll error and logged) rather than silently dropping data."""
|
|
265
|
+
if isinstance(value, (datetime, date)):
|
|
266
|
+
return value.isoformat()
|
|
267
|
+
if isinstance(value, Decimal):
|
|
268
|
+
return str(value)
|
|
269
|
+
if isinstance(value, (bytes, bytearray)):
|
|
270
|
+
return base64.b64encode(bytes(value)).decode("ascii")
|
|
271
|
+
raise TypeError(
|
|
272
|
+
f"DATABASE source cannot serialize a {type(value).__name__} column value to JSON"
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class DatabaseDestination(DestinationConnector):
    """Execute one parameterized statement per payload against a SQL database (SQL Server today).

    The connection string and the positional form of ``statement`` are computed once in
    ``__init__``; the connection pool is created lazily on first use and torn down by
    :meth:`aclose`.
    """

    def __init__(self, config: Destination) -> None:
        """Validate ``config.settings`` and precompute the DSN and positional SQL.

        Raises:
            ValueError: ``server``, ``database`` or ``statement`` is missing/empty, or the DSN
                settings are rejected by ``_build_dsn``.
        """
        s = config.settings
        for req in ("server", "database", "statement"):
            if not s.get(req):
                raise ValueError(f"DATABASE destination requires a {req!r} setting")
        self._dsn = _build_dsn(s)  # fail fast on a weakened-TLS / bad-auth config
        self._sql, self._param_names = _parse_named_params(str(s["statement"]))
        self._pool_max = int(s.get("pool_max", 5))
        self._acquire_timeout = float(s.get("acquire_timeout", _DEFAULT_DB_ACQUIRE_TIMEOUT))
        self._pool: Any = None
        self._pool_lock = asyncio.Lock()
        # ADR 0013: capture the statement's result-set (its RETURNING/OUTPUT rows). Default False →
        # returns None, byte-identical. Capture MUST be a RETURNING/OUTPUT clause of the write itself
        # (fetched from the SAME cursor BEFORE commit) — a separate post-commit SELECT would re-run on a
        # crash-replay against changed state. Wiring rejects a capturing statement with no RETURNING/
        # OUTPUT. The result-set is JSON-serialized and bounded by row/byte caps (over-cap →
        # outcome='unparseable' with an empty body, never an unbounded blob).
        self.capture_response: bool = bool(s.get("capture_response", False))
        self._capture_max_rows = int(s.get("capture_max_rows", 100))
        self._capture_max_bytes = int(s.get("capture_max_bytes", 256 * 1024))

    async def _get_pool(self) -> Any:
        """Return the connection pool, creating it on first call.

        The unlocked check is the fast path; the second check under ``_pool_lock`` keeps two
        concurrent first callers from each creating a pool.
        """
        if self._pool is not None:
            return self._pool
        async with self._pool_lock:
            if self._pool is None:
                self._pool = await _make_pool(self._dsn, self._pool_max, autocommit=False)
        return self._pool

    async def send(self, payload: str) -> DeliveryResponse | None:
        """Bind ``payload`` to the statement, execute it and commit.

        Returns:
            The captured result-set when ``capture_response`` is enabled, otherwise ``None``.

        Raises:
            NegativeAckError: The payload does not match the statement, or the database rejected
                the statement with a non-transient SQLSTATE.
            DeliveryError: The pool borrow timed out, or the database failed with a transient
                SQLSTATE.
            Exception: A failure without a SQLSTATE is re-raised unchanged.
        """
        params = _bind_params(payload, self._param_names)  # NegativeAckError(permanent) on bad data
        pool = await self._get_pool()
        conn = await _acquire(pool, self._acquire_timeout)
        try:
            cur = await conn.cursor()
            try:
                await cur.execute(self._sql, params)
                # Capture the RETURNING/OUTPUT rows from the SAME cursor BEFORE commit (re-run-stable:
                # a separate post-commit SELECT could read changed state on a crash-replay). _capture
                # never raises — a capture problem must not roll back an otherwise-successful write.
                captured = await self._capture(cur) if self.capture_response else None
                await conn.commit()
            except Exception as exc:
                # Rollback is best-effort. On a dropped connection the rollback raises as well,
                # and an exception raised inside this handler would replace ``exc`` and escape
                # unclassified — so the original failure would never be mapped to a transient
                # DeliveryError or a permanent NegativeAckError. Only the exception type is
                # logged, since driver messages can echo statement data.
                try:
                    await conn.rollback()
                except Exception as rollback_exc:  # noqa: BLE001 - cleanup must not mask ``exc``
                    logger.warning(
                        "DATABASE destination rollback failed after a statement error (%s)",
                        type(rollback_exc).__name__,
                    )
                state = _sqlstate(exc)
                if state is None:
                    raise  # not a DB driver error → an internal/code error, let the runner handle it
                raise _classify_db_error(state, str(exc)) from exc
        finally:
            await pool.release(conn)
        return captured

    async def _capture(self, cur: Any) -> DeliveryResponse:
        """Serialize the statement's RETURNING/OUTPUT result-set to a bounded JSON body (ADR 0013).

        Never raises (it runs pre-commit, so raising would roll back an otherwise-successful
        write): a missing or empty result set becomes ``no_reply``, and an over-cap or
        unserializable one becomes ``unparseable``, each with an empty body. Generated ids in a
        RETURNING are only as stable as the write's idempotency — a non-idempotent INSERT
        re-derives a new id on a crash-re-send (the standing 'outbounds must be idempotent'
        requirement; see the connector docs).
        """
        try:
            rows = await cur.fetchall()
        except Exception:  # noqa: BLE001 - statement produced no result set; capture nothing, keep the write
            return DeliveryResponse(body="", outcome="no_reply", detail="no result set")
        if not rows:
            return DeliveryResponse(body="", outcome="no_reply", detail="0 rows")
        if len(rows) > self._capture_max_rows:
            return DeliveryResponse(
                body="",
                outcome="unparseable",
                detail=f"result-set exceeded capture_max_rows={self._capture_max_rows}",
            )
        try:
            # Column names come from the cursor description; each row becomes {column: value}.
            cols = [d[0] for d in cur.description] if cur.description else []
            data = [dict(zip(cols, tuple(row))) for row in rows]
            body = json.dumps(data, default=_json_default)
        except Exception as exc:  # noqa: BLE001 - an unserializable column type must NOT fail the write
            # _json_default raises TypeError on a column type it can't encode; serializing must never
            # propagate (it runs pre-commit and would roll back an otherwise-successful write).
            return DeliveryResponse(
                body="",
                outcome="unparseable",
                detail=f"result-set not serializable ({type(exc).__name__})",
            )
        # The byte cap is measured on the UTF-8 encoding of the JSON text.
        if len(body.encode("utf-8")) > self._capture_max_bytes:
            return DeliveryResponse(
                body="",
                outcome="unparseable",
                detail=f"result-set exceeded capture_max_bytes={self._capture_max_bytes}",
            )
        return DeliveryResponse(body=body, outcome="accepted", detail=f"{len(rows)} row(s)")

    async def test_connection(self) -> None:
        """Run the shared ``SELECT 1`` reachability probe against this destination's pool."""
        await _probe_db(self._get_pool, timeout=self._acquire_timeout)

    async def aclose(self) -> None:
        """Close the pool if one was created and forget it; a no-op otherwise."""
        if self._pool is not None:
            self._pool.close()
            await self._pool.wait_closed()
            self._pool = None
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
class DatabaseSource(SourceConnector):
|
|
381
|
+
"""Poll a SQL table on an interval, hand each row to the pipeline handler, then mark it processed.
|
|
382
|
+
|
|
383
|
+
The File source's *process-then-mark-done* shape (at-least-once), with a query instead of a
|
|
384
|
+
directory: a cooperatively-cancellable background loop runs the operator-declared ``poll_statement``
|
|
385
|
+
(a ``SELECT`` of the next batch), hands each row to the handler as a body, and — **only after the
|
|
386
|
+
handler returns** — runs the optional ``mark_statement`` (an ``UPDATE``/``DELETE`` bound from that
|
|
387
|
+
row's columns) so the row isn't re-read. A crash before the mark re-emits the row next poll
|
|
388
|
+
(at-least-once); the downstream pipeline must tolerate duplicates. Poll errors are logged-not-fatal
|
|
389
|
+
(a bad poll never kills the poller, mirroring the File source).
|
|
390
|
+
|
|
391
|
+
**Body shape (payload-agnostic ingress, ADR 0004).** With ``body_column`` set, the body is that one
|
|
392
|
+
column's value verbatim (e.g. a queue column holding an HL7 message → pair with ``content_type``
|
|
393
|
+
``hl7v2`` and it flows through the full HL7 path); unset, the body is the whole row as a JSON object
|
|
394
|
+
``{column: value}`` (pair with ``content_type=json`` so the Handler can ``.json()`` it).
|
|
395
|
+
|
|
396
|
+
**Under ``[cluster].enabled`` (multi-node)** this source is leader-gated (only the leader polls,
|
|
397
|
+
Track B Step 4b) — but unlike the File/RemoteFile sources, where the engine owns the atomic rename
|
|
398
|
+
that bounds the leadership-transition duplicate window, the engine can't enforce row claim/mark
|
|
399
|
+
atomicity here: it's on the operator's SQL. Write ``poll_statement``/``mark_statement`` to claim
|
|
400
|
+
rows atomically (a status flag, or ``UPDATE ... RETURNING`` that both selects and marks) so the
|
|
401
|
+
brief transition window stays at the same at-least-once duplicate class as a crash mid-poll.
|
|
402
|
+
"""
|
|
403
|
+
|
|
404
|
+
polls_shared_resource = True # a DB table is a shared external resource — leader-gate it
|
|
405
|
+
|
|
406
|
+
def __init__(self, config: Source) -> None:
|
|
407
|
+
s = config.settings
|
|
408
|
+
for req in ("server", "database", "poll_statement"):
|
|
409
|
+
if not s.get(req):
|
|
410
|
+
raise ValueError(f"DATABASE source requires a {req!r} setting")
|
|
411
|
+
self._dsn = _build_dsn(s) # fail fast on a weakened-TLS / bad-auth config
|
|
412
|
+
self._poll_sql = str(s["poll_statement"])
|
|
413
|
+
mark = s.get("mark_statement")
|
|
414
|
+
# mark_statement is optional (a read-only/idempotent feed may omit it); its :name params bind
|
|
415
|
+
# from the polled row's columns, reusing the destination's named-parameter translation.
|
|
416
|
+
self._mark_sql: str | None
|
|
417
|
+
self._mark_sql, self._mark_names = _parse_named_params(str(mark)) if mark else (None, [])
|
|
418
|
+
self._body_column: str | None = s.get("body_column") or None
|
|
419
|
+
self._poll_seconds = float(s.get("poll_seconds", 5.0))
|
|
420
|
+
self._encoding: str = s.get("encoding", "utf-8")
|
|
421
|
+
self._pool_max = int(s.get("pool_max", 5))
|
|
422
|
+
self._acquire_timeout = float(s.get("acquire_timeout", _DEFAULT_DB_ACQUIRE_TIMEOUT))
|
|
423
|
+
self._pool: Any = None
|
|
424
|
+
self._pool_lock = asyncio.Lock()
|
|
425
|
+
self._handler: InboundHandler | None = None
|
|
426
|
+
# Leader-gate (Track B Step 4b): when set, the poll table (a shared external resource) is
|
|
427
|
+
# polled/marked only while the gate returns True, so in a cluster exactly one node ingests
|
|
428
|
+
# its rows. None = always poll (single-node / direct callers / tests) — byte-identical.
|
|
429
|
+
self._leader_gate: Callable[[], bool] | None = None
|
|
430
|
+
self._skipping = False # whether the last tick was gated out (for a single transition log)
|
|
431
|
+
self._stop = asyncio.Event()
|
|
432
|
+
self._task: asyncio.Task[None] | None = None
|
|
433
|
+
|
|
434
|
+
async def start(
|
|
435
|
+
self, handler: InboundHandler, *, leader_gate: Callable[[], bool] | None = None
|
|
436
|
+
) -> None:
|
|
437
|
+
self._handler = handler
|
|
438
|
+
self._leader_gate = leader_gate
|
|
439
|
+
self._stop.clear()
|
|
440
|
+
self._task = asyncio.create_task(self._run())
|
|
441
|
+
|
|
442
|
+
async def stop(self) -> None:
    """Request the poll loop to finish, wait for its task, then close the pool.

    The task is awaited with ``return_exceptions=True`` so a poll task that ended with an
    exception does not re-raise out of ``stop()``.
    """
    self._stop.set()
    task = self._task
    if task is not None:
        # Collect the task's outcome (result or exception) without letting it propagate here.
        await asyncio.gather(task, return_exceptions=True)
        self._task = None
    await self.aclose()
|
|
450
|
+
|
|
451
|
+
async def _get_pool(self) -> Any:
    """Return the connection pool, creating it on first use.

    Creation happens under ``self._pool_lock`` and re-checks ``self._pool`` once the lock is held,
    so callers that arrive together end up sharing one pool.
    """
    if self._pool is None:
        async with self._pool_lock:
            if self._pool is None:
                # autocommit: every mark is committed as its own statement (per-row durability).
                self._pool = await _make_pool(self._dsn, self._pool_max, autocommit=True)
            return self._pool
    return self._pool
|
|
459
|
+
|
|
460
|
+
async def _run(self) -> None:
    """Poll loop: tick until a stop is requested.

    Each tick polls only when ``_may_poll()`` allows it, then waits for the stop event for at most
    ``self._poll_seconds`` before the next tick. Cancellation propagates; any other exception
    raised by a tick is logged and the loop keeps going.
    """
    while not self._stop.is_set():
        try:
            should_poll = self._may_poll()
            if should_poll:
                await self._poll_once()
        except asyncio.CancelledError:
            raise
        except Exception:
            # A failed poll (dropped connection, bad poll_statement, unserializable column) must
            # not end the poller: the connection would stop receiving while still reporting
            # running. Log it and try again on the next interval (same approach as the File source).
            logger.exception("DATABASE source poll failed; retrying next interval")
        try:
            await asyncio.wait_for(self._stop.wait(), timeout=self._poll_seconds)
        except asyncio.TimeoutError:
            continue  # the interval elapsed without a stop request; poll again
|
|
476
|
+
|
|
477
|
+
def _may_poll(self) -> bool:
    """Report whether this tick may run poll_statement (and mark rows).

    True when no leader gate is set or the gate returns a truthy value; False otherwise. A gated
    node (a follower) must neither poll nor mark: the table is shared, and two nodes polling it
    would duplicate intake. The loop keeps ticking, so a node that becomes leader starts polling
    on its next tick without a restart. Only a change of state is logged (at debug level), never
    every skipped tick, which would flood a follower's log once per poll interval.
    """
    gate = self._leader_gate
    allowed = True if gate is None else bool(gate())
    if allowed and self._skipping:
        self._skipping = False
        logger.debug("DATABASE source resuming polling (now leader)")
    elif not allowed and not self._skipping:
        self._skipping = True
        logger.debug("DATABASE source skipping polling (not leader; another node ingests it)")
    return allowed
|
|
493
|
+
|
|
494
|
+
async def _poll_once(self) -> None:
    """Run one poll: select the pending rows, hand each body to the handler, then mark the row.

    Rows are processed in order. A failure affects only its own row, which is left unmarked while
    the batch continues:

    * the body cannot be built or encoded - a data/config error for that row, logged at ERROR;
    * the handler raises - logged at WARNING, the row stays unmarked so a later poll re-emits it;
    * the mark raises - logged at WARNING, the already-ingested row re-emits as a duplicate.

    Processing stops early, leaving the remaining rows untouched, once ``self._stop`` is set.
    """
    assert self._handler is not None
    columns, rows = await self._select()
    for row in rows:
        if self._stop.is_set():
            break  # shutting down — leave the rest unmarked for the next start (at-least-once)
        record = dict(zip(columns, row))
        try:
            body = self._body(record)
        except (ValueError, TypeError) as exc:
            # A row we can't turn into a body (missing body_column, unserializable value) is a
            # config/data error for that row — log and skip it rather than wedging the batch.
            logger.error("DATABASE source: %s; skipping row", exc)
            continue
        try:
            payload = body.encode(self._encoding)
        except (ValueError, TypeError, LookupError) as exc:
            # Encoding is part of building the message, not of handling it: a character the
            # configured encoding cannot represent (UnicodeEncodeError is a ValueError) or an
            # unusable encoding name is deterministic, so report it as a row error instead of
            # letting it masquerade as a retryable handler failure.
            logger.error("DATABASE source: %s; skipping row", exc)
            continue
        try:
            await self._handler(payload)
        except Exception as exc:
            # The handler records every message-level outcome itself (parse/route → ERROR) and
            # returns, so an exception here is an infrastructure failure (the durable store write
            # failed). Leave the row UNMARKED so the next poll re-emits it (at-least-once) — marking
            # it now would drop a received-but-unrecorded message (mirrors the File source's M-15).
            logger.warning(
                "DATABASE source handler failed (row left unmarked, will retry): %s", exc
            )
            continue
        try:
            await self._mark(record)
        except Exception as exc:
            # The handler already ingested the message; a mark failure means the row re-emits next
            # poll (a duplicate — at-least-once). Log and move on rather than abort the batch tail.
            logger.warning(
                "DATABASE source mark failed (row will re-emit, a duplicate): %s", exc
            )
|
|
527
|
+
|
|
528
|
+
async def _select(self) -> tuple[list[str], list[Any]]:
    """Run ``poll_statement`` and return ``(column_names, rows)``.

    The connection is released before the rows are handed to the (possibly slow) handler, so a
    batch never holds a pool connection hostage to downstream store I/O.

    Raises:
        ValueError: the statement produced no result set (``cursor.description`` is ``None``),
            i.e. ``poll_statement`` is not a row-returning query.
    """
    pool = await self._get_pool()
    conn = await _acquire(pool, self._acquire_timeout)
    try:
        cur = await conn.cursor()
        await cur.execute(self._poll_sql)
        if cur.description is None:
            # DB-API leaves description as None when the statement returns no rows at all; say
            # so plainly instead of failing with "'NoneType' object is not iterable".
            raise ValueError(
                "DATABASE source poll_statement returned no result set "
                "(it must be a query that returns rows)"
            )
        columns = [d[0] for d in cur.description]
        rows = list(await cur.fetchall())
    finally:
        # Always hand the connection back, whether the statement succeeded or not.
        await pool.release(conn)
    return columns, rows
|
|
542
|
+
|
|
543
|
+
def _body(self, record: dict[str, Any]) -> str:
    """Build the message body for one polled row.

    Without a ``body_column`` the whole row is serialized as JSON. With one, that column's value
    is the body: text is returned unchanged, ``bytes``/``bytearray`` are decoded with the
    configured encoding, and anything else goes through ``str()``.

    Raises:
        ValueError: ``body_column`` is not one of the row's columns.
    """
    column = self._body_column
    if column is None:
        return json.dumps(record, default=_json_default)
    try:
        raw = record[column]
    except KeyError:
        raise ValueError(
            f"body_column {column!r} is not in the poll_statement result columns"
        ) from None
    if isinstance(raw, str):
        return raw
    if isinstance(raw, (bytes, bytearray)):
        return bytes(raw).decode(self._encoding)
    return str(raw)
|
|
556
|
+
|
|
557
|
+
async def _mark(self, record: dict[str, Any]) -> None:
    """Execute ``mark_statement`` for one polled row.

    Does nothing when no ``mark_statement`` is configured. The statement's named parameters are
    bound, in order, from the row's columns; if one of them is not a column of the row, the
    problem is logged and the row is left unmarked.
    """
    statement = self._mark_sql
    if statement is None:
        return
    try:
        params = tuple(record[name] for name in self._mark_names)
    except KeyError as exc:
        # mark_statement names a column that poll_statement did not select: a static config
        # error. Report it loudly and leave the row unmarked (it re-emits) instead of crashing
        # the poller.
        logger.error(
            "DATABASE source mark_statement references unknown column %s; row left unmarked",
            exc,
        )
        return
    pool = await self._get_pool()
    connection = await _acquire(pool, self._acquire_timeout)
    try:
        cursor = await connection.cursor()
        await cursor.execute(statement, params)
    finally:
        await pool.release(connection)
|
|
577
|
+
|
|
578
|
+
async def test_connection(self) -> None:
    """Probe the database by handing ``_probe_db`` this source's pool getter and acquire timeout."""
    pool_getter = self._get_pool
    await _probe_db(pool_getter, timeout=self._acquire_timeout)
|
|
580
|
+
|
|
581
|
+
async def aclose(self) -> None:
    """Close the pool if one was opened, wait until it has finished closing, then forget it."""
    pool = self._pool
    if pool is None:
        return
    pool.close()
    await pool.wait_closed()
    self._pool = None
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
# Register DatabaseDestination as the destination implementation for ConnectorType.DATABASE.
register_destination(ConnectorType.DATABASE, DatabaseDestination)
|
|
589
|
+
# Register DatabaseSource as the source implementation for ConnectorType.DATABASE.
register_source(ConnectorType.DATABASE, DatabaseSource)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _bind_lookup_params(
    params: Mapping[str, Any], names: list[str], connection: str
) -> tuple[Any, ...]:
    """Turn a params mapping into positional values ordered like the statement's ``:name`` placeholders.

    A name that is absent from ``params`` is a permanent author error and raises
    :class:`DbLookupError`. The message is PHI-free: it names the missing key, never a value.
    """
    bound: list[Any] = []
    for name in names:
        try:
            bound.append(params[name])
        except KeyError as exc:
            raise DbLookupError(f"db_lookup on {connection!r}: missing parameter {exc}") from exc
    return tuple(bound)
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
class DatabaseLookupExecutor:
|
|
604
|
+
"""Pooled executor for handler-callable **live** lookups (``db_lookup``, ADR 0010).
|
|
605
|
+
|
|
606
|
+
Built by the :class:`~messagefoundry.pipeline.wiring_runner.RegistryRunner` from the graph's
|
|
607
|
+
``DatabaseLookup`` specs (``env()``-resolved + ``[egress].allowed_db``-checked by the runner). Lazily
|
|
608
|
+
opens one read-only ``aioodbc`` pool per named connection; :meth:`query` runs on the engine loop,
|
|
609
|
+
while ``db_lookup`` bridges to it from the handler's worker thread via ``run_coroutine_threadsafe``.
|
|
610
|
+
Reuses the DATABASE connector's DSN build / named-parameter translation / SQLSTATE extraction. Pools
|
|
611
|
+
are autocommit — a lookup is read-only, so each query is its own implicit transaction; nothing here
|
|
612
|
+
writes. Production / supported (SQL Server via the ``[sqlserver]`` extra), like the DATABASE connector."""
|
|
613
|
+
|
|
614
|
+
def __init__(self, connections: Mapping[str, Mapping[str, Any]]) -> None:
    """Validate each declared connection and precompute its DSN, pool size and acquire timeout.

    ``connections`` maps a connection name to its settings, already env-resolved (the runner
    substitutes ``env()`` first). No pool is opened here.

    Raises:
        ValueError: a connection lacks a non-empty ``server`` or ``database`` setting.
    """
    self._dsn: dict[str, str] = {}
    self._pool_max: dict[str, int] = {}
    self._acquire_timeout: dict[str, float] = {}
    for cname, settings in connections.items():
        missing = next((key for key in ("server", "database") if not settings.get(key)), None)
        if missing is not None:
            raise ValueError(f"DatabaseLookup {cname!r} requires a {missing!r} setting")
        # Building the DSN up front fails fast on a weakened-TLS / bad-auth config.
        self._dsn[cname] = _build_dsn(dict(settings))
        self._pool_max[cname] = int(settings.get("pool_max", 5))
        timeout = settings.get("acquire_timeout", _DEFAULT_DB_ACQUIRE_TIMEOUT)
        self._acquire_timeout[cname] = float(timeout)
    self._pools: dict[str, Any] = {}
    # One lock per declared connection, used when that connection's pool is first opened.
    self._locks: dict[str, asyncio.Lock] = {name: asyncio.Lock() for name in self._dsn}
|
|
630
|
+
|
|
631
|
+
@property
def connections(self) -> frozenset[str]:
    """Names of all declared lookup connections, as an immutable set."""
    return frozenset(self._dsn.keys())
|
|
635
|
+
|
|
636
|
+
async def _get_pool(self, connection: str) -> Any:
    """Return the pool for ``connection``, opening it on first use.

    An already-open pool is returned directly. Otherwise the pool is created while holding that
    connection's lock, after re-checking that no other caller opened it in the meantime.
    """
    existing = self._pools.get(connection)
    if existing is None:
        async with self._locks[connection]:
            if connection not in self._pools:
                self._pools[connection] = await _make_pool(
                    self._dsn[connection], self._pool_max[connection], autocommit=True
                )
            return self._pools[connection]
    return existing
|
|
646
|
+
|
|
647
|
+
async def query(
    self, connection: str, statement: str, params: Mapping[str, Any] | None
) -> list[dict[str, Any]]:
    """Execute ``statement`` on ``connection`` and return its rows as ``{column: value}`` dicts.

    The statement is always parameterized: ``:name`` placeholders become positional ``?`` markers
    bound from ``params``, so a value can never inject SQL. An unknown connection, a missing
    parameter, a pool-acquire ``DeliveryError`` or an error while executing/fetching is raised as
    :class:`DbLookupError` (PHI-free); the transform worker turns that into the message's
    ``ERROR`` / dead-letter disposition. Runs on the engine loop (the handler thread bridges in
    via ``run_coroutine_threadsafe``), so a slow query only holds up its own worker thread.
    """
    if connection not in self._dsn:
        known = ", ".join(sorted(self._dsn)) or "(none declared)"
        raise DbLookupError(
            f"db_lookup: no DatabaseLookup connection named {connection!r} (declared: {known})"
        )
    sql, names = _parse_named_params(statement)
    bound = _bind_lookup_params(params or {}, names, connection)
    pool = await self._get_pool(connection)
    try:
        db_conn = await _acquire(pool, self._acquire_timeout[connection])
    except DeliveryError as exc:
        # Re-raise the transient pool timeout as the lookup's own PHI-free error type, so the
        # transform worker treats this message the same way as any other lookup failure.
        raise DbLookupError(f"db_lookup on {connection!r}: {exc}") from exc
    try:
        cursor = await db_conn.cursor()
        await cursor.execute(sql, bound)
        description = cursor.description or ()
        columns = [entry[0] for entry in description]
        fetched = list(await cursor.fetchall())
    except DbLookupError:
        raise
    except Exception as exc:
        state = _sqlstate(exc)
        suffix = f" [{state}]" if state else ""
        # PHI-free: only the connection name and SQLSTATE (if any); never statement/params/rows.
        raise DbLookupError(f"db_lookup query on {connection!r} failed" + suffix) from exc
    finally:
        await pool.release(db_conn)
    return [dict(zip(columns, values)) for values in fetched]
|
|
687
|
+
|
|
688
|
+
async def aclose(self) -> None:
    """Close each pool that was opened, one after another, then drop them all.

    Safe to call repeatedly and when no pool was ever opened.
    """
    opened = self._pools
    for pool in opened.values():
        pool.close()
        await pool.wait_closed()
    opened.clear()
|