messagefoundry 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- messagefoundry/__init__.py +108 -0
- messagefoundry/__main__.py +1155 -0
- messagefoundry/api/__init__.py +27 -0
- messagefoundry/api/app.py +1581 -0
- messagefoundry/api/approvals.py +184 -0
- messagefoundry/api/auth_models.py +211 -0
- messagefoundry/api/auth_routes.py +655 -0
- messagefoundry/api/field_authz.py +96 -0
- messagefoundry/api/models.py +374 -0
- messagefoundry/api/security.py +247 -0
- messagefoundry/api/tls.py +47 -0
- messagefoundry/auth/__init__.py +39 -0
- messagefoundry/auth/data/common_passwords.NOTICE +13 -0
- messagefoundry/auth/data/common_passwords.txt +10000 -0
- messagefoundry/auth/identity.py +71 -0
- messagefoundry/auth/ldap.py +264 -0
- messagefoundry/auth/notifications.py +68 -0
- messagefoundry/auth/passwords.py +53 -0
- messagefoundry/auth/permissions.py +120 -0
- messagefoundry/auth/policy.py +153 -0
- messagefoundry/auth/ratelimit.py +55 -0
- messagefoundry/auth/service.py +1323 -0
- messagefoundry/auth/tokens.py +26 -0
- messagefoundry/auth/totp.py +174 -0
- messagefoundry/checks.py +174 -0
- messagefoundry/config/__init__.py +30 -0
- messagefoundry/config/active_environment.py +80 -0
- messagefoundry/config/ai_policy.py +140 -0
- messagefoundry/config/code_sets.py +260 -0
- messagefoundry/config/connections_edit.py +200 -0
- messagefoundry/config/connections_file.py +287 -0
- messagefoundry/config/db_lookup.py +117 -0
- messagefoundry/config/environments.py +116 -0
- messagefoundry/config/ingest_time.py +83 -0
- messagefoundry/config/models.py +240 -0
- messagefoundry/config/reference.py +158 -0
- messagefoundry/config/response.py +83 -0
- messagefoundry/config/run_context.py +153 -0
- messagefoundry/config/settings.py +1311 -0
- messagefoundry/config/state.py +99 -0
- messagefoundry/config/tls_policy.py +110 -0
- messagefoundry/config/wiring.py +1918 -0
- messagefoundry/console/__init__.py +20 -0
- messagefoundry/console/__main__.py +274 -0
- messagefoundry/console/_async.py +107 -0
- messagefoundry/console/change_password.py +111 -0
- messagefoundry/console/client.py +552 -0
- messagefoundry/console/connections.py +324 -0
- messagefoundry/console/login.py +107 -0
- messagefoundry/console/mfa.py +205 -0
- messagefoundry/console/reauth.py +94 -0
- messagefoundry/console/search.py +57 -0
- messagefoundry/console/service_control.py +137 -0
- messagefoundry/console/sessions.py +122 -0
- messagefoundry/console/shell.py +410 -0
- messagefoundry/console/status.py +377 -0
- messagefoundry/console/users_page.py +282 -0
- messagefoundry/console/widgets.py +553 -0
- messagefoundry/generators/README.md +27 -0
- messagefoundry/generators/__init__.py +15 -0
- messagefoundry/generators/_core.py +589 -0
- messagefoundry/generators/_hl7data.py +428 -0
- messagefoundry/generators/adt.py +286 -0
- messagefoundry/generators/all_types.py +24 -0
- messagefoundry/generators/bar.py +28 -0
- messagefoundry/generators/dft.py +20 -0
- messagefoundry/generators/mdm.py +39 -0
- messagefoundry/generators/mfn.py +46 -0
- messagefoundry/generators/oml.py +32 -0
- messagefoundry/generators/orl.py +30 -0
- messagefoundry/generators/orm.py +23 -0
- messagefoundry/generators/oru.py +21 -0
- messagefoundry/generators/ras.py +20 -0
- messagefoundry/generators/rde.py +54 -0
- messagefoundry/generators/siu.py +64 -0
- messagefoundry/generators/vxu.py +20 -0
- messagefoundry/hl7schema.py +75 -0
- messagefoundry/last_resort.py +55 -0
- messagefoundry/logging_setup.py +332 -0
- messagefoundry/parsing/__init__.py +64 -0
- messagefoundry/parsing/consistency.py +166 -0
- messagefoundry/parsing/groups.py +228 -0
- messagefoundry/parsing/message.py +453 -0
- messagefoundry/parsing/peek.py +237 -0
- messagefoundry/parsing/split.py +120 -0
- messagefoundry/parsing/summary.py +46 -0
- messagefoundry/parsing/tree.py +128 -0
- messagefoundry/parsing/validate.py +95 -0
- messagefoundry/parsing/x12/__init__.py +46 -0
- messagefoundry/parsing/x12/delimiters.py +140 -0
- messagefoundry/parsing/x12/errors.py +30 -0
- messagefoundry/parsing/x12/interchange.py +232 -0
- messagefoundry/parsing/x12/message.py +200 -0
- messagefoundry/parsing/x12/peek.py +207 -0
- messagefoundry/pipeline/__init__.py +21 -0
- messagefoundry/pipeline/alert_sinks.py +486 -0
- messagefoundry/pipeline/alerts.py +100 -0
- messagefoundry/pipeline/cert_expiry.py +219 -0
- messagefoundry/pipeline/cluster.py +955 -0
- messagefoundry/pipeline/cluster_sqlserver.py +444 -0
- messagefoundry/pipeline/config_convergence.py +137 -0
- messagefoundry/pipeline/dryrun.py +450 -0
- messagefoundry/pipeline/engine.py +756 -0
- messagefoundry/pipeline/leader_tasks.py +158 -0
- messagefoundry/pipeline/reference_sync.py +369 -0
- messagefoundry/pipeline/retention.py +289 -0
- messagefoundry/pipeline/security_notify.py +168 -0
- messagefoundry/pipeline/state_convergence.py +143 -0
- messagefoundry/pipeline/wiring_runner.py +1722 -0
- messagefoundry/py.typed +0 -0
- messagefoundry/redaction.py +71 -0
- messagefoundry/scaffold.py +321 -0
- messagefoundry/secrets_dpapi.py +129 -0
- messagefoundry/store/__init__.py +46 -0
- messagefoundry/store/audit_tee.py +67 -0
- messagefoundry/store/base.py +758 -0
- messagefoundry/store/crypto.py +166 -0
- messagefoundry/store/keyprovider.py +192 -0
- messagefoundry/store/postgres.py +3447 -0
- messagefoundry/store/sqlserver.py +3014 -0
- messagefoundry/store/store.py +3790 -0
- messagefoundry/timezone.py +207 -0
- messagefoundry/transports/__init__.py +50 -0
- messagefoundry/transports/base.py +269 -0
- messagefoundry/transports/database.py +693 -0
- messagefoundry/transports/file.py +551 -0
- messagefoundry/transports/framing.py +164 -0
- messagefoundry/transports/loopback.py +53 -0
- messagefoundry/transports/mllp.py +644 -0
- messagefoundry/transports/remotefile.py +664 -0
- messagefoundry/transports/rest.py +281 -0
- messagefoundry/transports/signing.py +321 -0
- messagefoundry/transports/soap.py +507 -0
- messagefoundry/transports/tcp.py +307 -0
- messagefoundry/transports/timer.py +146 -0
- messagefoundry/transports/x12.py +323 -0
- messagefoundry-0.1.0.dist-info/METADATA +212 -0
- messagefoundry-0.1.0.dist-info/RECORD +142 -0
- messagefoundry-0.1.0.dist-info/WHEEL +4 -0
- messagefoundry-0.1.0.dist-info/entry_points.txt +2 -0
- messagefoundry-0.1.0.dist-info/licenses/LICENSE +662 -0
- messagefoundry-0.1.0.dist-info/licenses/NOTICE +27 -0
|
@@ -0,0 +1,1581 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
+
# Copyright (C) 2026 MessageFoundry Organization and contributors
|
|
3
|
+
"""Localhost FastAPI surface for the console.
|
|
4
|
+
|
|
5
|
+
This is the *only* boundary a client uses, so in-process / local-daemon / remote
|
|
6
|
+
deployments are indistinguishable to the UI. Routes resolve the live :class:`Engine`
|
|
7
|
+
from ``app.state`` at request time (not at construction), which lets the same app object
|
|
8
|
+
be driven two ways:
|
|
9
|
+
|
|
10
|
+
* :func:`create_app(engine)` — bind an engine the caller already manages (embedding, and
|
|
11
|
+
the async test client).
|
|
12
|
+
* :func:`create_managed_app(...)` — own the engine via an ASGI lifespan (the CLI server,
|
|
13
|
+
and anything driven by a synchronous test client).
|
|
14
|
+
|
|
15
|
+
Authentication + RBAC are enforced whenever an enabled :class:`AuthService` is attached (the
|
|
16
|
+
``serve`` path always attaches one). With **no** auth attached the routes are **fail-closed** (403)
|
|
17
|
+
unless the app explicitly opts out via ``allow_no_auth=True`` (embedding / dev), in which case
|
|
18
|
+
requests run as the full-access system identity (SYS-1). The API still binds localhost by default;
|
|
19
|
+
remote exposure (TLS) is later.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import asyncio
|
|
25
|
+
import json
|
|
26
|
+
import logging
|
|
27
|
+
import os
|
|
28
|
+
import time
|
|
29
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
30
|
+
from contextlib import asynccontextmanager, suppress
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any, AsyncIterator
|
|
33
|
+
|
|
34
|
+
from fastapi import (
|
|
35
|
+
Depends,
|
|
36
|
+
FastAPI,
|
|
37
|
+
HTTPException,
|
|
38
|
+
Query,
|
|
39
|
+
Request,
|
|
40
|
+
Response,
|
|
41
|
+
WebSocket,
|
|
42
|
+
WebSocketDisconnect,
|
|
43
|
+
)
|
|
44
|
+
from fastapi.responses import JSONResponse
|
|
45
|
+
|
|
46
|
+
from messagefoundry import __version__
|
|
47
|
+
from messagefoundry.api.approvals import ApprovalError, ApprovalGate
|
|
48
|
+
from messagefoundry.api.models import (
|
|
49
|
+
AiPolicy,
|
|
50
|
+
ApprovalDecisionResult,
|
|
51
|
+
ApprovalList,
|
|
52
|
+
CapturedResponseInfo,
|
|
53
|
+
ChannelInfo,
|
|
54
|
+
ClusterNode,
|
|
55
|
+
ClusterNodeList,
|
|
56
|
+
ClusterStatus,
|
|
57
|
+
ConnectionMetadata,
|
|
58
|
+
ConnectionRow,
|
|
59
|
+
ConnectionTestResult,
|
|
60
|
+
DbInfo,
|
|
61
|
+
DeadLetterList,
|
|
62
|
+
DeadLetterReplayRequest,
|
|
63
|
+
DeadLetterReplayResult,
|
|
64
|
+
DeadLetterRow,
|
|
65
|
+
EngineInfo,
|
|
66
|
+
EventInfo,
|
|
67
|
+
Health,
|
|
68
|
+
IntegrityResult,
|
|
69
|
+
MessageDetail,
|
|
70
|
+
MessageList,
|
|
71
|
+
MessageResponses,
|
|
72
|
+
MessageSummary,
|
|
73
|
+
OutboundPayloadInfo,
|
|
74
|
+
OutboundPayloads,
|
|
75
|
+
OutboxInfo,
|
|
76
|
+
PendingApprovalInfo,
|
|
77
|
+
PendingApprovalResponse,
|
|
78
|
+
PurgeResult,
|
|
79
|
+
ReloadRequest,
|
|
80
|
+
ReloadResult,
|
|
81
|
+
ReplayResult,
|
|
82
|
+
StatsResponse,
|
|
83
|
+
SystemStatus,
|
|
84
|
+
)
|
|
85
|
+
from messagefoundry.api.auth_routes import add_auth_routes
|
|
86
|
+
from messagefoundry.api.field_authz import count_exposed, redact_unauthorized
|
|
87
|
+
from messagefoundry.api.security import (
|
|
88
|
+
authorize_ws,
|
|
89
|
+
optional_identity,
|
|
90
|
+
require,
|
|
91
|
+
require_phi_read,
|
|
92
|
+
require_step_up,
|
|
93
|
+
ws_token,
|
|
94
|
+
)
|
|
95
|
+
from messagefoundry.auth import Identity, Permission
|
|
96
|
+
from messagefoundry.auth.service import AuthService, BootstrapAdmin
|
|
97
|
+
from messagefoundry.config.ai_policy import resolve_effective_policy
|
|
98
|
+
from messagefoundry.config.models import (
|
|
99
|
+
AckAfter,
|
|
100
|
+
BuildupThreshold,
|
|
101
|
+
InternalErrorPolicy,
|
|
102
|
+
OrderingMode,
|
|
103
|
+
RetryPolicy,
|
|
104
|
+
)
|
|
105
|
+
from messagefoundry.config.settings import (
|
|
106
|
+
AiSettings,
|
|
107
|
+
AlertsSettings,
|
|
108
|
+
ApprovalsSettings,
|
|
109
|
+
AuthSettings,
|
|
110
|
+
CertMonitorSettings,
|
|
111
|
+
ClusterSettings,
|
|
112
|
+
EgressSettings,
|
|
113
|
+
ReferenceSettings,
|
|
114
|
+
RetentionSettings,
|
|
115
|
+
ShadowSettings,
|
|
116
|
+
StoreSettings,
|
|
117
|
+
)
|
|
118
|
+
from messagefoundry.config.wiring import EnvRef, WiringError, load_config, redacted_settings
|
|
119
|
+
from messagefoundry.last_resort import install_loop_exception_handler
|
|
120
|
+
from messagefoundry.pipeline import ConfigReloadDenied, Engine
|
|
121
|
+
from messagefoundry.pipeline.alert_sinks import notifier_from_settings
|
|
122
|
+
from messagefoundry.pipeline.security_notify import security_notifier_from_settings
|
|
123
|
+
from messagefoundry.pipeline.cluster import build_coordinator
|
|
124
|
+
from messagefoundry.pipeline.wiring_runner import RegistryRunner
|
|
125
|
+
from messagefoundry.transports.base import (
|
|
126
|
+
DeliveryError,
|
|
127
|
+
DestinationConnector,
|
|
128
|
+
TestNotSupportedError,
|
|
129
|
+
)
|
|
130
|
+
from messagefoundry.store import Row, open_store, sqlite_settings
|
|
131
|
+
from messagefoundry.store.base import Store
|
|
132
|
+
from messagefoundry.store.store import _secure_file
|
|
133
|
+
|
|
134
|
+
__all__ = ["create_app", "create_managed_app"]
|
|
135
|
+
|
|
136
|
+
# Window, in seconds, over which the backlog throughput estimate is taken.
_RATE_WINDOW = 60.0
# API-INPUT: HTTP request bodies are capped at 1 MiB.
_MAX_REQUEST_BODY_BYTES = 1024 * 1024
# Overall cap, in seconds, for a POST /connections/{name}/test probe.
_CONNECTION_TEST_TIMEOUT = 35.0
# API-WS: maximum number of concurrent /ws/stats sockets.
_MAX_WS_CONNECTIONS = 64
# API-WS: an open /ws/stats re-checks its session this often (seconds).
_WS_REVALIDATE_SECONDS = 30.0
_log = logging.getLogger(__name__)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _peer_display(value: Any) -> str | None:
    """Format a connector address setting for the dashboard.

    ``None`` passes through unchanged. An ``env()`` reference is shown symbolically as
    ``env:<key>`` (the spec holds only the reference; the live value is resolved per instance).
    Any other value is rendered with ``str()``.
    """
    if value is None:
        return None
    return f"env:{value.key}" if isinstance(value, EnvRef) else str(value)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _peer_port(type_value: str, settings: dict[str, Any]) -> tuple[str | None, int | None]:
    """Return a best-effort ``(peer, port)`` pair for a connector.

    An ``mllp`` connector yields its host and port, a ``file`` connector yields its directory
    with no port, and every other type yields ``(None, None)``.
    """
    if type_value == "file":
        return (_peer_display(settings.get("directory")), None)
    if type_value != "mllp":
        return (None, None)
    raw_port = settings.get("port")
    if raw_port is None or isinstance(raw_port, EnvRef):
        # An unset port, or one given only as an env() reference, has no number to show.
        port_number = None
    else:
        port_number = int(raw_port)
    return (_peer_display(settings.get("host")), port_number)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Human-readable names for each connection method/protocol. Types that have no connector yet are
# listed too, so the column reads well as soon as one lands; any type missing here is shown
# upper-cased.
_METHOD_LABELS = dict(
    mllp="MLLP",
    file="File",
    tcp="TCP",
    soap="SOAP",
    rest="REST",
    http="HTTP",
    sftp="SFTP",
    db="Database",
)


def _method_label(type_value: str) -> str:
    """Return the display label for a connector type, upper-casing types with no entry."""
    fallback = type_value.upper()
    return _METHOD_LABELS.get(type_value, fallback)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _backlog(depth: int, recent: int) -> float | None:
    """Estimate how many seconds the queue needs to drain.

    Returns ``0.0`` for an empty queue, and ``None`` when items are queued but nothing was
    drained recently (``recent`` is not positive), since no rate can be derived.
    """
    if depth == 0:
        return 0.0
    if recent > 0:
        # ``recent`` items were drained in the last _RATE_WINDOW seconds; scale the depth by that rate.
        return depth * _RATE_WINDOW / recent
    return None
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _get_engine(request: Request) -> Engine:
    """Return the engine stored on ``request.app.state``.

    Raises:
        HTTPException: 503 ("engine not started") when no engine is attached.
    """
    engine: Engine | None = getattr(request.app.state, "engine", None)
    if engine is not None:
        return engine
    raise HTTPException(status_code=503, detail="engine not started")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _get_gate(request: Request) -> ApprovalGate | None:
    """Return the dual-control approval gate (ASVS 2.3.5) stored on the app state.

    The result is ``None`` when no engine is bound; in that case gated endpoints execute inline
    and the ``/approvals`` routes report 503.
    """
    state = request.app.state
    return getattr(state, "approval_gate", None)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _build_approval_gate(engine: Engine, settings: ApprovalsSettings) -> ApprovalGate:
    """Create the approval gate and register the high-value operations dual-control can hold.

    Each registered executor re-runs its captured operation once approved; its parameters are
    JSON, persisted at request time.
    """
    gate = ApprovalGate(engine.store, settings)

    async def _replay(p: Mapping[str, Any]) -> dict[str, Any]:
        # Both filters are optional in the captured parameters.
        channel_id = p.get("channel_id")
        destination_name = p.get("destination_name")
        requeued = await engine.replay_dead(
            channel_id=channel_id, destination_name=destination_name
        )
        return {"requeued": requeued}

    async def _purge(p: Mapping[str, Any]) -> dict[str, Any]:
        target = str(p["name"])
        top_only = p.get("scope") == "top"
        cancelled = await engine.store.cancel_queued(None, target, top_only=top_only)
        return {"cancelled": cancelled}

    registrations = (
        ("dead_letter_replay", "Replay dead-lettered deliveries", _replay),
        ("connection_purge", "Purge queued deliveries to an outbound connection", _purge),
    )
    for operation, description, executor in registrations:
        gate.register(operation, description, executor)
    return gate
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _summary(row: Row) -> MessageSummary:
    """Build a :class:`MessageSummary` from a store row.

    The row is copied into a plain ``dict`` so optional columns (``last_event`` on list rows;
    ``summary``/``metadata``) can be read with ``.get``. One builder therefore serves both list
    rows and ``SELECT *`` detail rows.
    """
    cols = dict(row)
    mandatory = {key: cols[key] for key in ("id", "channel_id", "received_at", "status")}
    nullable = {
        key: cols.get(key)
        for key in ("source_type", "control_id", "message_type", "error", "summary", "metadata")
    }
    # The model field is ``event``; the row column that feeds it is ``last_event``.
    return MessageSummary(event=cols.get("last_event"), **mandatory, **nullable)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _dead_row(row: Row) -> DeadLetterRow:
    """Build a :class:`DeadLetterRow` from a store row.

    Required columns are read by key (a missing one raises ``KeyError``); optional columns are
    read with ``.get`` and default to ``None``.
    """
    cols = dict(row)
    mandatory = {
        key: cols[key]
        for key in ("outbox_id", "message_id", "channel_id", "destination_name", "attempts")
    }
    # ``failed_at`` is taken from the row's ``updated_at`` column.
    failed_at = cols["updated_at"]
    received_at = cols["received_at"]
    nullable = {
        key: cols.get(key) for key in ("last_error", "control_id", "message_type", "summary")
    }
    return DeadLetterRow(failed_at=failed_at, received_at=received_at, **mandatory, **nullable)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _scope(identity: Identity) -> list[str] | None:
    """Return the caller's per-channel allow-list for store filters, sorted.

    ``None`` means the caller is not restricted to specific channels.
    """
    channels = identity.allowed_channels
    if channels is None:
        return None
    return sorted(channels)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
async def _audit_channel_denied(engine: Engine, identity: Identity, channel: str | None) -> None:
    """Write an ``auth.channel_denied`` audit row for a per-channel RBAC denial.

    Mirrors ``auth.permission_denied``: the actor is the caller's username, and the denied
    channel is recorded both as the row's channel and inside the JSON detail.
    """
    payload = json.dumps({"channel": channel})
    store = engine.store
    await store.record_audit(
        "auth.channel_denied",
        actor=identity.username,
        channel_id=channel,
        detail=payload,
    )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
async def _run_connection_test(
    rr: RegistryRunner, name: str, direction: str
) -> ConnectionTestResult:
    """Probe the reachability of connection ``name`` using a freshly built connector.

    The live connector is never touched. A config failure (bad ``env()``/egress) or a
    connectivity failure is reported in the returned result instead of being raised, so only an
    unexpected bug would surface as a 500. The test connector is closed before returning.
    """

    def _result(
        *, supported: bool, success: bool, ms: float, detail: str | None
    ) -> ConnectionTestResult:
        # Every outcome shares the connection's name/direction; the duration is rounded to 0.1 ms.
        return ConnectionTestResult(
            name=name,
            direction=direction,
            supported=supported,
            success=success,
            duration_ms=round(ms, 1),
            detail=detail,
        )

    try:
        _direction, connector = rr.build_test_connector(name)
    except WiringError as exc:
        # The connector could not even be built: report it as a failed (but supported) test.
        return _result(supported=True, success=False, ms=0.0, detail=str(exc))

    started = time.monotonic()
    supported = True
    success = False
    detail: str | None = None
    try:
        await asyncio.wait_for(connector.test_connection(), _CONNECTION_TEST_TIMEOUT)
    except TestNotSupportedError as exc:
        supported = False
        detail = str(exc)
    except asyncio.TimeoutError:
        detail = f"timed out after {_CONNECTION_TEST_TIMEOUT:.0f}s"
    except DeliveryError as exc:
        detail = str(exc)
    except Exception as exc:  # noqa: BLE001 - any probe failure is reported in the result, never a 500
        detail = f"{type(exc).__name__}: {exc}"
    else:
        success = True
    finally:
        # Closing a test connector must never mask the result, so close errors are swallowed.
        with suppress(Exception):
            if isinstance(connector, DestinationConnector):
                closer = connector.aclose
            else:
                closer = connector.stop
            await closer()

    # Measured after the connector is closed, so the duration includes the close.
    elapsed_ms = (time.monotonic() - started) * 1000.0
    return _result(supported=supported, success=success, ms=elapsed_ms, detail=detail)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class _SummaryAuditCoalescer:
    """Folds PHI-summary access auditing into a single ``summary_access`` audit row per
    ``(actor, channel-scope, hour)`` window; the row carries the running count of summaries
    exposed during that window (review M-5).

    Auditing is **server-enforced**: every list response that returns non-redacted summaries is
    counted regardless of any client flag, so a scripted bulk fetch cannot harvest the patient
    census unaudited. Coalescing keeps routine console polling at one row per hour, while a bulk
    harvest stands out as a large count. A window's total is written when a later summary access
    falls into a new hour (that sweep covers every stored window, so a *different* actor's later
    access also flushes stragglers), and the active window is written by :meth:`flush` (engine
    shutdown). Keeping the state in an in-process dict is safe because the engine is a single
    uvicorn worker (single-connection store + ``asyncio.Lock``).
    """

    def __init__(self) -> None:
        # Keyed by (actor, scope); each value is {"hour": int, "count": int}. The scope is the
        # channel filter, with "" standing for all channels.
        self._windows: dict[tuple[str | None, str], dict[str, int]] = {}

    def _roll(
        self, actor: str | None, scope: str, count: int, hour: int
    ) -> list[tuple[str | None, str, int, int]]:
        """Add ``count`` to the ``(actor, scope)`` window for ``hour``.

        Returns the windows that must be flushed now, i.e. every stored window whose hour differs
        from ``hour``, as ``(actor, scope, hour, count)`` tuples; those windows are removed. The
        method is synchronous (no ``await``), so the dict is mutated atomically with respect to
        the event loop and a window cannot be emitted twice.
        """
        stale_keys = [key for key, window in self._windows.items() if window["hour"] != hour]
        emit: list[tuple[str | None, str, int, int]] = []
        for key in stale_keys:
            window = self._windows.pop(key)
            emit.append((key[0], key[1], window["hour"], window["count"]))
        current = self._windows.setdefault((actor, scope), {"hour": hour, "count": 0})
        current["count"] += count
        return emit

    async def note(
        self, store: Store, actor: str | None, scope: str | None, count: int, now: float
    ) -> None:
        """Record that ``count`` summaries were exposed to ``actor`` at time ``now``.

        Writes one coalesced audit row for each window that just rolled over. Does nothing when
        ``count`` is not positive.
        """
        if count <= 0:
            return
        current_hour = int(now // 3600)
        rolled = self._roll(actor, scope or "", count, current_hour)
        for win_actor, win_scope, win_hour, win_count in rolled:
            await self._emit(store, win_actor, win_scope, win_hour, win_count)

    async def flush(self, store: Store) -> None:
        """Write out every pending window (e.g. on engine shutdown) so the active one is not lost."""
        # Snapshot and clear before awaiting, so the pending windows are removed up front.
        pending = tuple(self._windows.items())
        self._windows.clear()
        for (win_actor, win_scope), window in pending:
            await self._emit(store, win_actor, win_scope, window["hour"], window["count"])

    @staticmethod
    async def _emit(store: Store, actor: str | None, scope: str, hour: int, count: int) -> None:
        """Write one ``summary_access`` audit row for a finished window."""
        # The "" scope (all channels) is stored as a NULL channel; the hour index is converted
        # back to the window's start in epoch seconds.
        detail = json.dumps({"count": count, "window_start": hour * 3600})
        await store.record_audit(
            "summary_access",
            actor=actor,
            channel_id=(scope or None),
            detail=detail,
        )
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def create_app(
|
|
382
|
+
engine: Engine | None = None,
|
|
383
|
+
*,
|
|
384
|
+
lifespan: object | None = None,
|
|
385
|
+
auth: AuthService | None = None,
|
|
386
|
+
ai_settings: AiSettings | None = None,
|
|
387
|
+
approvals: ApprovalsSettings | None = None,
|
|
388
|
+
expose_docs: bool = False,
|
|
389
|
+
allow_no_auth: bool = False,
|
|
390
|
+
ws_allowed_origins: Sequence[str] = (),
|
|
391
|
+
) -> FastAPI:
|
|
392
|
+
# The interactive docs (/docs, /redoc) and the OpenAPI schema (/openapi.json) are off by
|
|
393
|
+
# default: they widen the attack surface and disclose the schema, which matters the moment the
|
|
394
|
+
# API binds off-loopback. Opt in with [api] expose_docs = true. See docs/PHI.md §10.
|
|
395
|
+
app = FastAPI(
|
|
396
|
+
title="MessageFoundry",
|
|
397
|
+
version=__version__,
|
|
398
|
+
lifespan=lifespan, # type: ignore[arg-type]
|
|
399
|
+
docs_url="/docs" if expose_docs else None,
|
|
400
|
+
redoc_url="/redoc" if expose_docs else None,
|
|
401
|
+
openapi_url="/openapi.json" if expose_docs else None,
|
|
402
|
+
)
|
|
403
|
+
if engine is not None:
|
|
404
|
+
app.state.engine = engine
|
|
405
|
+
app.state.approval_gate = _build_approval_gate(engine, approvals or ApprovalsSettings())
|
|
406
|
+
if auth is not None:
|
|
407
|
+
app.state.auth = auth
|
|
408
|
+
if ai_settings is not None:
|
|
409
|
+
app.state.ai = ai_settings
|
|
410
|
+
# Fail-closed when no auth is attached unless explicitly opted out (embedding/dev) — SYS-1.
|
|
411
|
+
app.state.allow_no_auth = allow_no_auth
|
|
412
|
+
app.state.ws_count = 0 # live /ws/stats connection count (API-WS cap)
|
|
413
|
+
app.state.ws_allowed_origins = tuple(
|
|
414
|
+
ws_allowed_origins
|
|
415
|
+
) # browser Origins for /ws/stats (4.4.2)
|
|
416
|
+
app.state.summary_auditor = _SummaryAuditCoalescer() # coalesced PHI-summary access audit (M-5)
|
|
417
|
+
add_auth_routes(app)
|
|
418
|
+
|
|
419
|
+
@app.exception_handler(Exception)
|
|
420
|
+
async def _unhandled_exception(request: Request, exc: Exception) -> JSONResponse:
|
|
421
|
+
# Catch-all so an unexpected error returns a generic 500 — never a stack trace or internal
|
|
422
|
+
# detail to the client (ASVS 16.5.1). The real cause is logged server-side only; we log the
|
|
423
|
+
# exception TYPE + route, not str(exc), to avoid a stray PHI fragment reaching the general
|
|
424
|
+
# log (the "never log bodies" rule; centralized redaction is the WP-6c follow-up).
|
|
425
|
+
_log.error(
|
|
426
|
+
"unhandled error on %s %s: %s", request.method, request.url.path, type(exc).__name__
|
|
427
|
+
)
|
|
428
|
+
return JSONResponse({"detail": "internal error"}, status_code=500)
|
|
429
|
+
|
|
430
|
+
@app.middleware("http")
|
|
431
|
+
async def _security_headers(request: Request, call_next: Any) -> Any:
|
|
432
|
+
# Defense-in-depth response headers (ASVS 3.4.4 / 3.4.5 / 3.2.1). The shipped client is a
|
|
433
|
+
# desktop app, but these are mandatory the moment a browser/off-loopback client appears and
|
|
434
|
+
# cost nothing on a JSON API. HSTS is only meaningful over TLS, so it is emitted only when the
|
|
435
|
+
# request actually arrived over https (wired when API TLS lands — WP-13a).
|
|
436
|
+
response = await call_next(request)
|
|
437
|
+
response.headers.setdefault("X-Content-Type-Options", "nosniff")
|
|
438
|
+
response.headers.setdefault("Referrer-Policy", "no-referrer")
|
|
439
|
+
response.headers.setdefault("X-Frame-Options", "DENY")
|
|
440
|
+
if request.url.scheme == "https":
|
|
441
|
+
response.headers.setdefault(
|
|
442
|
+
"Strict-Transport-Security", "max-age=31536000; includeSubDomains"
|
|
443
|
+
)
|
|
444
|
+
return response
|
|
445
|
+
|
|
446
|
+
@app.middleware("http")
|
|
447
|
+
async def _limit_request_body(request: Request, call_next: Any) -> Any:
|
|
448
|
+
# The HTTP API carries only small JSON (HL7 payloads arrive via MLLP/file, not here), so a
|
|
449
|
+
# generous cap rejects oversized/abusive bodies early (API-INPUT).
|
|
450
|
+
# Rejections are logged (ASVS 16.3.3) — these are control-bypass attempts (a pre-auth memory
|
|
451
|
+
# DoS probe) and were previously dropped silently. We log to the rotating general log rather
|
|
452
|
+
# than the audit_log: it's pre-auth (no actor) and a flood must not grow the audit DB.
|
|
453
|
+
client = request.client.host if request.client else None
|
|
454
|
+
length = request.headers.get("content-length")
|
|
455
|
+
transfer_encoding = request.headers.get("transfer-encoding", "").lower()
|
|
456
|
+
# A request carrying BOTH Content-Length and Transfer-Encoding is ambiguously framed (RFC 9112
|
|
457
|
+
# §6.1 — TE overrides CL) and is the classic CL.TE request-smuggling vector. Our single h11
|
|
458
|
+
# parser doesn't desync on the default loopback bind, but reject it outright so a future front
|
|
459
|
+
# proxy can never disagree with us about where the message ends (ASVS 4.2.1).
|
|
460
|
+
if length is not None and "chunked" in transfer_encoding:
|
|
461
|
+
_log.warning(
|
|
462
|
+
"rejected request with both Content-Length and Transfer-Encoding on %s from %s",
|
|
463
|
+
request.url.path,
|
|
464
|
+
client,
|
|
465
|
+
)
|
|
466
|
+
return JSONResponse(
|
|
467
|
+
{
|
|
468
|
+
"detail": "ambiguous framing: Content-Length with Transfer-Encoding is not accepted"
|
|
469
|
+
},
|
|
470
|
+
status_code=400,
|
|
471
|
+
)
|
|
472
|
+
if length is None:
|
|
473
|
+
# No Content-Length means a chunked body (HTTP/1.1 requires one or the other), which the
|
|
474
|
+
# Content-Length cap can't bound up front — Starlette would buffer it unbounded, a pre-auth
|
|
475
|
+
# memory DoS. We only accept small JSON, so require a Content-Length (review M-19).
|
|
476
|
+
if "chunked" in transfer_encoding:
|
|
477
|
+
_log.warning(
|
|
478
|
+
"rejected chunked request body on %s from %s", request.url.path, client
|
|
479
|
+
)
|
|
480
|
+
return JSONResponse(
|
|
481
|
+
{"detail": "chunked request bodies are not accepted; send a Content-Length"},
|
|
482
|
+
status_code=411,
|
|
483
|
+
)
|
|
484
|
+
return await call_next(request)
|
|
485
|
+
try:
|
|
486
|
+
too_big = int(length) > _MAX_REQUEST_BODY_BYTES
|
|
487
|
+
except ValueError:
|
|
488
|
+
_log.warning("rejected invalid Content-Length on %s from %s", request.url.path, client)
|
|
489
|
+
return JSONResponse({"detail": "invalid Content-Length"}, status_code=400)
|
|
490
|
+
if too_big:
|
|
491
|
+
_log.warning("rejected oversized request body on %s from %s", request.url.path, client)
|
|
492
|
+
return JSONResponse({"detail": "request body too large"}, status_code=413)
|
|
493
|
+
return await call_next(request)
|
|
494
|
+
|
|
495
|
+
@app.get("/health", response_model=Health)
|
|
496
|
+
async def health(identity: Identity | None = Depends(optional_identity)) -> Health:
|
|
497
|
+
# Liveness is always answerable (tokenless), but the build version is fingerprinting info, so
|
|
498
|
+
# it is disclosed only to an authenticated caller (WP-L3-07 / ASVS 13.4.6). When auth is
|
|
499
|
+
# disabled-with-allow_no_auth, optional_identity returns the system identity → version shown.
|
|
500
|
+
return Health(version=__version__ if identity is not None else None)
|
|
501
|
+
|
|
502
|
+
@app.get("/ai/policy", response_model=AiPolicy)
async def ai_policy(
    request: Request, identity: Identity | None = Depends(optional_identity)
) -> AiPolicy:
    """The central AI-assistance policy (mode/scope/environment) plus the caller's
    ``assist_permitted`` bit, for the IDE gate.

    Intentionally NOT behind ``require()``: the install policy is non-sensitive operational
    config and must be readable even by a tokenless client, so a central ``off`` is honored.
    ``assist_permitted`` carries the identity-dependent bit (``None`` = RBAC not evaluable, i.e.
    no/invalid token under enabled auth). Policy reads are not audited in this MVP."""
    # Fall back to default settings when the app state carries no (or a falsy) ``ai`` entry.
    settings = getattr(request.app.state, "ai", None)
    if not settings:
        settings = AiSettings()

    data_class, resolved_prod = settings.derived_posture()
    # An unresolved posture is treated as production, i.e. the strictest ceiling.
    production = resolved_prod if resolved_prod is not None else True

    effective = resolve_effective_policy(
        mode=settings.mode, data_scope=settings.data_scope, production=production
    )

    # Without an identity the permission check cannot be evaluated, so report None rather
    # than False.
    if identity is None:
        assist_permitted = None
    else:
        assist_permitted = identity.has(Permission.AI_ASSIST)

    return AiPolicy(
        mode=effective.mode,
        data_scope=effective.data_scope,
        environment=settings.environment,
        data_class=data_class,
        production=production,
        assist_permitted=assist_permitted,
        reason=effective.reason,
    )
|
|
529
|
+
|
|
530
|
+
# --- connections list (inbound connections, for the Log Search filter) ---
|
|
531
|
+
|
|
532
|
+
@app.get("/channels", response_model=list[ChannelInfo])
async def list_channels(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_READ)),
) -> list[ChannelInfo]:
    """Inbound connections as ChannelInfo (id = connection name) for the Log Search filter."""
    channels: list[ChannelInfo] = []
    runner = engine.registry_runner
    if runner is None:
        # No registry runner on the engine: nothing to list.
        return channels

    for conn_name, inbound in runner.registry.inbound.items():
        # The connection name doubles as the channel id; destinations are not reported here.
        info = ChannelInfo(
            id=conn_name,
            name=conn_name,
            enabled=True,
            running=runner.inbound_running(conn_name),
            source_type=inbound.spec.type.value,
            destinations=[],
        )
        channels.append(info)
    return channels
|
|
552
|
+
|
|
553
|
+
# --- connections (per-endpoint dashboard) --------------------------------
|
|
554
|
+
|
|
555
|
+
@app.get("/connections", response_model=list[ConnectionRow])
async def list_connections(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_READ)),
) -> list[ConnectionRow]:
    # Dashboard rows: one "source" row per inbound connection in the live registry, plus one
    # "destination" row per (inbound -> outbound) edge present in the store's destination
    # metrics (the outbox metrics are keyed by that pair).
    now = time.time()
    metrics = await engine.store.connection_metrics(
        since=engine.started_at, now=now, rate_window=_RATE_WINDOW
    )
    rows: list[ConnectionRow] = []

    runner = engine.registry_runner
    if runner is None:
        return rows

    registry = runner.registry
    runner_status = "running" if runner.running else "stopped"

    # --- source rows --------------------------------------------------------
    for in_name, in_conn in registry.inbound.items():
        in_metrics = metrics.inbound.get(in_name)
        in_type = in_conn.spec.type.value
        src_peer, src_port = _peer_port(in_type, in_conn.spec.settings)
        src_status = "running" if runner.inbound_running(in_name) else "stopped"
        src_method = _method_label(in_type)

        # Counters default to zero and idle time to None when no metrics exist for this inbound.
        if in_metrics:
            src_idle = (now - in_metrics.last_at) if in_metrics.last_at else None
            src_errored = in_metrics.errored
            src_read = in_metrics.read
        else:
            src_idle, src_errored, src_read = None, 0, 0

        rows.append(
            ConnectionRow(
                role="source",
                channel_id=in_name,
                channel_name=in_name,
                destination=None,
                name=f"{in_name} ▸ in",
                status=src_status,
                direction="in",
                method=src_method,
                peer=src_peer,
                port=src_port,
                queue_depth=None,
                idle_seconds=src_idle,
                alerts_active=0,
                errored=src_errored,
                read=src_read,
                written=None,
                backlog_seconds=None,
                delivered_age_seconds=None,
            )
        )

    # --- destination rows ---------------------------------------------------
    for (channel, dest_name), dest in metrics.destinations.items():
        if channel not in registry.inbound:
            # Only edges whose inbound side is in the live registry are reported by this loop.
            continue

        out_conn = registry.outbound.get(dest_name)
        if out_conn is None:
            # The live graph no longer declares this outbound (removed by a reload) but its
            # queued rows keep draining: report it honestly as "draining" with an unknown
            # method instead of mislabeling it as a running connector.
            dst_method, dst_peer, dst_port, dst_status = "—", None, None, "draining"
        else:
            out_type = out_conn.spec.type.value
            dst_method = _method_label(out_type)
            dst_peer, dst_port = _peer_port(out_type, out_conn.spec.settings)
            dst_status = runner_status

        dst_idle = (now - dest.last_done_at) if dest.last_done_at else None
        pending_age = (now - dest.oldest_pending_at) if dest.oldest_pending_at else None

        rows.append(
            ConnectionRow(
                role="destination",
                channel_id=channel,
                channel_name=channel,
                destination=dest_name,
                name=f"{channel} ▸ {dest_name}",
                status=dst_status,
                direction="out",
                method=dst_method,
                peer=dst_peer,
                port=dst_port,
                queue_depth=dest.queue_depth,
                idle_seconds=dst_idle,
                alerts_active=0,
                errored=dest.dead,
                read=None,
                written=dest.written,
                backlog_seconds=_backlog(dest.queue_depth, dest.recent_done),
                delivered_age_seconds=pending_age,
                # Effective simulate flag. It is queried even for a draining (removed) outbound,
                # whose suppression persists in the runner until full shutdown (#15).
                simulated=runner.outbound_simulated(dest_name),
            )
        )
    return rows
|
|
638
|
+
|
|
639
|
+
# --- code-first connection operations ------------------------------------
|
|
640
|
+
|
|
641
|
+
def _inbound(engine: Engine, name: str) -> RegistryRunner:
    """Return the engine's registry runner if ``name`` is one of its inbound connections.

    Raises ``HTTPException`` 404 when the engine has no registry runner or the runner's
    registry has no inbound connection called ``name``."""
    runner = engine.registry_runner
    if runner is not None and name in runner.registry.inbound:
        return runner
    raise HTTPException(404, f"no such inbound connection: {name}")
|
|
646
|
+
|
|
647
|
+
async def _control_guard(engine: Engine, identity: Identity, name: str) -> None:
    """Enforce per-channel scope for an inbound connection (the connection IS the channel).

    Returns silently when ``identity`` may access channel ``name``; otherwise records the
    denial via ``_audit_channel_denied`` and raises ``HTTPException`` 403."""
    if identity.can_access_channel(name):
        return
    await _audit_channel_denied(engine, identity, name)
    raise HTTPException(403, "not authorized for this connection")
|
|
652
|
+
|
|
653
|
+
@app.post("/connections/{name}/start")
async def start_connection(
    name: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require(Permission.CONNECTIONS_CONTROL)),
) -> dict[str, object]:
    # Scope check first (403 + audited denial), then existence (404), then the start itself.
    await _control_guard(engine, identity, name)
    runner = _inbound(engine, name)
    await runner.start_inbound(name)
    # Report the state observed after the call rather than assuming the start succeeded.
    now_running = runner.inbound_running(name)
    return {"name": name, "running": now_running}
|
|
663
|
+
|
|
664
|
+
@app.post("/connections/{name}/stop")
async def stop_connection(
    name: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require(Permission.CONNECTIONS_CONTROL)),
) -> dict[str, object]:
    # Scope check first (403 + audited denial), then existence (404), then the stop itself.
    await _control_guard(engine, identity, name)
    runner = _inbound(engine, name)
    await runner.stop_inbound(name)
    # Report the state observed after the call rather than assuming the stop succeeded.
    now_running = runner.inbound_running(name)
    return {"name": name, "running": now_running}
|
|
674
|
+
|
|
675
|
+
@app.post("/connections/{name}/restart")
async def restart_connection(
    name: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require(Permission.CONNECTIONS_CONTROL)),
) -> dict[str, object]:
    # Scope check first (403 + audited denial), then existence (404), then the restart itself.
    await _control_guard(engine, identity, name)
    runner = _inbound(engine, name)
    await runner.restart_inbound(name)
    # Report the state observed after the call rather than assuming the restart succeeded.
    now_running = runner.inbound_running(name)
    return {"name": name, "running": now_running}
|
|
685
|
+
|
|
686
|
+
@app.get("/connections/{name}/metadata", response_model=ConnectionMetadata)
async def connection_metadata(
    name: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require(Permission.MONITORING_READ)),
) -> ConnectionMetadata:
    """Static metadata for one connection (operability Tier 4): operator labels + a secret-scrubbed
    settings view. No live probe — see ``POST /connections/{name}/test``."""
    runner = engine.registry_runner
    if runner is None:
        raise HTTPException(503, "engine not started")

    # Inbound names are looked up first; an inbound's config is scoped per channel.
    inbound = runner.registry.inbound.get(name)
    if inbound is not None:
        await _control_guard(engine, identity, name)
        inbound_labels = dict(inbound.metadata) if inbound.metadata else None
        return ConnectionMetadata(
            name=name,
            direction="in",
            method=inbound.spec.type.value,
            running=runner.inbound_running(name),
            router=inbound.router,
            metadata=inbound_labels,
            settings=redacted_settings(inbound.spec.settings),
        )

    outbound = runner.registry.outbound.get(name)
    if outbound is None:
        raise HTTPException(404, f"no such connection: {name}")

    if identity.allowed_channels is not None:
        # An outbound spans channels, so a channel-scoped user may not read a shared one. This
        # is the same boundary /test and /purge enforce (don't disclose shared-outbound
        # topology). The denial is audited before the 403.
        await _audit_channel_denied(engine, identity, name)
        raise HTTPException(
            403, "channel-scoped users cannot read a shared outbound connection"
        )

    outbound_labels = dict(outbound.metadata) if outbound.metadata else None
    return ConnectionMetadata(
        name=name,
        direction="out",
        method=outbound.spec.type.value,
        running=runner.running,
        metadata=outbound_labels,
        settings=redacted_settings(outbound.spec.settings),
        simulated=runner.outbound_simulated(name),
    )
|
|
728
|
+
|
|
729
|
+
@app.post("/connections/{name}/test", response_model=ConnectionTestResult)
async def connection_test(
    name: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require(Permission.CONNECTIONS_TEST)),
) -> ConnectionTestResult:
    """Probe a connection's reachability (operability Tier 4) — builds a **fresh** connector
    (never the live one), honors the ``[egress]`` allowlist, and sends NO real data. Audited."""
    runner = engine.registry_runner
    if runner is None:
        raise HTTPException(503, "engine not started")

    is_inbound = name in runner.registry.inbound
    if not is_inbound and name not in runner.registry.outbound:
        raise HTTPException(404, f"no such connection: {name}")

    if is_inbound:
        direction = "in"
        # Testing an inbound is scoped per channel.
        await _control_guard(engine, identity, name)
    else:
        direction = "out"
        if identity.allowed_channels is not None:
            # An outbound spans channels, so a channel-scoped user may not probe a shared one
            # (same rule as purge). The denial is audited before the 403.
            await _audit_channel_denied(engine, identity, name)
            raise HTTPException(
                403, "channel-scoped users cannot test a shared outbound connection"
            )

    result = await _run_connection_test(runner, name, direction)

    # Record the probe and its outcome; only an inbound test is attributed to a channel.
    audit_payload = {
        "connection": name,
        "direction": direction,
        "supported": result.supported,
        "success": result.success,
        "detail": result.detail,
    }
    await engine.store.record_audit(
        "connection_test",
        actor=identity.username,
        channel_id=name if is_inbound else None,
        detail=json.dumps(audit_payload),
    )
    return result
|
|
769
|
+
|
|
770
|
+
@app.post("/connections/{name}/purge", response_model=PurgeResult | PendingApprovalResponse)
async def purge_connection(
    name: str,
    response: Response,
    engine: Engine = Depends(_get_engine),
    scope: str = Query("all", pattern="^(top|all)$"),
    identity: Identity = Depends(require_step_up(Permission.MESSAGES_PURGE)),
    gate: ApprovalGate | None = Depends(_get_gate),
) -> PurgeResult | PendingApprovalResponse:
    """Soft-cancel queued deliveries to an outbound connection (across all inbounds)."""
    # A purge targets an outbound and spans every inbound feeding it, so it cannot be confined
    # to a per-(inbound-)channel scope: any channel-scoped caller is refused, and the refusal
    # is audited.
    if identity.allowed_channels is not None:
        await _audit_channel_denied(engine, identity, name)
        raise HTTPException(
            403, "channel-scoped users cannot purge a shared outbound connection"
        )

    runner = engine.registry_runner
    if runner is None or name not in runner.registry.outbound:
        raise HTTPException(404, f"no such outbound connection: {name}")

    # Dual-control: when a gate is configured and it returns an approval id, the purge is held
    # for a second approver and the caller gets 202 instead of a result.
    if gate is not None:
        approval_id = await gate.guard(
            "connection_purge", {"name": name, "scope": scope}, requester=identity.username
        )
        if approval_id is not None:
            response.status_code = 202
            return PendingApprovalResponse(
                approval_id=approval_id,
                operation="connection_purge",
                detail="held for a second approver (dual-control)",
            )

    top_only = scope == "top"
    cancelled = await engine.store.cancel_queued(None, name, top_only=top_only)
    return PurgeResult(cancelled=cancelled)
|
|
805
|
+
|
|
806
|
+
# --- dead letters (verify + recover) -------------------------------------
|
|
807
|
+
|
|
808
|
+
@app.get("/dead-letters", response_model=DeadLetterList)
async def list_dead_letters(
    request: Request,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_phi_read(Permission.MESSAGES_READ)),
    channel_id: str | None = Query(None, max_length=256),
    destination_name: str | None = Query(None, max_length=256),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
) -> DeadLetterList:
    """Dead-lettered deliveries (newest first), optionally scoped to an inbound/outbound."""
    # Per-channel RBAC: restrict both the page and the total to the caller's channels
    # (None = all channels).
    allowed = _scope(identity)
    scope_filters = {
        "channel_id": channel_id,
        "destination_name": destination_name,
        "allowed_channels": allowed,
    }
    rows = await engine.store.list_dead(limit=limit, offset=offset, **scope_filters)
    total = await engine.store.count_dead(**scope_filters)

    # Same centralized per-property PHI gate as /messages (WP-9): messages:view_summary unlocks
    # the patient-identifying `summary` and the delivery `last_error` (which can quote field
    # values — review low-8); a caller without it gets them nulled.
    converted = [_dead_row(row) for row in rows]
    visible = [redact_unauthorized(item, identity) for item in converted]

    # Exposure is audited server-side (M-5), counted after redaction so it reflects exactly
    # what is returned.
    exposed = count_exposed(visible)
    if exposed:
        await request.app.state.summary_auditor.note(
            engine.store, identity.username, channel_id, exposed, time.time()
        )
    return DeadLetterList(total=total, limit=limit, offset=offset, dead_letters=visible)
|
|
843
|
+
|
|
844
|
+
@app.post(
    "/dead-letters/replay", response_model=DeadLetterReplayResult | PendingApprovalResponse
)
async def replay_dead_letters(
    req: DeadLetterReplayRequest,
    response: Response,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_step_up(Permission.MESSAGES_REPLAY)),
    gate: ApprovalGate | None = Depends(_get_gate),
) -> DeadLetterReplayResult | PendingApprovalResponse:
    """Re-queue dead-lettered deliveries (optionally scoped). Already-delivered rows are left
    alone; each affected message reverts from ``error`` to ``received`` and re-drains."""
    target_channel = req.channel_id
    target_destination = req.destination_name

    # Replay is not channel-filtered at the engine level, so an unscoped "replay all" would
    # cross channels: a channel-scoped caller must name a channel inside their scope.
    is_scoped = identity.allowed_channels is not None
    if is_scoped and not identity.can_access_channel(target_channel):
        await _audit_channel_denied(engine, identity, target_channel)
        raise HTTPException(403, "specify a channel within your scope to replay")

    # Dual-control: when a gate is configured and it returns an approval id, the replay is held
    # for a second approver and the caller gets 202 instead of a result.
    if gate is not None:
        approval_id = await gate.guard(
            "dead_letter_replay",
            {"channel_id": target_channel, "destination_name": target_destination},
            requester=identity.username,
        )
        if approval_id is not None:
            response.status_code = 202
            return PendingApprovalResponse(
                approval_id=approval_id,
                operation="dead_letter_replay",
                detail="held for a second approver (dual-control)",
            )

    requeued = await engine.replay_dead(
        channel_id=target_channel, destination_name=target_destination
    )
    # Audit only when something was actually re-queued, i.e. PHI was re-transmitted
    # (review M-4).
    if requeued:
        audit_detail = json.dumps(
            {"destination_name": target_destination, "requeued": requeued}
        )
        await engine.store.record_audit(
            "dead_letter_replay",
            actor=identity.username,
            channel_id=target_channel,
            detail=audit_detail,
        )
    return DeadLetterReplayResult(requeued=requeued)
|
|
889
|
+
|
|
890
|
+
# --- dual-control approvals (ASVS 2.3.5) ---------------------------------
|
|
891
|
+
|
|
892
|
+
@app.get("/approvals", response_model=ApprovalList)
async def list_approvals(
    _: Identity = Depends(require(Permission.APPROVALS_APPROVE)),
    gate: ApprovalGate | None = Depends(_get_gate),
) -> ApprovalList:
    """Open (still-pending, unexpired) high-value actions awaiting a second approver."""
    if gate is None:
        raise HTTPException(503, "approval workflow is not available")
    pending = await gate.list_pending()
    # Each pending entry is expanded as keyword arguments into the response model.
    infos = [PendingApprovalInfo(**entry) for entry in pending]
    return ApprovalList(approvals=infos)
|
|
901
|
+
|
|
902
|
+
@app.post("/approvals/{approval_id}/approve", response_model=ApprovalDecisionResult)
async def approve_action(
    approval_id: str,
    identity: Identity = Depends(require(Permission.APPROVALS_APPROVE)),
    gate: ApprovalGate | None = Depends(_get_gate),
) -> ApprovalDecisionResult:
    """Release a pending action: re-executes the captured operation and audits both identities. A
    requester can never approve their own request (dual-control, 2.3.5)."""
    if gate is None:
        raise HTTPException(503, "approval workflow is not available")
    try:
        outcome = await gate.approve(approval_id, approver=identity.username)
    except ApprovalError as exc:
        # The gate's error carries the HTTP status and detail to surface to the caller.
        raise HTTPException(exc.status, exc.detail) from exc
    else:
        return ApprovalDecisionResult(**outcome)
|
|
917
|
+
|
|
918
|
+
@app.post("/approvals/{approval_id}/reject", response_model=ApprovalDecisionResult)
async def reject_action(
    approval_id: str,
    identity: Identity = Depends(require(Permission.APPROVALS_APPROVE)),
    gate: ApprovalGate | None = Depends(_get_gate),
) -> ApprovalDecisionResult:
    """Decline a pending action without executing it (audited)."""
    if gate is None:
        raise HTTPException(503, "approval workflow is not available")
    try:
        outcome = await gate.reject(approval_id, approver=identity.username)
    except ApprovalError as exc:
        # The gate's error carries the HTTP status and detail to surface to the caller.
        raise HTTPException(exc.status, exc.detail) from exc
    else:
        return ApprovalDecisionResult(**outcome)
|
|
932
|
+
|
|
933
|
+
# --- config promote / reload ---------------------------------------------
|
|
934
|
+
|
|
935
|
+
@app.post("/config/reload", response_model=ReloadResult)
async def reload_config(
    req: ReloadRequest,
    engine: Engine = Depends(_get_engine),
    user: Identity = Depends(require_step_up(Permission.CONFIG_DEPLOY)),
) -> ReloadResult:
    """Load the code-first graph and atomically apply it to the running engine (quiesce-and-swap;
    in-flight outbox deliveries keep draining). ``config_dir`` defaults to the server's startup
    --config dir and must resolve within an allowed reload root — the loader executes Python, so
    an arbitrary path is refused (403). A bad/empty config is rejected and the running graph is
    left untouched. Every reload (and dry-run) is audited. Requires ``config:deploy``.

    ``dry_run=true`` is the promote pre-flight: it validates the graph against THIS environment's
    values (a missing ``env()`` value → 422) and reports the would-be graph **without** swapping.

    Error responses are intentionally generic (the detail is logged server-side, not returned)
    so a config:deploy holder can't probe the filesystem via reload error text."""
    requested = req.config_dir
    dry_run = req.dry_run

    async def _audit_failure(reason: str) -> None:
        # One "config_reload_failed" audit row; only the reason differs between failure modes.
        await engine.store.record_audit(
            "config_reload_failed",
            actor=user.username,
            detail=json.dumps({"requested": requested, "dry_run": dry_run, "reason": reason}),
        )

    try:
        # A real apply propagates so the cluster-wide config version is bumped and the other
        # nodes converge (Track B Step 6). A dry run never propagates because nothing is
        # applied; per the original note, a single-node engine ignores the flag.
        registry = await engine.reload(requested, dry_run=dry_run, propagate=not dry_run)
    except ConfigReloadDenied as exc:
        await engine.store.record_audit(
            "config_reload_denied",
            actor=user.username,
            detail=json.dumps({"requested": requested, "dry_run": dry_run}),
        )
        raise HTTPException(403, "config directory is not an allowed reload root") from exc
    except FileNotFoundError as exc:
        _log.warning("config reload failed (missing dir): %s", exc)
        await _audit_failure("not_found")
        raise HTTPException(404, "config directory not found") from exc
    except WiringError as exc:
        _log.warning("config reload failed (invalid config): %s", exc)
        await _audit_failure("invalid_config")
        raise HTTPException(422, "invalid configuration") from exc

    # Success: audit under a distinct action name for a dry run versus a real reload.
    action = "config_reload_check" if dry_run else "config_reload"
    last_dir = engine.last_reload_dir
    summary = {
        "dir": str(last_dir) if last_dir else None,
        "inbound": len(registry.inbound),
        "outbound": len(registry.outbound),
        "dry_run": dry_run,
    }
    await engine.store.record_audit(action, actor=user.username, detail=json.dumps(summary))

    runner = engine.registry_runner
    return ReloadResult(
        inbound=len(registry.inbound),
        outbound=len(registry.outbound),
        routers=len(registry.routers),
        handlers=len(registry.handlers),
        running=bool(runner and runner.running),
        dry_run=dry_run,
    )
|
|
1011
|
+
|
|
1012
|
+
# --- messages ------------------------------------------------------------
|
|
1013
|
+
|
|
1014
|
+
@app.get("/messages", response_model=MessageList)
async def list_messages(
    request: Request,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_phi_read(Permission.MESSAGES_READ)),
    channel_id: str | None = Query(None, max_length=256),
    status: str | None = Query(None, max_length=64),
    message_type: str | None = Query(None, max_length=64),
    control_id: str | None = Query(None, max_length=256),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
) -> MessageList:
    # The same filter set drives both the page query and the total count.
    filters = {
        "channel_id": channel_id,
        "status": status,
        "message_type": message_type,
        "control_id": control_id,
    }
    # Per-channel RBAC: only the caller's channels (None = all).
    allowed = _scope(identity)
    rows = await engine.store.list_messages(
        limit=limit, offset=offset, allowed_channels=allowed, **filters
    )
    total = await engine.store.count_messages(allowed_channels=allowed, **filters)

    # Per-property PHI gate, centralized in api/field_authz (WP-9, ASVS 8.2.3): a caller without
    # messages:view_summary gets `summary` AND `error` nulled (handler exception text can quote
    # field values — review low-8). The detail endpoint keeps them, gated instead by
    # messages:view_raw, which already exposes the body.
    summaries = [_summary(row) for row in rows]
    visible = [redact_unauthorized(item, identity) for item in summaries]

    # Every patient-identifying value actually returned is audited SERVER-SIDE (coalesced per
    # actor/hour) and never gated on a client flag, so a scripted bulk fetch cannot harvest the
    # patient census unaudited (review M-5). Counting after redaction matches what is returned.
    exposed = count_exposed(visible)
    if exposed:
        await request.app.state.summary_auditor.note(
            engine.store, identity.username, channel_id, exposed, time.time()
        )
    return MessageList(total=total, limit=limit, offset=offset, messages=visible)
|
|
1052
|
+
|
|
1053
|
+
@app.get("/messages/{message_id}", response_model=MessageDetail)
async def get_message(
    message_id: str,
    request: Request,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_phi_read(Permission.MESSAGES_VIEW_RAW)),
) -> MessageDetail:
    row = await engine.store.get_message(message_id)
    if row is None:
        raise HTTPException(404, f"no such message: {message_id}")

    owning_channel = row["channel_id"]
    if not identity.can_access_channel(owning_channel):
        # 404 (not 403) outside the caller's channel scope, so the response does not reveal
        # that the message exists in another tenant's channel. The denial is still audited.
        await _audit_channel_denied(engine, identity, owning_channel)
        raise HTTPException(404, f"no such message: {message_id}")

    # Opening a body is PHI access, so record it (with the viewer) before building the response.
    # record_view feeds the per-message timeline; record_audit puts it in the tamper-evident,
    # GET /audit-visible compliance chain (docs/PHI.md §6 names message_view as audited —
    # review M-3).
    await engine.store.record_view(message_id, actor=identity.username)
    await engine.store.record_audit(
        "message_view",
        actor=identity.username,
        channel_id=owning_channel,
        detail=json.dumps({"message_id": message_id}),
    )

    outbox_rows = await engine.store.outbox_for(message_id)
    event_rows = await engine.store.events_for(message_id)

    detail = MessageDetail(
        **_summary(row).model_dump(),
        raw=row["raw"],
        outbox=[
            OutboxInfo(
                id=entry["id"],
                destination_name=entry["destination_name"],
                status=entry["status"],
                attempts=entry["attempts"],
                next_attempt_at=entry["next_attempt_at"],
                last_error=entry["last_error"],
            )
            for entry in outbox_rows
        ],
        events=[
            EventInfo(
                ts=entry["ts"],
                event=entry["event"],
                destination=entry["destination"],
                detail=entry["detail"],
            )
            for entry in event_rows
        ],
    )

    # Per-property PHI gate (#120): the patient `summary`, the exception `error`, every delivery
    # `last_error`, and every event `detail` gate on messages:view_summary. Redaction keys on the
    # EXACT type (no MRO walk), so the MessageDetail wrapper and each nested OutboxInfo/EventInfo
    # are redacted individually and then stitched back together. The raw body stays on this
    # route's view_raw gate.
    redacted_outbox = [redact_unauthorized(item, identity) for item in detail.outbox]
    redacted_events = [redact_unauthorized(item, identity) for item in detail.events]
    detail = redact_unauthorized(detail, identity).model_copy(
        update={"outbox": redacted_outbox, "events": redacted_events}
    )

    # Exposure is audited server-side, mirroring the list endpoints; counting after redaction
    # reflects what is actually returned.
    exposed = count_exposed([detail, *redacted_outbox, *redacted_events])
    if exposed:
        await request.app.state.summary_auditor.note(
            engine.store, identity.username, owning_channel, exposed, time.time()
        )
    return detail
|
|
1119
|
+
|
|
1120
|
+
@app.get("/messages/{message_id}/responses", response_model=MessageResponses)
async def get_message_responses(
    message_id: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_phi_read(Permission.MESSAGES_READ)),
) -> MessageResponses:
    """Return the captured request/response replies for one message (ADR 0013).

    The route itself requires the message-read permission. The PHI ``body`` of each reply is
    populated only when the caller also holds ``MESSAGES_VIEW_RAW``; otherwise it is ``None``.
    Every access writes a ``response.read`` audit record, and a per-message view is recorded
    when bodies are actually returned.

    Raises:
        HTTPException: 404 when the message does not exist or lies outside the caller's
            channel scope (a channel-denied audit is written in the latter case).
    """
    message = await engine.store.get_message(message_id)
    if message is None:
        raise HTTPException(404, f"no such message: {message_id}")
    if not identity.can_access_channel(message["channel_id"]):
        # 404 rather than 403: do not reveal that a message exists in a channel the caller
        # cannot access (per-channel RBAC), mirroring get_message.
        await _audit_channel_denied(engine, identity, message["channel_id"])
        raise HTTPException(404, f"no such message: {message_id}")

    replies = await engine.store.correlate_response(message_id)
    may_see_body = identity.has(Permission.MESSAGES_VIEW_RAW)

    # Reading captured replies is PHI access, so it is always audited.
    audit_detail = json.dumps(
        {"message_id": message_id, "count": len(replies), "body": may_see_body}
    )
    await engine.store.record_audit(
        "response.read",
        actor=identity.username,
        channel_id=message["channel_id"],
        detail=audit_detail,
    )
    # When bodies are exposed, also extend the per-message PHI view timeline, exactly like
    # opening a raw body.
    if may_see_body and replies:
        await engine.store.record_view(message_id, actor=identity.username)

    # `detail` can embed a reply fragment (e.g. an unparseable-ACK note), so each entry goes
    # through redact_unauthorized (#120); the PHI `body` stays on the view_raw check above.
    redacted = []
    for reply in replies:
        info = CapturedResponseInfo(
            destination_name=reply.destination_name,
            response_seq=reply.response_seq,
            outcome=reply.outcome,
            detail=reply.detail,
            captured_at=reply.captured_at,
            body=reply.body if may_see_body else None,
        )
        redacted.append(redact_unauthorized(info, identity))
    return MessageResponses(message_id=message_id, responses=redacted)
|
|
1170
|
+
|
|
1171
|
+
@app.get("/messages/{message_id}/outbound", response_model=OutboundPayloads)
async def get_message_outbound(
    message_id: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_phi_read(Permission.MESSAGES_VIEW_RAW)),
) -> OutboundPayloads:
    """Return the transformed outbound payloads routed for a message, one per destination
    (#14 parity tool).

    The PHI bodies are returned in full, so the route requires ``MESSAGES_VIEW_RAW`` outright
    (unlike ``/responses``, where the body is conditional). Every access writes an
    ``outbound.read`` audit record, plus a per-message view record when payloads are returned.

    Raises:
        HTTPException: 404 when the message does not exist or lies outside the caller's
            channel scope (a channel-denied audit is written in the latter case).
    """
    message = await engine.store.get_message(message_id)
    if message is None:
        raise HTTPException(404, f"no such message: {message_id}")
    if not identity.can_access_channel(message["channel_id"]):
        # 404 rather than 403: do not reveal a message in a channel outside the caller's
        # scope (per-channel RBAC), mirroring get_message.
        await _audit_channel_denied(engine, identity, message["channel_id"])
        raise HTTPException(404, f"no such message: {message_id}")

    outbound_rows = await engine.store.outbox_payloads_for(message_id)

    # Returning transformed bodies is PHI access: audit the read first.
    audit_detail = json.dumps({"message_id": message_id, "count": len(outbound_rows)})
    await engine.store.record_audit(
        "outbound.read",
        actor=identity.username,
        channel_id=message["channel_id"],
        detail=audit_detail,
    )
    # Only when bodies are actually returned, record the per-message PHI view timeline.
    if outbound_rows:
        await engine.store.record_view(message_id, actor=identity.username)

    entries = []
    for outbound in outbound_rows:
        entries.append(
            OutboundPayloadInfo(
                destination_name=outbound["destination_name"],
                status=outbound["status"],
                payload=outbound["payload"],
            )
        )
    return OutboundPayloads(message_id=message_id, payloads=entries)
|
|
1212
|
+
|
|
1213
|
+
@app.post("/messages/{message_id}/replay", response_model=ReplayResult)
async def replay_message(
    message_id: str,
    engine: Engine = Depends(_get_engine),
    identity: Identity = Depends(require_step_up(Permission.MESSAGES_REPLAY)),
) -> ReplayResult:
    """Re-queue a message's deliveries and audit who did it.

    Raises:
        HTTPException: 404 when the message does not exist or lies outside the caller's
            channel scope; 409 when the message exists but ``engine.replay`` re-queued nothing.
    """
    message = await engine.store.get_message(message_id)
    if message is None:
        raise HTTPException(404, f"no such message: {message_id}")
    if not identity.can_access_channel(message["channel_id"]):
        await _audit_channel_denied(engine, identity, message["channel_id"])
        raise HTTPException(404, f"no such message: {message_id}")

    requeued_count = await engine.replay(message_id)
    if requeued_count == 0:
        # The message exists (checked above) but has no re-queueable outbox rows — it errored,
        # was filtered, or routed nowhere. Report the no-op explicitly instead of a misleading
        # 200/requeued=0 (the store leaves its disposition intact — review M-2).
        raise HTTPException(
            409,
            f"message {message_id} has no deliveries to replay "
            "(it errored, was filtered, or routed nowhere)",
        )

    # An actual re-transmission of PHI: record the actor in the tamper-evident chain (review M-4).
    audit_detail = json.dumps({"message_id": message_id, "requeued": requeued_count})
    await engine.store.record_audit(
        "message_replay",
        actor=identity.username,
        channel_id=message["channel_id"],
        detail=audit_detail,
    )
    return ReplayResult(message_id=message_id, requeued=requeued_count)
|
|
1242
|
+
|
|
1243
|
+
# --- stats ---------------------------------------------------------------
|
|
1244
|
+
|
|
1245
|
+
@app.get("/stats", response_model=StatsResponse)
async def stats(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_READ)),
) -> StatsResponse:
    """Return the store's outbox counts by status together with the in-pipeline depth."""
    # Awaited in the same order as the response fields: stats first, then pipeline depth.
    by_status = await engine.store.stats()
    pipeline_depth = await engine.store.in_pipeline_depth()
    return StatsResponse(outbox_by_status=by_status, in_pipeline=pipeline_depth)
|
|
1254
|
+
|
|
1255
|
+
# --- engine + DB status --------------------------------------------------
|
|
1256
|
+
|
|
1257
|
+
@app.get("/status", response_model=SystemStatus)
async def system_status(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_READ)),
) -> SystemStatus:
    """Report engine status (version, uptime, pid, channel counts, outbox stats) and the
    database status returned by ``engine.store.db_status()``."""
    runner = engine.registry_runner
    if runner is None:
        total = 0
        running = 0
    else:
        # One "channel" per inbound connection.
        total = len(runner.registry.inbound)
        running = len(
            [name for name in runner.registry.inbound if runner.inbound_running(name)]
        )

    db = await engine.store.db_status()

    # A falsy started_at reports zero uptime; the result is clamped so it is never negative.
    if engine.started_at:
        uptime = max(0.0, time.time() - engine.started_at)
    else:
        uptime = 0.0

    engine_info = EngineInfo(
        version=__version__,
        uptime_seconds=uptime,
        pid=os.getpid(),
        channels_total=total,
        channels_running=running,
        channels_stopped=total - running,
        outbox_by_status=await engine.store.stats(),
    )
    db_info = DbInfo(
        path=db.path,
        size_bytes=db.size_bytes,
        disk_free_bytes=db.disk_free_bytes,
        journal_mode=db.journal_mode,
        messages=db.messages,
        events=db.events,
        audit=db.audit,
    )
    return SystemStatus(engine=engine_info, db=db_info)
|
|
1290
|
+
|
|
1291
|
+
# --- cluster observability (Track B Step 7) ------------------------------
|
|
1292
|
+
|
|
1293
|
+
@app.get("/cluster/status", response_model=ClusterStatus)
async def cluster_status(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_READ)),
) -> ClusterStatus:
    """Report this node's cluster posture: node id, whether it is clustered, whether it is the
    leader, its active-passive role, and the cached config version.

    The role is ``"single-node"`` when the coordinator is not clustered, otherwise ``"primary"``
    for the leader and ``"standby"`` for any other node.
    """
    coordinator = engine.coordinator
    clustered = coordinator.is_clustered()
    is_leader = coordinator.is_leader()

    if not clustered:
        role = "single-node"
    elif is_leader:
        role = "primary"
    else:
        role = "standby"

    return ClusterStatus(
        node_id=coordinator.node_id,
        clustered=clustered,
        is_leader=is_leader,
        role=role,
        config_version=coordinator.config_version_cached(),
    )
|
|
1313
|
+
|
|
1314
|
+
@app.get("/cluster/nodes", response_model=ClusterNodeList)
async def cluster_nodes(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_READ)),
) -> ClusterNodeList:
    """List cluster membership: one entry per member reported by the coordinator, the node id of
    the first entry flagged as leader (``None`` when there is none), and the leadership-lease
    owner and expiry reported by ``coordinator.leadership_lease()``."""
    coordinator = engine.coordinator

    nodes = []
    for member in await coordinator.cluster_members():
        nodes.append(
            ClusterNode(
                node_id=member.node_id,
                host=member.host,
                pid=member.pid,
                status=member.status,
                started_at=member.started_at,
                last_seen=member.last_seen,
                is_leader=member.is_leader,
            )
        )

    # The first entry flagged as leader wins; None when no entry is flagged.
    leader = None
    for node in nodes:
        if node.is_leader:
            leader = node.node_id
            break

    lease_owner, lease_expires_at = await coordinator.leadership_lease()
    return ClusterNodeList(
        nodes=nodes,
        leader_node_id=leader,
        lease_owner=lease_owner,
        lease_expires_at=lease_expires_at,
    )
|
|
1345
|
+
|
|
1346
|
+
@app.post("/status/integrity-check", response_model=IntegrityResult)
async def integrity_check(
    engine: Engine = Depends(_get_engine),
    _user: Identity = Depends(require(Permission.MONITORING_DIAGNOSE)),
) -> IntegrityResult:
    """Run a database integrity check on demand (PRAGMA quick_check)."""
    # The store returns a (passed, report) pair; forward both to the response unchanged.
    passed, report = await engine.store.integrity_check()
    result = IntegrityResult(ok=passed, detail=report)
    return result
|
|
1354
|
+
|
|
1355
|
+
@app.websocket("/ws/stats")
async def ws_stats(websocket: WebSocket) -> None:
    """Push queue-depth stats to the console roughly once a second until it disconnects — the
    live monitor feed.

    The session is re-validated every ``_WS_REVALIDATE_SECONDS`` so a revoked/expired/downgraded
    token can't keep streaming forever, and concurrent sockets are capped at
    ``_MAX_WS_CONNECTIONS`` (API-WS).

    Close codes: 1008 when unauthenticated/forbidden (initially or on re-validation), 1011 when
    no engine is attached to the app, 1013 when the connection cap is reached.
    """
    identity = await authorize_ws(websocket, Permission.MONITORING_READ)
    if identity is None:
        await websocket.close(code=1008)  # policy violation (unauthenticated/forbidden)
        return
    engine_obj: Engine | None = getattr(websocket.app.state, "engine", None)
    if engine_obj is None:
        await websocket.close(code=1011)
        return
    state = websocket.app.state
    if getattr(state, "ws_count", 0) >= _MAX_WS_CONNECTIONS:
        await websocket.close(code=1013)  # try again later — too many live monitor sockets
        return
    auth = getattr(state, "auth", None)
    token = ws_token(websocket)
    # Reserve the slot BEFORE the first await after the cap check. Incrementing only after
    # `await websocket.accept()` let several concurrent handshakes all pass the check above and
    # push the live-socket count past the cap.
    state.ws_count = getattr(state, "ws_count", 0) + 1
    try:
        await websocket.accept()
        elapsed = 0.0
        while True:
            await websocket.send_json({"outbox_by_status": await engine_obj.store.stats()})
            await asyncio.sleep(1.0)
            elapsed += 1.0
            if auth is not None and auth.enabled and elapsed >= _WS_REVALIDATE_SECONDS:
                elapsed = 0.0
                # activity=False: this keepalive must not reset the session's idle clock.
                current = await auth.identity_for_token(token, activity=False)
                if current is None or not current.has(Permission.MONITORING_READ):
                    await websocket.close(code=1008)
                    return
    except WebSocketDisconnect:
        return
    finally:
        # Always release the reserved slot, whether the handshake failed, the client left, or
        # the stream errored.
        state.ws_count = max(0, getattr(state, "ws_count", 1) - 1)
|
|
1393
|
+
|
|
1394
|
+
return app
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
def _emit_bootstrap_admin(bootstrap: BootstrapAdmin, store_settings: StoreSettings) -> None:
    """Persist the one-time bootstrap password to a restricted file — never the rotating log.

    Until rotated it is a standing Administrator credential, so it must not land in NSSM's broadly
    readable stdout capture. Write it to an owner-only file the operator consumes and deletes; log
    only the location. Paired with server-side must_change_password enforcement, it dies at first login.

    The file is ``bootstrap-admin.txt`` in the parent directory of the resolved store path
    (``store_settings.path``, or ``"."`` when that is empty).

    Args:
        bootstrap: The freshly created bootstrap admin; its ``username`` and ``password`` are
            written to the file.
        store_settings: Store settings whose ``path`` locates the directory for the file.

    Raises:
        OSError: If the file cannot be created or written.
    """
    base = Path(store_settings.path or ".").resolve()
    secret_file = base.parent / "bootstrap-admin.txt"
    # Create (or truncate) the file EMPTY with an owner-only mode and lock it down BEFORE the
    # secret is written. Writing first and restricting afterwards left a window in which the
    # password sat in a file with default (umask/inherited) permissions.
    fd = os.open(secret_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    os.close(fd)
    # Reuse the store's platform-correct primitive: os.chmod(0o600) is a no-op on Windows (the NSSM
    # deployment target), so _secure_file sets an owner-only DACL via icacls there, chmod on POSIX.
    # This also tightens a pre-existing file whose mode the O_CREAT above did not change.
    _secure_file(secret_file)
    # Re-opening for write truncates the existing file in place, so the restrictions applied
    # above remain in force while the secret is written.
    secret_file.write_text(
        f"username: {bootstrap.username}\npassword: {bootstrap.password}\n", encoding="utf-8"
    )
    _log.warning(
        "Created bootstrap admin %r; one-time password written to %s — sign in, change it, then "
        "delete that file.",
        bootstrap.username,
        secret_file,
    )
|
|
1418
|
+
|
|
1419
|
+
|
|
1420
|
+
# Seconds `_session_reaper` sleeps between purge passes (3600.0 s = one hour).
_SESSION_REAP_INTERVAL = 3600.0 # purge expired/idle sessions hourly to bound the sessions table
|
|
1421
|
+
|
|
1422
|
+
|
|
1423
|
+
async def _session_reaper(store: Store) -> None:
    """Purge expired session rows once immediately and then every ``_SESSION_REAP_INTERVAL``
    seconds, until the task is cancelled.

    A failed purge is logged and retried on the next interval instead of ending the task: a dead
    reaper would let the sessions table grow unbounded, and its stored exception could later
    abort lifespan shutdown (review M-33). Cancellation always propagates.
    """

    async def _purge_once() -> None:
        # One purge attempt: swallow (and log) ordinary errors, never cancellation.
        try:
            await store.purge_expired_sessions()
        except asyncio.CancelledError:
            raise
        except Exception:
            _log.exception("session reaper: purge failed; will retry next interval")

    while True:
        await _purge_once()
        await asyncio.sleep(_SESSION_REAP_INTERVAL)
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
def create_managed_app(
    *,
    db_path: str | Path | None = None,
    store_settings: StoreSettings | None = None,
    config_dir: str | Path | None = None,
    config_reload_roots: Sequence[str] = (),
    poll_interval: float = 0.25,
    synchronous: str = "NORMAL",
    inbound_bind_host: str = "127.0.0.1",
    allow_insecure_bind: bool = False,
    delivery_defaults: RetryPolicy | None = None,
    ordering_default: OrderingMode | None = None,
    internal_error_default: InternalErrorPolicy | None = None,
    buildup_default: BuildupThreshold | None = None,
    ack_after_default: AckAfter | None = None,
    max_correlation_depth: int = 8,
    env_values: Mapping[str, Any] | None = None,
    env_values_provider: Callable[[], Mapping[str, Any]] | None = None,
    auth_settings: AuthSettings | None = None,
    ai_settings: AiSettings | None = None,
    alerts_settings: AlertsSettings | None = None,
    retention_settings: RetentionSettings | None = None,
    cert_monitor_settings: CertMonitorSettings | None = None,
    api_tls_cert_file: str | None = None,
    reference_settings: ReferenceSettings | None = None,
    egress_settings: EgressSettings | None = None,
    shadow_settings: ShadowSettings | None = None,
    cluster_settings: ClusterSettings | None = None,
    approvals_settings: ApprovalsSettings | None = None,
    expose_docs: bool = False,
    ws_allowed_origins: Sequence[str] = (),
) -> FastAPI:
    """Build an app that owns its engine for its whole lifespan (CLI server / sync tests).

    Pass ``store_settings`` for full backend selection (the service path), or ``db_path`` (+optional
    ``synchronous``) as a SQLite shortcut. ``config_dir`` loads the code-first Connection/Router/
    Handler graph. ``auth_settings`` (when enabled) attaches an :class:`AuthService`, seeds the
    built-in roles, and creates a bootstrap admin on first run. The store is opened via the
    backend-agnostic :func:`~messagefoundry.store.open_store`.

    If startup fails after the alert notifier has been started, the lifespan still runs its
    teardown: the engine is stopped when ``engine.start()`` had completed, and any started
    notifiers are closed, before the original error propagates.

    Raises:
        ValueError: If neither ``store_settings`` nor ``db_path`` is given.
    """
    if store_settings is None:
        if db_path is None:
            raise ValueError("create_managed_app requires either store_settings or db_path")
        store_settings = sqlite_settings(db_path, synchronous=synchronous)
    resolved = store_settings

    @asynccontextmanager
    async def lifespan(app: FastAPI) -> AsyncIterator[None]:
        # Process-level last-resort: route any otherwise-unhandled asyncio task/callback exception
        # through safe_exc → the log, so it can't escape as a raw traceback (possible PHI) or die
        # silently (ASVS 16.5.4). Here because set_exception_handler needs the running loop.
        install_loop_exception_handler()
        store = await open_store(resolved)
        # Operational alert notifier (webhook/email). None when no transport is configured → the
        # engine falls back to the logging sink. Its background dispatch task is owned by this
        # lifespan: started here, drained + stopped after the engine in the finally below.
        notifier = notifier_from_settings(alerts_settings) if alerts_settings is not None else None
        if notifier is not None:
            notifier.start()
        reaper: asyncio.Task[None] | None = None
        security_notifier = None
        # Set only once engine.start() has returned, so teardown never calls stop() on an
        # engine that failed to start.
        engine_started = False
        # Everything from here on runs inside the try, so a startup failure (engine start, auth
        # initialisation, writing the bootstrap file) still runs the teardown instead of leaving
        # the engine and notifier background tasks running.
        try:
            # Cluster coordinator (Track B Step 3) — built from the opened store so a Postgres-backed
            # store can reach its pool. Returns the no-op NullCoordinator unless [cluster].enabled on a
            # Postgres store, so single-node is byte-identical. The Engine owns its lifecycle (start/stop
            # in engine.start()/stop()), so the lifespan only constructs + passes it here.
            coordinator = build_coordinator(store, cluster_settings)
            engine = Engine(
                store,
                poll_interval=poll_interval,
                max_correlation_depth=max_correlation_depth,
                config_dir=config_dir,
                config_reload_roots=config_reload_roots,
                inbound_bind_host=inbound_bind_host,
                allow_insecure_bind=allow_insecure_bind,
                delivery_defaults=delivery_defaults,
                ordering_default=ordering_default,
                internal_error_default=internal_error_default,
                buildup_default=buildup_default,
                ack_after_default=ack_after_default,
                alert_sink=notifier,
                retention_settings=retention_settings,
                cert_monitor_settings=cert_monitor_settings,
                api_tls_cert_file=api_tls_cert_file,
                reference_settings=reference_settings,
                egress_settings=egress_settings,
                shadow_settings=shadow_settings,
                active_environment=ai_settings.environment if ai_settings else None,
                env_values=env_values,
                env_values_provider=env_values_provider,
                coordinator=coordinator,
                cluster_settings=cluster_settings,
            )
            if config_dir is not None:
                engine.add_registry(load_config(config_dir))
            await engine.start()
            engine_started = True
            app.state.engine = engine
            app.state.approval_gate = _build_approval_gate(
                engine, approvals_settings or ApprovalsSettings()
            )
            if auth_settings is not None and auth_settings.enabled:
                # Out-of-band security-event email (ASVS 6.3.5/6.3.7) — reuses the [alerts] SMTP transport,
                # sent to each affected user's own address. None when disabled or no SMTP configured; the
                # /me/security-events feed still records events. Its background task is owned by this
                # lifespan (started here, drained + closed after the engine in the finally below).
                if auth_settings.notify_security_events and alerts_settings is not None:
                    security_notifier = security_notifier_from_settings(alerts_settings)
                    if security_notifier is not None:
                        security_notifier.start()
                auth = AuthService(store, auth_settings, security_notifier=security_notifier)
                bootstrap = await auth.initialize()
                app.state.auth = auth
                if bootstrap is not None:
                    _emit_bootstrap_admin(bootstrap, resolved)
                reaper = asyncio.create_task(_session_reaper(store))
            yield
        finally:
            if reaper is not None:
                reaper.cancel()
                # gather(return_exceptions): absorbs both our cancellation AND any exception a
                # previously-died reaper stored, so it can't propagate here and skip engine.stop()
                # (review M-33).
                await asyncio.gather(reaper, return_exceptions=True)
            if engine_started:
                await engine.stop()
            if security_notifier is not None:
                # drain queued user emails, bounded by SMTP timeout
                await security_notifier.aclose()
            if notifier is not None:
                # Stop accepting alerts last (after the engine quiesces) so any final
                # connection_stopped/queue_buildup still drains; bounded by the transport timeouts.
                await notifier.aclose()

    # Auth disabled (or unset) → explicitly run open (dev/loopback; __main__ refuses a non-loopback
    # serve when auth is off). Auth enabled → fail-closed until the lifespan attaches the service.
    allow_no_auth = auth_settings is None or not auth_settings.enabled
    return create_app(
        lifespan=lifespan,
        ai_settings=ai_settings,
        expose_docs=expose_docs,
        allow_no_auth=allow_no_auth,
        ws_allowed_origins=ws_allowed_origins,
    )
|