messagefoundry 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. messagefoundry/__init__.py +108 -0
  2. messagefoundry/__main__.py +1155 -0
  3. messagefoundry/api/__init__.py +27 -0
  4. messagefoundry/api/app.py +1581 -0
  5. messagefoundry/api/approvals.py +184 -0
  6. messagefoundry/api/auth_models.py +211 -0
  7. messagefoundry/api/auth_routes.py +655 -0
  8. messagefoundry/api/field_authz.py +96 -0
  9. messagefoundry/api/models.py +374 -0
  10. messagefoundry/api/security.py +247 -0
  11. messagefoundry/api/tls.py +47 -0
  12. messagefoundry/auth/__init__.py +39 -0
  13. messagefoundry/auth/data/common_passwords.NOTICE +13 -0
  14. messagefoundry/auth/data/common_passwords.txt +10000 -0
  15. messagefoundry/auth/identity.py +71 -0
  16. messagefoundry/auth/ldap.py +264 -0
  17. messagefoundry/auth/notifications.py +68 -0
  18. messagefoundry/auth/passwords.py +53 -0
  19. messagefoundry/auth/permissions.py +120 -0
  20. messagefoundry/auth/policy.py +153 -0
  21. messagefoundry/auth/ratelimit.py +55 -0
  22. messagefoundry/auth/service.py +1323 -0
  23. messagefoundry/auth/tokens.py +26 -0
  24. messagefoundry/auth/totp.py +174 -0
  25. messagefoundry/checks.py +174 -0
  26. messagefoundry/config/__init__.py +30 -0
  27. messagefoundry/config/active_environment.py +80 -0
  28. messagefoundry/config/ai_policy.py +140 -0
  29. messagefoundry/config/code_sets.py +260 -0
  30. messagefoundry/config/connections_edit.py +200 -0
  31. messagefoundry/config/connections_file.py +287 -0
  32. messagefoundry/config/db_lookup.py +117 -0
  33. messagefoundry/config/environments.py +116 -0
  34. messagefoundry/config/ingest_time.py +83 -0
  35. messagefoundry/config/models.py +240 -0
  36. messagefoundry/config/reference.py +158 -0
  37. messagefoundry/config/response.py +83 -0
  38. messagefoundry/config/run_context.py +153 -0
  39. messagefoundry/config/settings.py +1311 -0
  40. messagefoundry/config/state.py +99 -0
  41. messagefoundry/config/tls_policy.py +110 -0
  42. messagefoundry/config/wiring.py +1918 -0
  43. messagefoundry/console/__init__.py +20 -0
  44. messagefoundry/console/__main__.py +274 -0
  45. messagefoundry/console/_async.py +107 -0
  46. messagefoundry/console/change_password.py +111 -0
  47. messagefoundry/console/client.py +552 -0
  48. messagefoundry/console/connections.py +324 -0
  49. messagefoundry/console/login.py +107 -0
  50. messagefoundry/console/mfa.py +205 -0
  51. messagefoundry/console/reauth.py +94 -0
  52. messagefoundry/console/search.py +57 -0
  53. messagefoundry/console/service_control.py +137 -0
  54. messagefoundry/console/sessions.py +122 -0
  55. messagefoundry/console/shell.py +410 -0
  56. messagefoundry/console/status.py +377 -0
  57. messagefoundry/console/users_page.py +282 -0
  58. messagefoundry/console/widgets.py +553 -0
  59. messagefoundry/generators/README.md +27 -0
  60. messagefoundry/generators/__init__.py +15 -0
  61. messagefoundry/generators/_core.py +589 -0
  62. messagefoundry/generators/_hl7data.py +428 -0
  63. messagefoundry/generators/adt.py +286 -0
  64. messagefoundry/generators/all_types.py +24 -0
  65. messagefoundry/generators/bar.py +28 -0
  66. messagefoundry/generators/dft.py +20 -0
  67. messagefoundry/generators/mdm.py +39 -0
  68. messagefoundry/generators/mfn.py +46 -0
  69. messagefoundry/generators/oml.py +32 -0
  70. messagefoundry/generators/orl.py +30 -0
  71. messagefoundry/generators/orm.py +23 -0
  72. messagefoundry/generators/oru.py +21 -0
  73. messagefoundry/generators/ras.py +20 -0
  74. messagefoundry/generators/rde.py +54 -0
  75. messagefoundry/generators/siu.py +64 -0
  76. messagefoundry/generators/vxu.py +20 -0
  77. messagefoundry/hl7schema.py +75 -0
  78. messagefoundry/last_resort.py +55 -0
  79. messagefoundry/logging_setup.py +332 -0
  80. messagefoundry/parsing/__init__.py +64 -0
  81. messagefoundry/parsing/consistency.py +166 -0
  82. messagefoundry/parsing/groups.py +228 -0
  83. messagefoundry/parsing/message.py +453 -0
  84. messagefoundry/parsing/peek.py +237 -0
  85. messagefoundry/parsing/split.py +120 -0
  86. messagefoundry/parsing/summary.py +46 -0
  87. messagefoundry/parsing/tree.py +128 -0
  88. messagefoundry/parsing/validate.py +95 -0
  89. messagefoundry/parsing/x12/__init__.py +46 -0
  90. messagefoundry/parsing/x12/delimiters.py +140 -0
  91. messagefoundry/parsing/x12/errors.py +30 -0
  92. messagefoundry/parsing/x12/interchange.py +232 -0
  93. messagefoundry/parsing/x12/message.py +200 -0
  94. messagefoundry/parsing/x12/peek.py +207 -0
  95. messagefoundry/pipeline/__init__.py +21 -0
  96. messagefoundry/pipeline/alert_sinks.py +486 -0
  97. messagefoundry/pipeline/alerts.py +100 -0
  98. messagefoundry/pipeline/cert_expiry.py +219 -0
  99. messagefoundry/pipeline/cluster.py +955 -0
  100. messagefoundry/pipeline/cluster_sqlserver.py +444 -0
  101. messagefoundry/pipeline/config_convergence.py +137 -0
  102. messagefoundry/pipeline/dryrun.py +450 -0
  103. messagefoundry/pipeline/engine.py +756 -0
  104. messagefoundry/pipeline/leader_tasks.py +158 -0
  105. messagefoundry/pipeline/reference_sync.py +369 -0
  106. messagefoundry/pipeline/retention.py +289 -0
  107. messagefoundry/pipeline/security_notify.py +168 -0
  108. messagefoundry/pipeline/state_convergence.py +143 -0
  109. messagefoundry/pipeline/wiring_runner.py +1722 -0
  110. messagefoundry/py.typed +0 -0
  111. messagefoundry/redaction.py +71 -0
  112. messagefoundry/scaffold.py +321 -0
  113. messagefoundry/secrets_dpapi.py +129 -0
  114. messagefoundry/store/__init__.py +46 -0
  115. messagefoundry/store/audit_tee.py +67 -0
  116. messagefoundry/store/base.py +758 -0
  117. messagefoundry/store/crypto.py +166 -0
  118. messagefoundry/store/keyprovider.py +192 -0
  119. messagefoundry/store/postgres.py +3447 -0
  120. messagefoundry/store/sqlserver.py +3014 -0
  121. messagefoundry/store/store.py +3790 -0
  122. messagefoundry/timezone.py +207 -0
  123. messagefoundry/transports/__init__.py +50 -0
  124. messagefoundry/transports/base.py +269 -0
  125. messagefoundry/transports/database.py +693 -0
  126. messagefoundry/transports/file.py +551 -0
  127. messagefoundry/transports/framing.py +164 -0
  128. messagefoundry/transports/loopback.py +53 -0
  129. messagefoundry/transports/mllp.py +644 -0
  130. messagefoundry/transports/remotefile.py +664 -0
  131. messagefoundry/transports/rest.py +281 -0
  132. messagefoundry/transports/signing.py +321 -0
  133. messagefoundry/transports/soap.py +507 -0
  134. messagefoundry/transports/tcp.py +307 -0
  135. messagefoundry/transports/timer.py +146 -0
  136. messagefoundry/transports/x12.py +323 -0
  137. messagefoundry-0.1.0.dist-info/METADATA +212 -0
  138. messagefoundry-0.1.0.dist-info/RECORD +142 -0
  139. messagefoundry-0.1.0.dist-info/WHEEL +4 -0
  140. messagefoundry-0.1.0.dist-info/entry_points.txt +2 -0
  141. messagefoundry-0.1.0.dist-info/licenses/LICENSE +662 -0
  142. messagefoundry-0.1.0.dist-info/licenses/NOTICE +27 -0
@@ -0,0 +1,237 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """Tolerant HL7 v2 *peek* — fast field extraction for routing/filtering.
4
+
5
+ This is the hot path: every inbound message is peeked to pull the handful of MSH
6
+ fields the engine routes on (message type, trigger event, control id, version) and to
7
+ let channel/destination filters test arbitrary fields by path (e.g. ``MSH-9.1``).
8
+
9
+ It is built on ``python-hl7``, which parses tolerantly — real-world feeds are routinely
10
+ non-conformant and must still route. We never raise on a *structurally* odd-but-parseable
11
+ message; we only raise :class:`HL7PeekError` when the bytes are not an HL7 message at all
12
+ (no MSH) or a field *path* is malformed.
13
+
14
+ HL7 uses a carriage return (``\\r``) between segments. Inbound bytes arrive with all
15
+ manner of line endings (MLLP strips its own framing; files may be ``\\n`` or ``\\r\\n``),
16
+ so :func:`normalize` collapses them to ``\\r`` before parsing.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import re
22
+ from dataclasses import dataclass
23
+
24
+ import hl7
25
+
26
+ __all__ = [
27
+ "Peek",
28
+ "HL7PeekError",
29
+ "normalize",
30
+ "parse_path",
31
+ "DEFAULT_MAX_MESSAGE_BYTES",
32
+ "DEFAULT_MAX_SEGMENTS",
33
+ "enforce_size_limits",
34
+ ]
35
+
36
+ # Pre-parse resource caps (DoS guards). A complete-but-pathological message — multi-MiB, or
37
+ # tens of thousands of segments — would otherwise be parsed/walked whole (python-hl7 here,
38
+ # hl7apy on the strict path), multiplying memory and CPU. Checked *before* parsing so an
39
+ # oversized message is rejected cheaply. ``None`` disables a cap.
40
+ DEFAULT_MAX_MESSAGE_BYTES = 16 * 1024 * 1024 # 16 MiB — matches the MLLP/file ingress caps
41
+ DEFAULT_MAX_SEGMENTS = 10_000 # generous for big batches/ORUs, bounds segment-count blow-up
42
+
43
+
44
class HL7PeekError(ValueError):
    """Signal input that is not a parseable HL7 message, or a malformed field path.

    Subclasses :class:`ValueError`, so callers that already catch ``ValueError`` keep working.
    """
46
+
47
+
48
def enforce_size_limits(
    norm: str,
    *,
    max_bytes: int | None = DEFAULT_MAX_MESSAGE_BYTES,
    max_segments: int | None = DEFAULT_MAX_SEGMENTS,
) -> None:
    """Raise :class:`HL7PeekError` if the normalized message exceeds the size/segment caps.

    Operates on the ``\\r``-normalized text, so it is meant to run after :func:`normalize` and
    before any real parsing. Either cap may be ``None`` to disable it.

    Args:
        norm: The ``\\r``-delimited message text.
        max_bytes: Upper bound on the message size. The size is measured as ``len(norm)``
            (characters of the decoded text), which equals the byte count for ASCII payloads.
        max_segments: Upper bound on the number of ``\\r``-separated segments.

    Raises:
        HL7PeekError: If either cap is exceeded.
    """
    if max_bytes is not None and len(norm) > max_bytes:
        raise HL7PeekError(f"message exceeds max size ({len(norm)} > {max_bytes} bytes)")
    if max_segments is not None:
        segment_count = norm.count("\r") + 1
        # A final "\r" *terminates* the last segment rather than starting a new one; without
        # this adjustment a message holding exactly ``max_segments`` segments is rejected
        # whenever it carries the usual trailing terminator.
        if norm.endswith("\r"):
            segment_count -= 1
        if segment_count > max_segments:
            raise HL7PeekError(f"message exceeds max segments ({segment_count} > {max_segments})")
65
+
66
+
67
# SEG-F[.C[.S]] — segment id, field, optional component, optional subcomponent.
# Repetition defaults to the first; segment to its first occurrence (Phase 1).
_PATH_RE = re.compile(
    r"^(?P<seg>[A-Z][A-Z0-9]{2})-(?P<field>\d+)"
    r"(?:\.(?P<comp>\d+)(?:\.(?P<sub>\d+))?)?$"
)


def parse_path(path: str) -> tuple[str, int, int | None, int | None]:
    """Split an HL7 field path into ``(segment, field, component, subcomponent)``.

    Accepted shapes are ``SEG-F``, ``SEG-F.C`` and ``SEG-F.C.S`` (e.g. ``MSH-9``, ``MSH-9.1``,
    ``PID-5.1.1``). Component/subcomponent are ``None`` when omitted.

    Raises:
        HL7PeekError: If ``path`` does not match one of the accepted shapes.
    """
    # ``fullmatch`` rather than ``match``: the pattern's ``$`` anchor also matches just before a
    # trailing newline, so ``match`` would silently accept a path such as ``"MSH-9\n"``.
    m = _PATH_RE.fullmatch(path)
    if not m:
        raise HL7PeekError(f"invalid HL7 field path: {path!r}")
    return (
        m["seg"],
        int(m["field"]),
        int(m["comp"]) if m["comp"] else None,
        int(m["sub"]) if m["sub"] else None,
    )
90
+
91
+
92
def normalize(raw: str | bytes, *, encoding: str = "utf-8", errors: str = "replace") -> str:
    """Return ``raw`` as text whose line endings are all HL7's ``\\r`` segment separator.

    ``bytes``/``bytearray`` input is decoded with ``encoding``/``errors`` first; ``str`` input
    is used as given. ``\\r\\n`` pairs are collapsed before lone ``\\n`` characters are
    converted, so a CRLF never turns into two separators.

    The defaults (``utf-8``/``replace``) are deliberately tolerant: undecodable bytes become
    U+FFFD instead of raising. Callers that must not silently alter a wrong-charset feed can
    pass the declared encoding with ``errors="strict"`` and handle ``UnicodeDecodeError``.
    """
    text = bytes(raw).decode(encoding, errors) if isinstance(raw, (bytes, bytearray)) else raw
    crlf_collapsed = text.replace("\r\n", "\r")
    return crlf_collapsed.replace("\n", "\r")
104
+
105
+
106
@dataclass(frozen=True)
class Peek:
    """Read-only view of one inbound message: named routing fields plus access by HL7 path.

    Build instances with :meth:`parse` rather than calling the constructor directly.
    """

    # The ``python-hl7`` parse result that every lookup is answered from.
    message: hl7.Message
    # The normalized (``\\r``-delimited) text that ``message`` was parsed from.
    raw: str

    @classmethod
    def parse(
        cls,
        raw: str | bytes,
        *,
        max_bytes: int | None = DEFAULT_MAX_MESSAGE_BYTES,
        max_segments: int | None = DEFAULT_MAX_SEGMENTS,
    ) -> "Peek":
        """Normalize, size-check and parse ``raw`` into a :class:`Peek`.

        Raises:
            HL7PeekError: If the input is empty/whitespace, exceeds a cap, does not begin
                with ``MSH`` (leading whitespace is ignored), or ``hl7.parse`` fails.
        """
        text = normalize(raw)
        if text.strip() == "":
            raise HL7PeekError("empty message")
        # The caps are applied before the parser ever sees the text.
        enforce_size_limits(text, max_bytes=max_bytes, max_segments=max_segments)
        if text.lstrip()[:3] != "MSH":
            raise HL7PeekError("message does not start with an MSH segment")
        try:
            parsed = hl7.parse(text)
        except Exception as exc:  # python-hl7 raises a variety of ValueErrors
            raise HL7PeekError(f"could not parse HL7 message: {exc}") from exc
        return cls(message=parsed, raw=text)

    # --- generic field access (for filters) ----------------------------------

    def field(self, path: str) -> str | None:
        """Look up the value at an HL7 path such as ``MSH-9``, ``MSH-9.1`` or ``PID-5.1.1``.

        Only the first occurrence of the segment and the first repetition of the field are
        consulted. An absent or empty segment/field/component yields ``None``.

        Raises:
            HL7PeekError: If ``path`` is malformed (raised by :func:`parse_path`).
        """
        return self._resolve(*parse_path(path))

    def _resolve(self, seg: str, fld: int, comp: int | None, sub: int | None) -> str | None:
        """Resolve already-parsed path parts to a value, or ``None`` when absent/empty."""
        try:
            segment = self.message.segment(seg)
        except KeyError:
            return None
        try:
            field_obj = segment[fld]
        except (IndexError, KeyError):
            return None

        if comp is None:
            # Whole-field access: the field as text, with the empty string mapped to None.
            whole = str(field_obj)
            return whole if whole else None

        # Component/subcomponent access is delegated to python-hl7's extractor (first segment,
        # first repetition). Per the original author's note it returns the whole value when the
        # field carries no component separator, whereas manual indexing would walk into the
        # *string* and return a single character (e.g. "ORC-2.1" of "PLACER123" => "P").
        # Out-of-range parts raise IndexError.
        sub_index = 1 if sub is None else sub
        try:
            value = self.message.extract_field(seg, 1, fld, 1, comp, sub_index)
        except IndexError:
            return None
        return value if value else None

    # --- named routing fields (the common case) ------------------------------

    @property
    def message_code(self) -> str | None:
        """The message code from MSH-9.1 (for example ``ADT``)."""
        return self.field("MSH-9.1")

    @property
    def trigger_event(self) -> str | None:
        """The trigger event from MSH-9.2 (for example ``A01``)."""
        return self.field("MSH-9.2")

    @property
    def message_structure(self) -> str | None:
        """The message structure from MSH-9.3 (for example ``ADT_A01``); often absent."""
        return self.field("MSH-9.3")

    @property
    def message_type(self) -> str | None:
        """The whole of MSH-9 exactly as sent (for example ``ADT^A01``)."""
        return self.field("MSH-9")

    @property
    def control_id(self) -> str | None:
        """The message control id from MSH-10, used for de-dup/correlation."""
        return self.field("MSH-10")

    @property
    def version(self) -> str | None:
        """The HL7 version from MSH-12 (for example ``2.5.1``); ``None`` when omitted."""
        return self.field("MSH-12")

    @property
    def sending_app(self) -> str | None:
        """The value of MSH-3."""
        return self.field("MSH-3")

    @property
    def sending_facility(self) -> str | None:
        """The value of MSH-4."""
        return self.field("MSH-4")

    @property
    def receiving_app(self) -> str | None:
        """The value of MSH-5."""
        return self.field("MSH-5")

    @property
    def receiving_facility(self) -> str | None:
        """The value of MSH-6."""
        return self.field("MSH-6")

    @property
    def timestamp(self) -> str | None:
        """The message date/time from MSH-7, as sent."""
        return self.field("MSH-7")

    def routing(self) -> dict[str, str | None]:
        """Collect the MSH-derived routing/correlation fields into a dict keyed by name.

        Every value comes from the MSH properties above; no other segment is read.
        """
        names = (
            "message_type",
            "control_id",
            "version",
            "sending_app",
            "sending_facility",
            "receiving_app",
            "receiving_facility",
            "timestamp",
        )
        return {name: getattr(self, name) for name in names}

    def segments(self) -> list[str]:
        """List the segment ids in message order, e.g. ``["MSH", "EVN", "PID", "PV1"]``."""
        return [str(segment[0]) for segment in self.message]
@@ -0,0 +1,120 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """Splitting a single inbound payload into many messages (Corepoint-style "message split").
4
+
5
+ Two independent splits, both pure (no I/O, no engine state) so they can run on the hot path and be
6
+ reused by the dry-run / Test Bench:
7
+
8
+ * :func:`split_batch` — a **batch** file (an ``FHS``/``BHS`` batch or just several ``MSH`` messages
9
+ concatenated) becomes one message per ``MSH`` boundary, in file order. This is the canonical
10
+ splitter the File source uses at ingress and that :func:`~messagefoundry.pipeline.dryrun.split_messages`
11
+ delegates to, so the live engine and a dry-run split identically (single source of truth).
12
+
13
+ * :func:`split_by_obr` — one HL7 order message (an ORM/ORU carrying several ``OBR`` order groups)
14
+ becomes one message per ``OBR`` group, each re-attached to the shared header. This is the
15
+ handler-side equivalent of Corepoint's ``ItemSplit`` — a pure helper a Handler calls to fan one
16
+ order message out into per-order messages.
17
+
18
+ Both read the message's **own** separators (MSH-1/MSH-2) and go through the :class:`Message`
19
+ primitive — never raw string-slicing of structured HL7.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import re
25
+
26
+ from messagefoundry.parsing.message import Message
27
+ from messagefoundry.parsing.peek import normalize
28
+
29
+ __all__ = ["split_batch", "split_by_obr"]
30
+
31
+ # Split a normalized (``\r``-delimited) payload before each non-leading ``MSH`` segment. We match
32
+ # ``\rMSH`` *without* the trailing field separator so a batch whose MSH-1 isn't ``|`` (e.g.
33
+ # ``MSH^...``) still splits per-message instead of being read as one giant message — after a ``\r`` a
34
+ # segment id is always exactly three chars, so only an ``MSH`` segment starts with the literal "MSH".
35
+ _MSH_BOUNDARY = re.compile(r"(?=\rMSH)")
36
+
37
+
38
# Batch-protocol segment ids: they frame a batch/file but belong to no individual message.
_BATCH_ENVELOPE_IDS = frozenset({"FHS", "BHS", "BTS", "FTS"})


def _strip_trailing_envelope(chunk: str) -> str:
    """Drop batch-envelope lines (and blanks between them) that trail an ``MSH``-led chunk.

    A chunk with no trailing envelope line is returned unchanged, trailing ``\\r`` included.
    """
    lines = chunk.split("\r")
    keep = len(lines)
    dropped_envelope = False
    # Walk back from the end; index 0 is the MSH line itself and is always kept.
    while keep > 1:
        tail = lines[keep - 1]
        if tail[:3] in _BATCH_ENVELOPE_IDS:
            dropped_envelope = True
        elif tail.strip():
            break  # a real message segment: everything up to here stays
        keep -= 1
    if not dropped_envelope:
        return chunk
    return "\r".join(lines[:keep])


def split_batch(raw: str | bytes) -> list[str]:
    """Split a possibly-batched HL7 payload into individual messages on ``MSH`` boundaries.

    The payload is normalized to ``\\r`` line endings and cut before every ``MSH`` segment,
    so a batch file (or an ``FHS``/``BHS`` envelope wrapping several messages) yields every
    message in file order. Each returned message is ``\\r``-delimited and starts at its
    ``MSH``.

    Batch-envelope lines are dropped on both sides of a message: a header (``FHS``/``BHS``)
    before the first ``MSH`` is discarded as a non-message chunk, and trailer/header lines
    that follow a message (``BTS``/``FTS``, or ``BTS`` + ``BHS`` between two batches) are
    trimmed from the end of that message. Without the trimming they would stay glued to the
    preceding message, because the split only happens at ``MSH`` boundaries.

    A payload holding a single message with no envelope round-trips unchanged (a one-element
    list). A payload with no ``MSH``-led chunk at all (empty, whitespace, or not HL7) yields
    the normalized text as the sole element, so the caller can report it as malformed rather
    than having it silently disappear.
    """
    text = normalize(raw)  # \r-delimited, decoupled from the inbound line endings
    messages: list[str] = []
    for chunk in _MSH_BOUNDARY.split(text):
        # Every chunk after the first begins with the boundary's own CR; remove it before
        # testing whether the chunk really is a message.
        candidate = chunk.lstrip("\r")
        if candidate.startswith("MSH"):
            messages.append(_strip_trailing_envelope(candidate))
    return messages or [text]
58
+
59
+
60
def split_by_obr(message: Message | str | bytes) -> list[str]:
    """Fan one HL7 order message out into one message per ``OBR`` order group.

    **Grouping.** All segments ahead of the first ``OBR`` form the shared *header*. Every
    ``OBR`` opens an order group that extends up to, but not including, the next ``OBR`` (or
    the end of the message). Each output is ``header + one group``, parsed and re-encoded
    through :class:`Message`.

    **MSH-10.** To keep the outputs individually correlatable, each one's control id is the
    original control id followed by a literal ``-`` and the group's 1-based index (``MSG1`` →
    ``MSG1-1``, ``MSG1-2`` …). The message's component separator is not involved. The first
    group is not special-cased, so a message with a single ``OBR`` still comes back with
    ``…-1``. When the source message has no control id (``control_id`` is ``None``), MSH-10
    is left alone.

    **No OBR.** A message without any ``OBR`` is not split: the result is a one-element list
    holding the message's encoding, with the control id unchanged.

    Args:
        message: A :class:`Message`, or raw ``str``/``bytes`` that is parsed here.

    Returns:
        The re-encoded messages, one per order group, in message order.
    """
    msg = Message.parse(message) if not isinstance(message, Message) else message
    seg_ids = msg.segments()

    # Positions (0-based, in segment order) at which an order group begins.
    obr_at = [pos for pos, seg_id in enumerate(seg_ids) if seg_id == "OBR"]
    if not obr_at:
        # Not an order message: hand it back without touching the control id.
        return [msg.encode()]

    # Group k runs from obr_at[k] up to the next OBR, the last one up to the message end.
    group_ends = [*obr_at[1:], len(seg_ids)]

    # Slice the encoded segment *lines* so header and group are re-attached verbatim. Empty
    # entries (such as the one after a final "\r") are discarded so that line positions line
    # up with the positions reported by segments().
    seg_lines = [line for line in msg.encode().split("\r") if line]
    shared_header = seg_lines[: obr_at[0]]

    original_id = msg.control_id
    results: list[str] = []
    for order_no, (begin, finish) in enumerate(zip(obr_at, group_ends), start=1):
        piece = Message.parse("\r".join(shared_header + seg_lines[begin:finish]) + "\r")
        if original_id is not None:
            # set() goes through the Message primitive instead of editing the raw text.
            piece.set("MSH-10", f"{original_id}-{order_no}")
        results.append(piece.encode())
    return results
@@ -0,0 +1,46 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """Build a short, human-readable message summary from a :class:`Peek`.
4
+
5
+ Computed once at ingest and stored in its own column (outside the serialized body) so the
6
+ search/list view never reparses HL7. PHI-bearing (MRN, patient name) — see the store's audit
7
+ note. Tolerant: any missing field is simply omitted.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from messagefoundry.parsing.peek import Peek
13
+
14
+ __all__ = ["summarize"]
15
+
16
+ _ORDER_TYPES = {"ORM", "ORU"}
17
+
18
+
19
def _patient_name(peek: Peek) -> str | None:
    """Format the patient name from PID-5 as ``FAMILY, GIVEN``.

    Falls back to whichever single part is present, so a name that carries only a given name
    (PID-5.2) is still shown instead of being dropped. Returns ``None`` when both parts are
    absent.
    """
    family = peek.field("PID-5.1")
    given = peek.field("PID-5.2")
    if family and given:
        return f"{family}, {given}"
    return family or given
25
+
26
+
27
def summarize(peek: Peek) -> str:
    """Render the one-line summary, e.g. ``MRN 100001 · DOE, JANE``.

    For ORM/ORU messages the placer order number and the filler/accession number are
    appended (``· Order 12345 · Acc 67890``). Any part whose source field is missing is left
    out; with nothing available the result is the empty string.
    """
    mrn = peek.field("PID-3.1")
    candidates: list[str | None] = [f"MRN {mrn}" if mrn else None, _patient_name(peek)]

    code = peek.message_code or ""
    if code in _ORDER_TYPES:
        placer = peek.field("ORC-2.1") or peek.field("OBR-2.1")  # placer order number
        filler = peek.field("OBR-3.1") or peek.field("ORC-3.1")  # filler / accession
        candidates.append(f"Order {placer}" if placer else None)
        candidates.append(f"Acc {filler}" if filler else None)

    return " · ".join(part for part in candidates if part)
@@ -0,0 +1,128 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """Structured HL7 parse tree for the message viewer.
4
+
5
+ Turns a raw message into a nested ``segment → field → repetition → component →
6
+ subcomponent`` structure with HL7 paths and values, so the console can render an
7
+ explorable tree without reaching into ``python-hl7`` internals. Pure and tolerant: it
8
+ builds whatever parses (the viewer must show non-conformant messages too).
9
+
10
+ Splitting is done from the message's own MSH-1/MSH-2 separators rather than assumed
11
+ defaults, so messages using non-standard encoding characters render correctly. MSH-1
12
+ (the field separator) and MSH-2 (the encoding characters) are represented as literal
13
+ single-value fields, matching how operators expect to see them.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from dataclasses import dataclass, field
19
+
20
+ from messagefoundry.parsing.peek import (
21
+ DEFAULT_MAX_MESSAGE_BYTES,
22
+ DEFAULT_MAX_SEGMENTS,
23
+ HL7PeekError,
24
+ enforce_size_limits,
25
+ normalize,
26
+ )
27
+
28
+ __all__ = ["TreeNode", "parse_tree"]
29
+
30
+
31
@dataclass
class TreeNode:
    """A single node of the HL7 parse tree shown by the message viewer.

    Nodes nest as segment → field → repetition → component → subcomponent. A node without
    children is a leaf and carries its text in ``value``.
    """

    # Human/HL7 label of the node, e.g. ``MSH``, ``MSH-9`` or ``MSH-9.1``.
    label: str
    # Raw text of the node; left empty on nodes that only group children.
    value: str = ""
    # Next level down; each instance gets its own fresh list.
    children: list["TreeNode"] = field(default_factory=list)
43
+
44
+
45
def parse_tree(
    raw: str | bytes,
    *,
    max_bytes: int | None = DEFAULT_MAX_MESSAGE_BYTES,
    max_segments: int | None = DEFAULT_MAX_SEGMENTS,
) -> list[TreeNode]:
    """Build a list of segment :class:`TreeNode` from ``raw``.

    The text is normalized to ``\\r`` line endings, checked against the size caps, split into
    non-empty segments, and each segment is expanded using the separators declared by the
    leading MSH line.

    Args:
        raw: The message as text or bytes.
        max_bytes: Size cap forwarded to :func:`enforce_size_limits`; ``None`` disables it.
        max_segments: Segment-count cap forwarded likewise; ``None`` disables it.

    Raises:
        HL7PeekError: If the message is empty/whitespace, exceeds a cap, or does not start
            with an MSH segment to derive separators from. Anything else is rendered
            best-effort.
    """
    # Leading whitespace is ignored, as ``Peek.parse`` in the peek module does, so a message
    # that was accepted for routing can also be displayed. Only CRs are trimmed from the end,
    # leaving the text of the last field untouched.
    text = normalize(raw).lstrip().rstrip("\r")
    if not text:
        raise HL7PeekError("empty message")
    enforce_size_limits(text, max_bytes=max_bytes, max_segments=max_segments)
    segments = [s for s in text.split("\r") if s]
    if not segments or not segments[0].startswith("MSH"):
        raise HL7PeekError("message does not start with an MSH segment")

    field_sep, comp_sep, rep_sep, sub_sep = _separators(segments[0])
    return [_segment_node(seg, field_sep, comp_sep, rep_sep, sub_sep) for seg in segments]
63
+
64
+
65
def _separators(msh: str) -> tuple[str, str, str, str]:
    """Derive (field, component, repetition, subcomponent) separators from the MSH line.

    MSH-1 is the character right after ``MSH``. MSH-2 is whatever sits between that
    separator and the next one. Its characters are read by position (component, repetition,
    escape, subcomponent), and any position MSH-2 is too short to supply falls back to the
    HL7 default (``|``, ``^``, ``~``, ``&``).
    """
    field_sep = msh[3] if len(msh) > 3 else "|"
    # Stop MSH-2 at the next field separator instead of blindly taking four characters: with
    # a short MSH-2 (e.g. ``MSH|^~|APP``) a fixed slice would run into the following field and
    # pick a separator from it (or pick the field separator itself).
    enc = msh[4:].split(field_sep, 1)[0]
    comp_sep = enc[0] if len(enc) > 0 else "^"
    rep_sep = enc[1] if len(enc) > 1 else "~"
    sub_sep = enc[3] if len(enc) > 3 else "&"
    return field_sep, comp_sep, rep_sep, sub_sep
73
+
74
+
75
def _segment_node(
    segment: str, field_sep: str, comp_sep: str, rep_sep: str, sub_sep: str
) -> TreeNode:
    """Expand one segment line into a node whose children are its fields.

    The text before the first field separator becomes the node label. For ``MSH`` the field
    separator is shown as ``MSH-1`` and the encoding characters as a literal ``MSH-2``, so
    the remaining fields are numbered from 3. Every other segment numbers its fields from 1.
    """
    seg_id, *rest = segment.split(field_sep)
    node = TreeNode(label=seg_id)

    if seg_id == "MSH":
        node.children.append(TreeNode(label="MSH-1", value=field_sep))
        if rest:
            # Kept verbatim: the encoding characters must not be split by themselves.
            node.children.append(TreeNode(label="MSH-2", value=rest[0]))
        data_fields, first_number = rest[1:], 3
    else:
        data_fields, first_number = rest, 1

    node.children.extend(
        _field_node(f"{seg_id}-{number}", text, comp_sep, rep_sep, sub_sep)
        for number, text in enumerate(data_fields, start=first_number)
    )
    return node
100
+
101
+
102
def _field_node(label: str, raw_field: str, comp_sep: str, rep_sep: str, sub_sep: str) -> TreeNode:
    """Build the node for one field, adding a ``label[i]`` level only when it repeats.

    A field without the repetition separator is expanded directly into components. A
    repeating field keeps its full raw text as ``value`` and gets one child per repetition,
    labelled ``label[1]``, ``label[2]`` …
    """
    reps = raw_field.split(rep_sep)
    if len(reps) == 1:
        return _components_node(label, raw_field, comp_sep, sub_sep)

    repeated = TreeNode(label=label, value=raw_field)
    repeated.children.extend(
        _components_node(f"{label}[{number}]", rep_text, comp_sep, sub_sep)
        for number, rep_text in enumerate(reps, start=1)
    )
    return repeated
110
+
111
+
112
def _components_node(label: str, raw_value: str, comp_sep: str, sub_sep: str) -> TreeNode:
    """Build the node for one field or repetition, expanding components and subcomponents.

    A value with neither separator is a single leaf. Otherwise each component becomes a
    ``label.N`` child, and a component containing the subcomponent separator gets
    ``label.N.M`` children of its own. A lone component that has subcomponents (``a&b&c``) is
    still expanded.
    """
    node = TreeNode(label=label, value=raw_value)
    components = raw_value.split(comp_sep)
    if len(components) <= 1 and sub_sep not in raw_value:
        # Atomic value: the leaf itself carries the text.
        return node

    for comp_no, comp_text in enumerate(components, start=1):
        comp_label = f"{label}.{comp_no}"
        comp_node = TreeNode(label=comp_label, value=comp_text)
        sub_values = comp_text.split(sub_sep)
        if len(sub_values) > 1:
            comp_node.children.extend(
                TreeNode(label=f"{comp_label}.{sub_no}", value=sub_text)
                for sub_no, sub_text in enumerate(sub_values, start=1)
            )
        node.children.append(comp_node)
    return node
@@ -0,0 +1,95 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """Strict, version-aware HL7 v2 validation — the opt-in tier.
4
+
5
+ Built on ``hl7apy``, which knows the official HL7 message structures per version and
6
+ checks segment cardinality, datatypes, table values and lengths. It is slower and far
7
+ stricter than :mod:`~messagefoundry.parsing.peek`, so it runs only when a channel sets
8
+ ``validation.strict = true`` and is kept off the routing hot path.
9
+
10
+ ``hl7apy`` raises on the *first* problem it finds, which is exactly what a strict channel
11
+ needs: one conformance error is enough to NACK. We surface that single message rather
12
+ than writing a full multi-error report to disk — a report file of a PHI message is a
13
+ data-leak we don't want by default. (Full reporting can become an explicit, opt-in,
14
+ redaction-aware feature later.)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+
21
+ from messagefoundry.parsing.peek import (
22
+ DEFAULT_MAX_MESSAGE_BYTES,
23
+ DEFAULT_MAX_SEGMENTS,
24
+ HL7PeekError,
25
+ enforce_size_limits,
26
+ normalize,
27
+ )
28
+
29
+ __all__ = ["ValidationResult", "validate"]
30
+
31
+
32
+ @dataclass(frozen=True)
33
+ class ValidationResult:
34
+ """Outcome of strict validation. Truthy iff the message is conformant."""
35
+
36
+ ok: bool
37
+ version: str | None
38
+ errors: list[str]
39
+
40
+ def __bool__(self) -> bool:
41
+ return self.ok
42
+
43
+
44
+ def validate(
45
+ raw: str | bytes,
46
+ *,
47
+ expected_version: str | None = None,
48
+ profile: object | None = None,
49
+ max_bytes: int | None = DEFAULT_MAX_MESSAGE_BYTES,
50
+ max_segments: int | None = DEFAULT_MAX_SEGMENTS,
51
+ ) -> ValidationResult:
52
+ """Validate ``raw`` against the official structures for its (or ``expected_version``).
53
+
54
+ ``expected_version`` cross-checks MSH-12: if the message declares a different version
55
+ that is reported as an error (a feed sending the wrong version is a misconfiguration
56
+ a strict channel should reject). ``profile`` is reserved for a conformance-profile
57
+ object (Phase 2+); passing one today is accepted but not yet enforced. ``max_bytes`` /
58
+ ``max_segments`` reject an oversized message before the (slow) strict parse.
59
+ """
60
+ from hl7apy.exceptions import HL7apyException
61
+ from hl7apy.parser import parse_message
62
+ from hl7apy.validation import Validator
63
+
64
+ norm = normalize(raw).strip("\r")
65
+ if not norm:
66
+ return ValidationResult(False, expected_version, ["empty message"])
67
+
68
+ # Bound resource use before the (slow) strict parse — the MLLP frame cap doesn't protect
69
+ # a complete-but-huge message, and hl7apy's structure builder is the heavier amplifier.
70
+ try:
71
+ enforce_size_limits(norm, max_bytes=max_bytes, max_segments=max_segments)
72
+ except HL7PeekError as exc:
73
+ return ValidationResult(False, expected_version, [str(exc)])
74
+
75
+ try:
76
+ message = parse_message(norm, find_groups=True)
77
+ except HL7apyException as exc:
78
+ return ValidationResult(False, expected_version, [f"parse error: {exc}"])
79
+ except Exception as exc: # defensive: never let validation crash the pipeline
80
+ return ValidationResult(False, expected_version, [f"parse error: {exc}"])
81
+
82
+ version = getattr(message, "version", None)
83
+ errors: list[str] = []
84
+
85
+ if expected_version and version and expected_version != version:
86
+ errors.append(f"version mismatch: message is {version}, channel expects {expected_version}")
87
+
88
+ try:
89
+ Validator.validate(message)
90
+ except HL7apyException as exc:
91
+ errors.append(str(exc))
92
+ except Exception as exc: # defensive
93
+ errors.append(str(exc))
94
+
95
+ return ValidationResult(ok=not errors, version=version, errors=errors)
@@ -0,0 +1,46 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 MessageFoundry Organization and contributors
3
+ """Pure ASC X12 EDI codec (ADR 0012) — a tolerant routing peek, an interchange splitter/assembler, and
4
+ a mutable message model, mirroring the HL7 :mod:`messagefoundry.parsing` library.
5
+
6
+ It is **pure and side-effect-free** (no I/O, no engine state) and imports nothing from
7
+ ``messagefoundry.config`` / ``pipeline`` / ``store`` / ``transports`` — so the console may import it,
8
+ and a code-first Router/Handler calls it **on demand** against a
9
+ :class:`~messagefoundry.parsing.message.RawMessage` (``content_type="x12"``, ADR 0004): X12 is **not**
10
+ pushed through the engine pipeline as a bespoke object. The X12 content type is referred to by the
11
+ literal string ``"x12"`` (never imported from ``config``) to keep this purity.
12
+
13
+ Two tiers, mirroring python-hl7 (tolerant) / hl7apy (strict):
14
+
15
+ * **Tolerant (built here):** :class:`X12Peek` (cheap ISA + GS/ST peek for routing), :func:`split` /
16
+ :class:`X12FrameReader` (interchange framing), :class:`X12Message` (read/set/encode for transforms),
17
+ :func:`check_integrity` (envelope tie-out).
18
+ * **Strict (deferred):** implementation-guide validation (e.g. 005010X222A1 for 837P) is future work.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from messagefoundry.parsing.x12.delimiters import (
24
+ Delimiters,
25
+ discover_delimiters,
26
+ find_isa_start,
27
+ )
28
+ from messagefoundry.parsing.x12.errors import X12Error, X12FrameError, X12PeekError
29
+ from messagefoundry.parsing.x12.interchange import X12FrameReader, check_integrity, split
30
+ from messagefoundry.parsing.x12.message import X12Message
31
+ from messagefoundry.parsing.x12.peek import X12Group, X12Peek
32
+
33
+ __all__ = [
34
+ "X12Peek",
35
+ "X12Group",
36
+ "X12Message",
37
+ "X12FrameReader",
38
+ "split",
39
+ "check_integrity",
40
+ "discover_delimiters",
41
+ "find_isa_start",
42
+ "Delimiters",
43
+ "X12Error",
44
+ "X12PeekError",
45
+ "X12FrameError",
46
+ ]