oehrpy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,352 @@
1
+ """
2
+ OPT (Operational Template) XML parser.
3
+
4
+ This module parses OPT 1.4 XML files to extract template definitions,
5
+ archetype constraints, and terminology bindings.
6
+
7
+ OPT files define the constraints on archetypes used within a specific
8
+ clinical template, including:
9
+ - Which archetypes are used
10
+ - Which nodes are included/excluded
11
+ - Terminology bindings
12
+ - Default values
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from pathlib import Path
20
+ from typing import Any
21
+ from xml.etree.ElementTree import Element # For type hints
22
+
23
+ import defusedxml.ElementTree as ET
24
+
25
# Prefix -> namespace URI map passed to ElementTree's find()/findall() calls.
NAMESPACES = {
    # Default openEHR schema namespace; the parser may replace this entry
    # with the namespace it detects on a document's root element.
    "opt": "http://schemas.openehr.org/v1",
    # XML Schema instance namespace, used to read the xsi:type attribute.
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
}
30
+
31
+
32
@dataclass
class TermBinding:
    """Binding of a single code to an external terminology."""

    # The code being bound.
    code: str
    # Identifier of the terminology the code is bound to.
    terminology_id: str
    # Optional bound value; None when no value is given.
    value: str | None = None
39
+
40
+
41
@dataclass
class ConstraintDefinition:
    """Occurrence and value constraints attached to one template node."""

    # Identity and location of the constrained node.
    node_id: str
    path: str
    rm_type: str
    name: str | None = None

    # Occurrence bounds; an upper bound of None means "unbounded".
    occurrences_min: int = 0
    occurrences_max: int | None = None  # None means unbounded
    is_mandatory: bool = False

    # Value-level constraints.
    default_value: Any | None = None
    allowed_values: list[str] = field(default_factory=list)
    term_bindings: list[TermBinding] = field(default_factory=list)

    @property
    def is_multiple(self) -> bool:
        """Return True when more than one occurrence of the node is allowed.

        An unbounded upper limit (``occurrences_max is None``) counts as
        multiple.
        """
        upper_bound = self.occurrences_max
        if upper_bound is None:
            return True
        return upper_bound > 1
60
+
61
+
62
@dataclass
class ArchetypeNode:
    """A node in the parsed template tree.

    Each node records its identifiers, reference-model type, display name and
    path, plus the child nodes and constraints found beneath it.
    """

    archetype_id: str
    node_id: str
    rm_type: str
    name: str
    path: str
    children: list[ArchetypeNode] = field(default_factory=list)
    constraints: list[ConstraintDefinition] = field(default_factory=list)

    def find_node(self, node_id: str) -> ArchetypeNode | None:
        """Find a descendant node by ID using a depth-first search.

        Args:
            node_id: Node identifier to look for.

        Returns:
            The first matching descendant in depth-first order, or None if no
            descendant has that ID. The node itself is not considered.
        """
        for child in self.children:
            if child.node_id == node_id:
                return child
            found = child.find_node(node_id)
            if found is not None:
                return found
        return None

    def get_flat_path(self, child_path: str = "") -> str:
        """Get a simplified FLAT-style path for this node.

        Leading slashes are dropped and bracketed at-code predicates are
        removed; no name substitution is performed.

        Args:
            child_path: Optional sub-path appended after a "/" separator.

        Returns:
            The cleaned path, with ``child_path`` appended when given.
        """
        path = self.path.lstrip("/")
        # Strip at-code predicates. The optional dotted suffix covers
        # specialised codes such as [at0001.1], which a bare \d+ would leave
        # behind in the path.
        path = re.sub(r"\[at\d+(?:\.\d+)*\]", "", path)
        if child_path:
            return f"{path}/{child_path}"
        return path
93
+
94
+
95
@dataclass
class TemplateDefinition:
    """Result of parsing an OPT file: template metadata plus its node tree."""

    # Template metadata.
    template_id: str
    concept: str
    description: str | None = None
    language: str = "en"

    # Root archetype information.
    archetype_id: str | None = None
    rm_type: str = "COMPOSITION"
    root: ArchetypeNode | None = None

    # Flat index of every node, keyed by node path.
    all_nodes: dict[str, ArchetypeNode] = field(default_factory=dict)

    def get_node(self, path: str) -> ArchetypeNode | None:
        """Return the node stored under ``path``, or None if there is none."""
        return self.all_nodes.get(path, None)

    def list_observations(self) -> list[ArchetypeNode]:
        """Return every indexed node whose RM type is OBSERVATION."""
        observations = []
        for candidate in self.all_nodes.values():
            if candidate.rm_type == "OBSERVATION":
                observations.append(candidate)
        return observations

    def list_entries(self) -> list[ArchetypeNode]:
        """Return every indexed node whose RM type is an ENTRY subtype.

        The recognised types are OBSERVATION, EVALUATION, INSTRUCTION, ACTION
        and ADMIN_ENTRY.
        """
        entry_types = {"OBSERVATION", "EVALUATION", "INSTRUCTION", "ACTION", "ADMIN_ENTRY"}
        entries = []
        for candidate in self.all_nodes.values():
            if candidate.rm_type in entry_types:
                entries.append(candidate)
        return entries
120
+
121
+
122
class OPTParser:
    """Parser for OPT 1.4 XML files.

    Lookups are attempted first with the detected openEHR namespace and then
    without any namespace, so both namespaced and plain documents are
    handled.
    """

    def __init__(self) -> None:
        # Copy the module-level defaults: _detect_namespaces() rewrites the
        # "opt" entry per document, and that must not leak into the shared
        # NAMESPACES constant or into other parser instances.
        self._namespaces = dict(NAMESPACES)

    def parse_file(self, path: Path | str) -> TemplateDefinition:
        """Parse an OPT file from disk.

        Args:
            path: Path to the OPT XML file.

        Returns:
            Parsed TemplateDefinition.

        Raises:
            ValueError: If the parsed document has no root element.
        """
        tree = ET.parse(path)
        root = tree.getroot()
        if root is None:
            raise ValueError(f"Failed to parse XML from {path}: no root element")
        return self._parse_root(root)

    def parse_string(self, xml_content: str) -> TemplateDefinition:
        """Parse OPT from XML string.

        Args:
            xml_content: OPT XML content.

        Returns:
            Parsed TemplateDefinition.

        Raises:
            ValueError: If the parsed document has no root element.
        """
        root = ET.fromstring(xml_content)
        if root is None:
            raise ValueError("Failed to parse XML string: no root element")
        return self._parse_root(root)

    def _parse_root(self, root: Element) -> TemplateDefinition:
        """Build a TemplateDefinition from the document's root element."""
        # Pick up the namespace actually used by this document before any
        # namespaced lookups are made.
        self._detect_namespaces(root)

        template = TemplateDefinition(
            template_id=self._get_text(root, ".//template_id/value") or "",
            concept=self._get_text(root, ".//concept") or "",
            description=self._get_text(root, ".//description/details/purpose"),
            language=self._get_text(root, ".//language/code_string") or "en",
        )

        # Locate the definition (root archetype): namespaced direct child
        # first, then any un-namespaced descendant.
        definition = root.find("opt:definition", self._namespaces)
        if definition is None:
            definition = root.find(".//definition")

        if definition is not None:
            template.archetype_id = self._get_text(definition, "archetype_id/value")
            template.rm_type = self._get_type_attr(definition, "COMPOSITION")

            template.root = self._parse_node(definition, "/")
            if template.root:
                self._collect_nodes(template.root, template.all_nodes)

        return template

    def _detect_namespaces(self, root: Element) -> None:
        """Detect the openEHR namespace used by the document.

        The root element's own namespace is checked first because it is what
        child-element lookups must match; namespaced attributes are only a
        fallback. The first namespace containing "openehr.org" is stored
        under the "opt" prefix. Nothing changes if none is found.
        """
        for qualified_name in (root.tag, *root.attrib):
            if not qualified_name.startswith("{"):
                continue
            ns = qualified_name[1:].split("}")[0]
            if "openehr.org" in ns:
                self._namespaces["opt"] = ns
                return

    def _get_type_attr(self, element: Element, default: str) -> str:
        """Read an element's xsi:type (or plain ``type``) attribute.

        Args:
            element: Element to inspect.
            default: Value used when neither attribute is present or non-empty.

        Returns:
            The attribute value with any "prefix:" removed.
        """
        xsi_type_key = "{{{}}}type".format(self._namespaces.get("xsi", ""))
        raw_type = element.get(xsi_type_key) or element.get("type") or default
        return raw_type.split(":")[-1]  # Drop prefix, e.g. "opt:C_ARCHETYPE_ROOT"

    def _find_children(self, element: Element, tag: str) -> list[Element]:
        """Return direct children named ``tag``, namespaced first, then plain."""
        namespaced = element.findall(f"opt:{tag}", self._namespaces)
        if namespaced:
            return namespaced
        return element.findall(tag)

    def _get_text(self, element: Element, xpath: str) -> str | None:
        """Get text content from xpath, trying with and without namespaces.

        Args:
            element: Element the search starts from.
            xpath: Un-prefixed ElementTree path, e.g. ".//template_id/value".

        Returns:
            The stripped text of the first attempt that finds an element
            with text, or None if every attempt fails.
        """
        attempts = []
        if "opt" in self._namespaces:
            # Prefix every plain step with "opt:":
            # .//template_id/value -> .//opt:template_id/opt:value
            ns_xpath = "/".join(
                f"opt:{part}" if part and not part.startswith(".") and ":" not in part else part
                for part in xpath.split("/")
            )
            attempts.append((ns_xpath, self._namespaces))
        # Un-prefixed path with the namespace map, then with no map at all.
        attempts.append((xpath, self._namespaces))
        attempts.append((xpath, None))

        for expression, namespaces in attempts:
            child = element.find(expression, namespaces)
            if child is not None and child.text is not None:
                return child.text.strip()
        return None

    def _parse_node(self, element: Element, parent_path: str) -> ArchetypeNode | None:
        """Parse an archetype node, and its descendants, from an XML element.

        Args:
            element: XML element describing the node.
            parent_path: Path prefix under which the node is placed.

        Returns:
            The populated ArchetypeNode.
        """
        node_id = self._get_text(element, "node_id") or ""
        archetype_id = self._get_text(element, "archetype_id/value") or ""

        # Map the constraint type from xsi:type to an RM type. Only the
        # lookup that applies to this node is evaluated.
        constraint_type = self._get_type_attr(element, "")
        if constraint_type == "C_ARCHETYPE_ROOT":
            rm_type = self._get_rm_type_from_archetype(archetype_id)
        elif constraint_type == "C_COMPLEX_OBJECT":
            rm_type = self._get_text(element, "rm_type_name") or "ITEM"
        else:
            rm_type = constraint_type

        # Prefer an explicit name, then an ontology term, then the node ID.
        name = (
            self._get_text(element, "name/value")
            or self._get_term_text(element, node_id)
            or node_id
        )

        path = f"{parent_path}/{node_id}" if node_id else parent_path

        node = ArchetypeNode(
            archetype_id=archetype_id,
            node_id=node_id,
            rm_type=rm_type,
            name=name,
            path=path,
        )

        # Recurse through each attribute's children; the attribute name
        # becomes a segment of the child's parent path.
        for attr in self._find_children(element, "attributes"):
            attr_name = self._get_text(attr, "rm_attribute_name") or ""
            for child in self._find_children(attr, "children"):
                child_node = self._parse_node(child, f"{path}/{attr_name}")
                if child_node is not None:
                    node.children.append(child_node)

        node.constraints = self._parse_constraints(element, path)
        return node

    def _parse_constraints(self, element: Element, path: str) -> list[ConstraintDefinition]:
        """Parse constraint definitions from an element.

        Args:
            element: XML element whose occurrences are read.
            path: Path recorded on the resulting constraint.

        Returns:
            A one-item list when an occurrences element exists, else an
            empty list.

        Raises:
            ValueError: If a lower or upper bound is not an integer.
        """
        # Compare against None explicitly: an Element's truth value depends
        # on its child count (and is deprecated), so chaining the lookups
        # with "or" would discard a childless match.
        occ = element.find("opt:occurrences", self._namespaces)
        if occ is None:
            occ = element.find("occurrences")
        if occ is None:
            return []

        lower = self._get_text(occ, "lower") or "0"
        upper = self._get_text(occ, "upper")
        upper_unbounded = self._get_text(occ, "upper_unbounded") == "true"

        constraint = ConstraintDefinition(
            node_id=self._get_text(element, "node_id") or "",
            path=path,
            rm_type=self._get_text(element, "rm_type_name") or "",
            occurrences_min=int(lower),
            # A missing upper bound is treated like an unbounded one.
            occurrences_max=None if upper_unbounded else (int(upper) if upper else None),
        )
        constraint.is_mandatory = constraint.occurrences_min > 0
        return [constraint]

    def _get_term_text(self, element: Element, node_id: str) -> str | None:
        """Get term text from ontology for a node ID.

        This is a placeholder that always returns None; a full
        implementation would look up terms in the ontology section.
        """
        return None

    def _get_rm_type_from_archetype(self, archetype_id: str) -> str:
        """Extract RM type from archetype ID.

        Args:
            archetype_id: ID such as "openEHR-EHR-OBSERVATION.blood_pressure.v1".

        Returns:
            The text between the second "-" and the next ".", for example
            "OBSERVATION"; "ITEM" when the ID is empty or has fewer than
            three "-"-separated parts.
        """
        if not archetype_id:
            return "ITEM"

        parts = archetype_id.split("-")
        if len(parts) >= 3:
            return parts[2].split(".")[0]
        return "ITEM"

    def _collect_nodes(self, node: ArchetypeNode, nodes: dict[str, ArchetypeNode]) -> None:
        """Collect a node and all its descendants into ``nodes``, keyed by path.

        Nodes with an empty path are skipped; a later node with the same path
        overwrites an earlier one.
        """
        if node.path:
            nodes[node.path] = node
        for child in node.children:
            self._collect_nodes(child, nodes)
338
+
339
+
340
def parse_opt(source: str | Path) -> TemplateDefinition:
    """Parse an OPT template given either a file path or raw XML text.

    A ``Path`` is always read from disk. A string is treated as XML content
    when, ignoring surrounding whitespace, it starts with "<"; any other
    string is treated as a file path.

    Args:
        source: Path to OPT file or XML string.

    Returns:
        Parsed TemplateDefinition.
    """
    opt_parser = OPTParser()

    if isinstance(source, Path):
        return opt_parser.parse_file(source)

    if isinstance(source, str) and not source.lstrip().startswith("<"):
        return opt_parser.parse_file(source)

    return opt_parser.parse_string(source)