pathling 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. pathling/__init__.py +56 -0
  2. pathling/_version.py +9 -0
  3. pathling/bulk.py +310 -0
  4. pathling/coding.py +90 -0
  5. pathling/context.py +381 -0
  6. pathling/core.py +232 -0
  7. pathling/datasink.py +124 -0
  8. pathling/datasource.py +344 -0
  9. pathling/fhir.py +31 -0
  10. pathling/functions.py +88 -0
  11. pathling/spark.py +89 -0
  12. pathling/udfs.py +264 -0
  13. pathling-9.1.0.data/data/share/pathling/examples/bulk.py +149 -0
  14. pathling-9.1.0.data/data/share/pathling/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +2878 -0
  15. pathling-9.1.0.data/data/share/pathling/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +2194 -0
  16. pathling-9.1.0.data/data/share/pathling/examples/data/csv/conditions.csv +20 -0
  17. pathling-9.1.0.data/data/share/pathling/examples/data/resources/Condition.ndjson +25 -0
  18. pathling-9.1.0.data/data/share/pathling/examples/data/resources/Patient.ndjson +9 -0
  19. pathling-9.1.0.data/data/share/pathling/examples/designation.py +38 -0
  20. pathling-9.1.0.data/data/share/pathling/examples/display.py +30 -0
  21. pathling-9.1.0.data/data/share/pathling/examples/encode_bundles.py +35 -0
  22. pathling-9.1.0.data/data/share/pathling/examples/encode_resources.py +32 -0
  23. pathling-9.1.0.data/data/share/pathling/examples/fhir_view.py +58 -0
  24. pathling-9.1.0.data/data/share/pathling/examples/member_of.py +53 -0
  25. pathling-9.1.0.data/data/share/pathling/examples/property_of.py +47 -0
  26. pathling-9.1.0.data/data/share/pathling/examples/subsumes.py +50 -0
  27. pathling-9.1.0.data/data/share/pathling/examples/translate.py +41 -0
  28. pathling-9.1.0.dist-info/METADATA +391 -0
  29. pathling-9.1.0.dist-info/RECORD +31 -0
  30. pathling-9.1.0.dist-info/WHEEL +4 -0
  31. pathling-9.1.0.dist-info/licenses/LICENSE +227 -0
pathling/__init__.py ADDED
@@ -0,0 +1,56 @@
1
+ # Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from .coding import Coding
17
+ from .context import PathlingContext, StorageType
18
+ from .core import Expression, VariableExpression
19
+ from .datasource import DataSources, DataSource
20
+ from .fhir import MimeType, Version
21
+ from .functions import to_coding, to_snomed_coding, to_ecl_value_set
22
+ from .udfs import (
23
+ member_of,
24
+ translate,
25
+ subsumes,
26
+ subsumed_by,
27
+ property_of,
28
+ display,
29
+ designation,
30
+ PropertyType,
31
+ Equivalence,
32
+ )
33
+
34
+ __all__ = [
35
+ "PathlingContext",
36
+ "StorageType",
37
+ "MimeType",
38
+ "Version",
39
+ "Coding",
40
+ "member_of",
41
+ "translate",
42
+ "subsumes",
43
+ "subsumed_by",
44
+ "property_of",
45
+ "display",
46
+ "designation",
47
+ "PropertyType",
48
+ "Equivalence",
49
+ "to_coding",
50
+ "to_snomed_coding",
51
+ "to_ecl_value_set",
52
+ "Expression",
53
+ "VariableExpression",
54
+ "DataSources",
55
+ "DataSource",
56
+ ]
pathling/_version.py ADDED
@@ -0,0 +1,9 @@
1
+ #
2
+ # Auto generated from POM project version.
3
+ # Please do not modify.
4
+ #
5
+ __version__="9.1.0"
6
+ __java_version__="9.1.0"
7
+ __scala_version__="2.13"
8
+ __delta_version__="4.0.0"
9
+ __hadoop_version__="3.4.1"
pathling/bulk.py ADDED
@@ -0,0 +1,310 @@
1
+ # Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from dataclasses import dataclass
17
+ from datetime import datetime, timezone
18
+ from typing import List, Optional, Tuple, Callable
19
+
20
+ from py4j.java_gateway import JavaObject, JVMView
21
+ from pyspark.sql import SparkSession
22
+
23
+
@dataclass
class FileResult:
    """
    Outcome of exporting one file as part of a bulk export.
    """

    source: str
    """
    URL the exported file was retrieved from.
    """

    destination: str
    """
    URL the exported file was written to.
    """

    size: int
    """
    Size of the exported file, in bytes.
    """
41
+
42
+
@dataclass
class ExportResult:
    """
    Outcome of a complete bulk export operation.
    """

    transaction_time: datetime
    """
    The server-side time at which the transaction was processed.
    Corresponds to `transactionTime` in the bulk export response.
    """

    results: List[FileResult]
    """
    One FileResult per exported file.
    """

    @classmethod
    def from_java(cls, java_result: JavaObject) -> 'ExportResult':
        """
        Build a Python ExportResult from the Java export result object.

        :param java_result: The Java export result object
        :return: A Python ExportResult object
        """
        # The Java side exposes the transaction time in epoch milliseconds; convert it to
        # fractional seconds and build a UTC-aware datetime from that.
        epoch_millis = java_result.getTransactionTime().toEpochMilli()
        processed_at = datetime.fromtimestamp(epoch_millis / 1000.0, tz=timezone.utc)

        # Translate each Java file result into its Python counterpart.
        exported_files = []
        for java_file in java_result.getResults():
            exported_files.append(
                FileResult(
                    source=str(java_file.getSource()),
                    destination=str(java_file.getDestination()),
                    size=java_file.getSize(),
                )
            )

        return cls(transaction_time=processed_at, results=exported_files)
83
+
84
+
class BulkExportClient:
    """
    A client for exporting data from the FHIR Bulk Data Access API.

    Instances wrap a Java BulkExportClient. Use :meth:`for_system`, :meth:`for_group` or
    :meth:`for_patient` to create one, then call :meth:`export`.
    """

    # Pairs each supported `auth_config` key with the name of the Java AuthConfig builder
    # method that receives its value.
    _AUTH_OPTIONS = (
        ("enabled", "enabled"),
        ("use_smart", "useSMART"),
        ("token_endpoint", "tokenEndpoint"),
        ("client_id", "clientId"),
        ("client_secret", "clientSecret"),
        ("private_key_jwk", "privateKeyJWK"),
        ("use_form_for_basic_auth", "useFormForBasicAuth"),
        ("scope", "scope"),
        ("token_expiry_tolerance", "tokenExpiryTolerance"),
    )

    def __init__(self, java_client):
        """
        Create a new BulkExportClient that wraps a Java BulkExportClient.

        :param java_client: The Java BulkExportClient instance to wrap
        """
        self._java_client = java_client

    def export(self) -> 'ExportResult':
        """
        Export data from the FHIR server.

        :return: The result of the export operation as a Python ExportResult object
        """
        java_result = self._java_client.export()
        return ExportResult.from_java(java_result)

    @staticmethod
    def _to_instant_string(since: datetime) -> str:
        """
        Render a timezone-aware datetime as a UTC ISO-8601 instant with millisecond precision,
        e.g. ``2024-01-01T02:00:00.123Z``.

        The value is normalised to UTC rather than emitted with its own offset, so the result
        is always in the canonical form accepted by `java.time.Instant.parse`, including for
        offsets that are not a whole number of minutes.

        :param since: The datetime to render (must include timezone information)
        :return: The instant as a string ending in `Z`
        :raises ValueError: If `since` has no tzinfo
        """
        if since.tzinfo is None:
            raise ValueError("datetime must include timezone information")
        wall_clock = since.replace(tzinfo=None)
        offset = since.utcoffset()
        # A tzinfo that reports no offset is taken to be UTC already, so its wall-clock
        # fields are used unchanged.
        if offset is not None:
            wall_clock -= offset
        # isoformat truncates (does not round) to the requested precision.
        return wall_clock.isoformat(timespec="milliseconds") + "Z"

    @classmethod
    def _configure_builder(cls, jvm, builder, fhir_endpoint_url: str, output_dir: str,
                           output_format: str = "application/fhir+ndjson",
                           since: Optional[datetime] = None,
                           types: Optional[List[str]] = None,
                           elements: Optional[List[str]] = None,
                           include_associated_data: Optional[List[str]] = None,
                           type_filters: Optional[List[str]] = None,
                           output_extension: str = "ndjson",
                           timeout: Optional[int] = None,
                           max_concurrent_downloads: int = 10,
                           auth_config: Optional[dict] = None):
        """
        Configure common builder parameters.

        :param jvm: The JVM instance
        :param builder: The builder instance to configure
        :param fhir_endpoint_url: The URL of the FHIR server
        :param output_dir: Output directory
        :param output_format: Output format
        :param since: Timestamp filter (must include timezone information)
        :param types: Resource types to include
        :param elements: Elements to include
        :param include_associated_data: Associated data to include
        :param type_filters: Resource filters
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary with the following
            possible keys:
            - enabled: Whether authentication is enabled (default: False)
            - client_id: The client ID to use for authentication
            - private_key_jwk: The private key in JWK format
            - client_secret: The client secret to use for authentication
            - token_endpoint: The token endpoint URL
            - use_smart: Whether to use SMART authentication (default: True)
            - use_form_for_basic_auth: Whether to use form-based basic auth (default: False)
            - scope: The scope to request
            - token_expiry_tolerance: The token expiry tolerance in seconds (default: 120)
        :raises ValueError: If `since` does not include timezone information
        """
        builder.withFhirEndpointUrl(fhir_endpoint_url)
        builder.withOutputDir(output_dir)
        builder.withOutputFormat(output_format)
        builder.withOutputExtension(output_extension)
        builder.withMaxConcurrentDownloads(max_concurrent_downloads)

        if timeout is not None:
            builder.withTimeout(jvm.java.time.Duration.ofSeconds(timeout))

        if since is not None:
            java_instant = jvm.java.time.Instant.parse(cls._to_instant_string(since))
            builder.withSince(java_instant)

        if types is not None:
            for type_ in types:
                builder.withType(type_)
        if elements is not None:
            for element in elements:
                builder.withElement(element)
        if include_associated_data is not None:
            # This builder method takes a Java List<String>, so copy the Python list across.
            java_list = jvm.java.util.ArrayList()
            for data in include_associated_data:
                java_list.add(data)
            builder.withIncludeAssociatedData(java_list)
        if type_filters is not None:
            for filter_ in type_filters:
                builder.withTypeFilter(filter_)

        if auth_config is not None:
            auth_builder = jvm.au.csiro.fhir.auth.AuthConfig.builder()

            # Set defaults to match Java class
            auth_builder.enabled(False)
            auth_builder.useSMART(True)
            auth_builder.useFormForBasicAuth(False)
            auth_builder.tokenExpiryTolerance(120)

            # Apply only the options the caller supplied, overriding the defaults above.
            for key, method_name in cls._AUTH_OPTIONS:
                if key in auth_config:
                    getattr(auth_builder, method_name)(auth_config[key])

            builder.withAuthConfig(auth_builder.build())

    @classmethod
    def for_system(cls, spark, *args, **kwargs) -> 'BulkExportClient':
        """
        Create a builder for a system-level export.

        :param spark: The SparkSession instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary
        :return: A BulkExportClient configured for system-level export
        """
        builder, jvm = cls._create_builder(spark, lambda bc: bc.systemBuilder())
        cls._configure_builder(jvm, builder, *args, **kwargs)
        return cls(builder.build())

    @classmethod
    def for_group(cls, spark, fhir_endpoint_url: str, output_dir: str,
                  group_id: str, *args, **kwargs) -> 'BulkExportClient':
        """
        Create a builder for a group-level export.

        :param spark: The SparkSession instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param group_id: The ID of the group to export
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary
        :return: A BulkExportClient configured for group-level export
        """
        # Pass group_id directly to groupBuilder
        builder, jvm = cls._create_builder(spark, lambda bc: bc.groupBuilder(group_id))
        cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs)
        return cls(builder.build())

    @classmethod
    def for_patient(cls, spark, fhir_endpoint_url: str, output_dir: str,
                    patients: Optional[List[str]] = None, *args, **kwargs) -> 'BulkExportClient':
        """
        Create a builder for a patient-level export.

        :param spark: The SparkSession instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param patients: List of patient references to include
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary
        :return: A BulkExportClient configured for patient-level export
        """
        builder, jvm = cls._create_builder(spark, lambda bc: bc.patientBuilder())
        if patients is not None:
            for patient in patients:
                ref = jvm.au.csiro.fhir.model.Reference.of(patient)
                builder.withPatient(ref)
        cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs)
        return cls(builder.build())

    @classmethod
    def _create_builder(cls,
                        spark: 'SparkSession',
                        factory_f: 'Callable[[JavaObject], JavaObject]'
                        ) -> 'Tuple[JavaObject, JVMView]':
        """
        Create a Java export builder and attach a file store factory to it.

        :param spark: The SparkSession instance
        :param factory_f: A function that receives the Java BulkExportClient class and returns
            the builder to use (e.g. by calling `systemBuilder()`)
        :return: A tuple of the builder and the JVM view it was created through
        """
        jvm: JVMView = spark._jvm
        client_class = jvm.au.csiro.fhir.export.BulkExportClient
        builder: JavaObject = factory_f(client_class)
        # The file store factory is constructed from the Spark context's Hadoop configuration.
        builder = builder.withFileStoreFactory(
            jvm.au.csiro.filestore.hdfs.HdfsFileStoreFactory(
                spark._jsc.sc().hadoopConfiguration())
        )
        return (builder, jvm)
pathling/coding.py ADDED
@@ -0,0 +1,90 @@
1
+ # Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from typing import Optional
17
+
18
+ from pyspark.sql.functions import lit, struct
19
+
20
+ from pathling.functions import SNOMED_URI
21
+
22
+
class Coding:
    """
    A Coding represents a code in a code system.
    See: https://hl7.org/fhir/R4/datatypes.html#Coding
    """

    def __init__(
        self,
        system: str,
        code: str,
        version: Optional[str] = None,
        display: Optional[str] = None,
        user_selected: Optional[bool] = None,
    ):
        """
        :param system: a URI that identifies the code system
        :param code: the code
        :param version: a URI that identifies the version of the code system
        :param display: the display text for the Coding
        :param user_selected: an indicator of whether the Coding was chosen directly by the user
        """
        self.system = system
        self.code = code
        self.version = version
        self.display = display
        self.user_selected = user_selected

    def __repr__(self) -> str:
        """
        :return: a string showing the class name and every field of the Coding
        """
        return (
            f"{type(self).__name__}(system={self.system!r}, code={self.code!r}, "
            f"version={self.version!r}, display={self.display!r}, "
            f"user_selected={self.user_selected!r})"
        )

    def to_literal(self):
        """
        Converts a Coding into a Column that contains a Coding struct. The Coding
        struct Column can be used as an input to terminology functions such as `member_of` and
        `translate`.

        :return: a Column containing a Coding struct
        """
        # The struct always carries a null `id` field; the remaining fields come from this
        # Coding.
        id_column = lit(None).alias("id")
        system_column = lit(self.system).alias("system")
        version_column = lit(self.version).alias("version")
        code_column = lit(self.code).alias("code")
        display_column = lit(self.display).alias("display")
        user_selected_column = lit(self.user_selected).alias("userSelected")
        return struct(
            id_column,
            system_column,
            version_column,
            code_column,
            display_column,
            user_selected_column,
        )

    @classmethod
    def of_snomed(
        cls,
        code: str,
        version: Optional[str] = None,
        display: Optional[str] = None,
        user_selected: Optional[bool] = None,
    ) -> "Coding":
        """
        Creates a SNOMED Coding.

        :param code: the code
        :param version: a URI that identifies the version of the code system
        :param display: the display text for the Coding
        :param user_selected: an indicator of whether the Coding was chosen directly by the user
        :return: a SNOMED coding with given arguments.
        """
        # Construct through `cls` so that calling this on a subclass yields that subclass.
        return cls(SNOMED_URI, code, version, display, user_selected)