pathling 9.2.0__tar.gz → 9.2.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pathling-9.2.0 → pathling-9.2.0.dev0}/PKG-INFO +1 -1
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/bulk.py +12 -13
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/designation.py +3 -3
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/display.py +3 -3
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/encode_bundles.py +1 -1
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/encode_resources.py +1 -1
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/fhir_view.py +18 -22
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/member_of.py +4 -4
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/property_of.py +5 -5
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/subsumes.py +3 -3
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/translate.py +2 -2
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/__init__.py +10 -10
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/_version.py +2 -2
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/bulk.py +63 -90
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/coding.py +1 -1
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/context.py +7 -8
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/core.py +2 -2
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/datasink.py +13 -66
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/datasource.py +15 -17
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/fhir.py +1 -1
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/functions.py +1 -1
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/spark.py +6 -12
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pathling/udfs.py +4 -4
- {pathling-9.2.0 → pathling-9.2.0.dev0}/pyproject.toml +0 -20
- {pathling-9.2.0 → pathling-9.2.0.dev0}/.gitignore +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/LICENSE +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/README.md +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/data/csv/conditions.csv +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/data/resources/Condition.ndjson +0 -0
- {pathling-9.2.0 → pathling-9.2.0.dev0}/examples/data/resources/Patient.ndjson +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
7
|
+
#
|
|
8
8
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
9
|
+
#
|
|
10
10
|
# Unless required by applicable law or agreed to in writing, software
|
|
11
11
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -49,10 +49,9 @@ def test_bulk_exports():
|
|
|
49
49
|
# Base parameters from the demo server
|
|
50
50
|
fhir_server = "https://bulk-data.smarthealthit.org/fhir"
|
|
51
51
|
output_base = os.path.join(tempfile.gettempdir(), "bulk_export_test")
|
|
52
|
-
|
|
52
|
+
|
|
53
53
|
if os.path.exists(output_base):
|
|
54
54
|
import shutil
|
|
55
|
-
|
|
56
55
|
shutil.rmtree(output_base)
|
|
57
56
|
os.makedirs(output_base)
|
|
58
57
|
|
|
@@ -76,8 +75,8 @@ def test_bulk_exports():
|
|
|
76
75
|
"use_smart": True,
|
|
77
76
|
"use_form_for_basic_auth": False,
|
|
78
77
|
"scope": "system/*.read",
|
|
79
|
-
"token_expiry_tolerance": 30
|
|
80
|
-
}
|
|
78
|
+
"token_expiry_tolerance": 30
|
|
79
|
+
}
|
|
81
80
|
)
|
|
82
81
|
print("System export completed successfully")
|
|
83
82
|
|
|
@@ -86,7 +85,7 @@ def test_bulk_exports():
|
|
|
86
85
|
pc.read.bulk(
|
|
87
86
|
fhir_endpoint_url=fhir_server,
|
|
88
87
|
output_dir=f"{output_base}/group_basic",
|
|
89
|
-
group_id="BMCHealthNet"
|
|
88
|
+
group_id="BMCHealthNet"
|
|
90
89
|
)
|
|
91
90
|
print("Group export completed successfully")
|
|
92
91
|
|
|
@@ -104,7 +103,7 @@ def test_bulk_exports():
|
|
|
104
103
|
type_filters=["Patient?status=active"],
|
|
105
104
|
output_extension="ndjson",
|
|
106
105
|
timeout=1800,
|
|
107
|
-
max_concurrent_downloads=8
|
|
106
|
+
max_concurrent_downloads=8
|
|
108
107
|
)
|
|
109
108
|
print("Group export completed successfully")
|
|
110
109
|
|
|
@@ -115,8 +114,8 @@ def test_bulk_exports():
|
|
|
115
114
|
output_dir=f"{output_base}/patient_basic",
|
|
116
115
|
patients=[
|
|
117
116
|
"Patient/58c297c4-d684-4677-8024-01131d93835e",
|
|
118
|
-
"Patient/118616a4-f0b2-411f-8050-39d5d27c738c"
|
|
119
|
-
]
|
|
117
|
+
"Patient/118616a4-f0b2-411f-8050-39d5d27c738c"
|
|
118
|
+
]
|
|
120
119
|
)
|
|
121
120
|
print("Patient export completed successfully")
|
|
122
121
|
|
|
@@ -128,7 +127,7 @@ def test_bulk_exports():
|
|
|
128
127
|
patients=[
|
|
129
128
|
"Patient/58c297c4-d684-4677-8024-01131d93835e",
|
|
130
129
|
"Patient/118616a4-f0b2-411f-8050-39d5d27c738c",
|
|
131
|
-
"Patient/21fba439-ca79-411f-a081-37a432a78f3a"
|
|
130
|
+
"Patient/21fba439-ca79-411f-a081-37a432a78f3a"
|
|
132
131
|
],
|
|
133
132
|
output_format="application/fhir+ndjson",
|
|
134
133
|
since=datetime(2020, 1, 1, tzinfo=timezone.utc),
|
|
@@ -138,7 +137,7 @@ def test_bulk_exports():
|
|
|
138
137
|
type_filters=["Observation?category=vital-signs"],
|
|
139
138
|
output_extension="ndjson",
|
|
140
139
|
timeout=2400,
|
|
141
|
-
max_concurrent_downloads=3
|
|
140
|
+
max_concurrent_downloads=3
|
|
142
141
|
)
|
|
143
142
|
print("Patient export completed successfully")
|
|
144
143
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -15,14 +15,14 @@
|
|
|
15
15
|
|
|
16
16
|
import os
|
|
17
17
|
|
|
18
|
-
from pathling import Coding,
|
|
18
|
+
from pathling import PathlingContext, Coding, to_snomed_coding, designation
|
|
19
19
|
|
|
20
20
|
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
21
21
|
|
|
22
22
|
pc = PathlingContext.create()
|
|
23
23
|
|
|
24
24
|
csv = pc.spark.read.options(header=True).csv(
|
|
25
|
-
f
|
|
25
|
+
f'file://{os.path.join(HERE, "data/csv/conditions.csv")}'
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
# Obtain display name for snomed codes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -15,14 +15,14 @@
|
|
|
15
15
|
|
|
16
16
|
import os
|
|
17
17
|
|
|
18
|
-
from pathling import PathlingContext,
|
|
18
|
+
from pathling import PathlingContext, to_snomed_coding, display
|
|
19
19
|
|
|
20
20
|
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
21
21
|
|
|
22
22
|
pc = PathlingContext.create()
|
|
23
23
|
|
|
24
24
|
csv = pc.spark.read.options(header=True).csv(
|
|
25
|
-
f
|
|
25
|
+
f'file://{os.path.join(HERE, "data/csv/conditions.csv")}'
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
# Obtain display name for snomed codes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
import os
|
|
17
|
-
|
|
18
17
|
from pathling import PathlingContext
|
|
19
18
|
|
|
20
19
|
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
@@ -25,38 +24,35 @@ pc = PathlingContext.create()
|
|
|
25
24
|
datasource = pc.read.ndjson(NDJSON_DIR)
|
|
26
25
|
|
|
27
26
|
view_ds = datasource.view(
|
|
28
|
-
resource=
|
|
27
|
+
resource='Patient',
|
|
29
28
|
select=[
|
|
30
29
|
{
|
|
31
|
-
|
|
32
|
-
{
|
|
33
|
-
{
|
|
34
|
-
{
|
|
35
|
-
|
|
36
|
-
"name": "phone_numbers",
|
|
37
|
-
"collection": True,
|
|
38
|
-
},
|
|
30
|
+
'column': [
|
|
31
|
+
{'path': 'id', 'name': 'id'},
|
|
32
|
+
{'path': 'gender', 'name': 'gender'},
|
|
33
|
+
{'path': "telecom.where(system='phone').value ", 'name': 'phone_numbers',
|
|
34
|
+
'collection': True},
|
|
39
35
|
]
|
|
40
36
|
},
|
|
41
37
|
{
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
{
|
|
45
|
-
{
|
|
38
|
+
'forEach': 'name',
|
|
39
|
+
'column': [
|
|
40
|
+
{'path': 'use', 'name': 'name_use'},
|
|
41
|
+
{'path': 'family', 'name': 'family_name'},
|
|
46
42
|
],
|
|
47
|
-
|
|
43
|
+
'select': [
|
|
48
44
|
{
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
{
|
|
45
|
+
'forEachOrNull': 'given',
|
|
46
|
+
'column': [
|
|
47
|
+
{'path': '$this', 'name': 'given_name'},
|
|
52
48
|
],
|
|
53
49
|
}
|
|
54
|
-
]
|
|
50
|
+
]
|
|
55
51
|
},
|
|
56
52
|
],
|
|
57
53
|
where=[
|
|
58
|
-
{
|
|
59
|
-
]
|
|
54
|
+
{'path': "gender = 'male'"},
|
|
55
|
+
]
|
|
60
56
|
)
|
|
61
57
|
|
|
62
58
|
view_ds.show()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -17,9 +17,9 @@ import os
|
|
|
17
17
|
|
|
18
18
|
from pathling import (
|
|
19
19
|
PathlingContext,
|
|
20
|
-
member_of,
|
|
21
|
-
to_ecl_value_set,
|
|
22
20
|
to_snomed_coding,
|
|
21
|
+
to_ecl_value_set,
|
|
22
|
+
member_of,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
25
|
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
@@ -34,7 +34,7 @@ pc = PathlingContext.create(
|
|
|
34
34
|
pc.spark.sparkContext.setLogLevel("DEBUG")
|
|
35
35
|
|
|
36
36
|
csv = pc.spark.read.options(header=True).csv(
|
|
37
|
-
f
|
|
37
|
+
f'file://{os.path.join(HERE, "data/csv/conditions.csv")}'
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
VIRAL_INFECTION_ECL = """
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -17,10 +17,10 @@ import os
|
|
|
17
17
|
|
|
18
18
|
from pathling import (
|
|
19
19
|
PathlingContext,
|
|
20
|
-
PropertyType,
|
|
21
|
-
display,
|
|
22
|
-
property_of,
|
|
23
20
|
to_snomed_coding,
|
|
21
|
+
property_of,
|
|
22
|
+
display,
|
|
23
|
+
PropertyType,
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
@@ -28,7 +28,7 @@ HERE = os.path.abspath(os.path.dirname(__file__))
|
|
|
28
28
|
pc = PathlingContext.create()
|
|
29
29
|
|
|
30
30
|
csv = pc.spark.read.options(header=True).csv(
|
|
31
|
-
f
|
|
31
|
+
f'file://{os.path.join(HERE, "data/csv/conditions.csv")}'
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
# Get the parent codes for each code in the dataset.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -15,14 +15,14 @@
|
|
|
15
15
|
|
|
16
16
|
import os
|
|
17
17
|
|
|
18
|
-
from pathling import Coding,
|
|
18
|
+
from pathling import PathlingContext, Coding, to_coding, subsumes
|
|
19
19
|
|
|
20
20
|
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
21
21
|
|
|
22
22
|
pc = PathlingContext.create()
|
|
23
23
|
|
|
24
24
|
csv = pc.spark.read.options(header=True).csv(
|
|
25
|
-
f
|
|
25
|
+
f'file://{os.path.join(HERE, "data/csv/conditions.csv")}'
|
|
26
26
|
)
|
|
27
27
|
first_3 = csv.limit(3)
|
|
28
28
|
cross_join = first_3.selectExpr(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -24,7 +24,7 @@ HERE = os.path.abspath(os.path.dirname(__file__))
|
|
|
24
24
|
pc = PathlingContext.create()
|
|
25
25
|
|
|
26
26
|
csv = pc.spark.read.options(header=True).csv(
|
|
27
|
-
f
|
|
27
|
+
f'file://{os.path.join(HERE, "data/csv/conditions.csv")}'
|
|
28
28
|
)
|
|
29
29
|
|
|
30
30
|
# Translate codings to Read CTV3 using the map that ships with SNOMED CT.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -16,19 +16,19 @@
|
|
|
16
16
|
from .coding import Coding
|
|
17
17
|
from .context import PathlingContext, StorageType
|
|
18
18
|
from .core import Expression, VariableExpression
|
|
19
|
-
from .datasource import
|
|
19
|
+
from .datasource import DataSources, DataSource
|
|
20
20
|
from .fhir import MimeType, Version
|
|
21
|
-
from .functions import to_coding,
|
|
21
|
+
from .functions import to_coding, to_snomed_coding, to_ecl_value_set
|
|
22
22
|
from .udfs import (
|
|
23
|
-
Equivalence,
|
|
24
|
-
PropertyType,
|
|
25
|
-
designation,
|
|
26
|
-
display,
|
|
27
23
|
member_of,
|
|
28
|
-
property_of,
|
|
29
|
-
subsumed_by,
|
|
30
|
-
subsumes,
|
|
31
24
|
translate,
|
|
25
|
+
subsumes,
|
|
26
|
+
subsumed_by,
|
|
27
|
+
property_of,
|
|
28
|
+
display,
|
|
29
|
+
designation,
|
|
30
|
+
PropertyType,
|
|
31
|
+
Equivalence,
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
__all__ = [
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
# Auto generated from POM project version.
|
|
3
3
|
# Please do not modify.
|
|
4
4
|
#
|
|
5
|
-
__version__="9.2.0"
|
|
6
|
-
__java_version__="9.2.0"
|
|
5
|
+
__version__="9.2.0.dev0"
|
|
6
|
+
__java_version__="9.2.0-SNAPSHOT"
|
|
7
7
|
__scala_version__="2.13"
|
|
8
8
|
__delta_version__="4.0.0"
|
|
9
9
|
__hadoop_version__="3.4.1"
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
7
|
+
#
|
|
8
8
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
9
|
+
#
|
|
10
10
|
# Unless required by applicable law or agreed to in writing, software
|
|
11
11
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
from dataclasses import dataclass
|
|
17
17
|
from datetime import datetime, timezone
|
|
18
|
-
from typing import
|
|
18
|
+
from typing import List, Optional, Tuple, Callable
|
|
19
19
|
|
|
20
20
|
from py4j.java_gateway import JavaObject, JVMView
|
|
21
21
|
from pyspark.sql import SparkSession
|
|
@@ -26,7 +26,6 @@ class FileResult:
|
|
|
26
26
|
"""
|
|
27
27
|
Represents the result of a single file export operation.
|
|
28
28
|
"""
|
|
29
|
-
|
|
30
29
|
source: str
|
|
31
30
|
"""
|
|
32
31
|
The source URL of the exported file.
|
|
@@ -46,7 +45,6 @@ class ExportResult:
|
|
|
46
45
|
"""
|
|
47
46
|
Represents the result of a bulk export operation.
|
|
48
47
|
"""
|
|
49
|
-
|
|
50
48
|
transaction_time: datetime
|
|
51
49
|
"""
|
|
52
50
|
The time at which the transaction was processed at the server.
|
|
@@ -58,29 +56,30 @@ class ExportResult:
|
|
|
58
56
|
"""
|
|
59
57
|
|
|
60
58
|
@classmethod
|
|
61
|
-
def from_java(cls, java_result: JavaObject) ->
|
|
59
|
+
def from_java(cls, java_result: JavaObject) -> 'ExportResult':
|
|
62
60
|
"""
|
|
63
61
|
Create an ExportResult from a Java export result object.
|
|
64
|
-
|
|
62
|
+
|
|
65
63
|
:param java_result: The Java export result object
|
|
66
64
|
:return: A Python ExportResult object
|
|
67
65
|
"""
|
|
68
66
|
# Convert transaction time from Java Instant to Python datetime
|
|
69
67
|
transaction_time = datetime.fromtimestamp(
|
|
70
|
-
java_result.getTransactionTime().toEpochMilli() / 1000.0, tz=timezone.utc
|
|
71
|
-
)
|
|
68
|
+
java_result.getTransactionTime().toEpochMilli() / 1000.0, tz=timezone.utc)
|
|
72
69
|
|
|
73
70
|
# Convert file results
|
|
74
71
|
file_results = [
|
|
75
72
|
FileResult(
|
|
76
73
|
source=str(java_file_result.getSource()),
|
|
77
74
|
destination=str(java_file_result.getDestination()),
|
|
78
|
-
size=java_file_result.getSize()
|
|
79
|
-
)
|
|
75
|
+
size=java_file_result.getSize())
|
|
80
76
|
for java_file_result in java_result.getResults()
|
|
81
77
|
]
|
|
82
78
|
|
|
83
|
-
return cls(
|
|
79
|
+
return cls(
|
|
80
|
+
transaction_time=transaction_time,
|
|
81
|
+
results=file_results
|
|
82
|
+
)
|
|
84
83
|
|
|
85
84
|
|
|
86
85
|
class BulkExportClient:
|
|
@@ -91,7 +90,7 @@ class BulkExportClient:
|
|
|
91
90
|
def __init__(self, java_client):
|
|
92
91
|
"""
|
|
93
92
|
Create a new BulkExportClient that wraps a Java BulkExportClient.
|
|
94
|
-
|
|
93
|
+
|
|
95
94
|
:param java_client: The Java BulkExportClient instance to wrap
|
|
96
95
|
"""
|
|
97
96
|
self._java_client = java_client
|
|
@@ -99,33 +98,27 @@ class BulkExportClient:
|
|
|
99
98
|
def export(self) -> ExportResult:
|
|
100
99
|
"""
|
|
101
100
|
Export data from the FHIR server.
|
|
102
|
-
|
|
101
|
+
|
|
103
102
|
:return: The result of the export operation as a Python ExportResult object
|
|
104
103
|
"""
|
|
105
104
|
java_result = self._java_client.export()
|
|
106
105
|
return ExportResult.from_java(java_result)
|
|
107
106
|
|
|
108
107
|
@classmethod
|
|
109
|
-
def _configure_builder(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
type_filters: Optional[List[str]] = None,
|
|
121
|
-
output_extension: str = "ndjson",
|
|
122
|
-
timeout: Optional[int] = None,
|
|
123
|
-
max_concurrent_downloads: int = 10,
|
|
124
|
-
auth_config: Optional[dict] = None,
|
|
125
|
-
):
|
|
108
|
+
def _configure_builder(cls, jvm, builder, fhir_endpoint_url: str, output_dir: str,
|
|
109
|
+
output_format: str = "application/fhir+ndjson",
|
|
110
|
+
since: Optional[datetime] = None,
|
|
111
|
+
types: Optional[List[str]] = None,
|
|
112
|
+
elements: Optional[List[str]] = None,
|
|
113
|
+
include_associated_data: Optional[List[str]] = None,
|
|
114
|
+
type_filters: Optional[List[str]] = None,
|
|
115
|
+
output_extension: str = "ndjson",
|
|
116
|
+
timeout: Optional[int] = None,
|
|
117
|
+
max_concurrent_downloads: int = 10,
|
|
118
|
+
auth_config: Optional[dict] = None):
|
|
126
119
|
"""
|
|
127
120
|
Configure common builder parameters.
|
|
128
|
-
|
|
121
|
+
|
|
129
122
|
:param jvm: The JVM instance
|
|
130
123
|
:param builder: The builder instance to configure
|
|
131
124
|
:param fhir_endpoint_url: The URL of the FHIR server
|
|
@@ -164,13 +157,11 @@ class BulkExportClient:
|
|
|
164
157
|
if since.tzinfo is None:
|
|
165
158
|
raise ValueError("datetime must include timezone information")
|
|
166
159
|
# Format with microsecond precision and timezone offset
|
|
167
|
-
instant_str = since.strftime(
|
|
168
|
-
:-3
|
|
169
|
-
] # Truncate to milliseconds
|
|
160
|
+
instant_str = since.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] # Truncate to milliseconds
|
|
170
161
|
if since.utcoffset() is None:
|
|
171
|
-
instant_str +=
|
|
162
|
+
instant_str += 'Z'
|
|
172
163
|
else:
|
|
173
|
-
offset = since.strftime(
|
|
164
|
+
offset = since.strftime('%z')
|
|
174
165
|
# Insert colon in timezone offset
|
|
175
166
|
instant_str += f"{offset[:3]}:{offset[3:]}"
|
|
176
167
|
java_instant = jvm.java.time.Instant.parse(instant_str)
|
|
@@ -201,33 +192,33 @@ class BulkExportClient:
|
|
|
201
192
|
auth_builder.tokenExpiryTolerance(120)
|
|
202
193
|
|
|
203
194
|
# Map Python config to Java builder methods
|
|
204
|
-
if
|
|
205
|
-
auth_builder.enabled(auth_config[
|
|
206
|
-
if
|
|
207
|
-
auth_builder.useSMART(auth_config[
|
|
208
|
-
if
|
|
209
|
-
auth_builder.tokenEndpoint(auth_config[
|
|
210
|
-
if
|
|
211
|
-
auth_builder.clientId(auth_config[
|
|
212
|
-
if
|
|
213
|
-
auth_builder.clientSecret(auth_config[
|
|
214
|
-
if
|
|
215
|
-
auth_builder.privateKeyJWK(auth_config[
|
|
216
|
-
if
|
|
217
|
-
auth_builder.useFormForBasicAuth(auth_config[
|
|
218
|
-
if
|
|
219
|
-
auth_builder.scope(auth_config[
|
|
220
|
-
if
|
|
221
|
-
auth_builder.tokenExpiryTolerance(auth_config[
|
|
195
|
+
if 'enabled' in auth_config:
|
|
196
|
+
auth_builder.enabled(auth_config['enabled'])
|
|
197
|
+
if 'use_smart' in auth_config:
|
|
198
|
+
auth_builder.useSMART(auth_config['use_smart'])
|
|
199
|
+
if 'token_endpoint' in auth_config:
|
|
200
|
+
auth_builder.tokenEndpoint(auth_config['token_endpoint'])
|
|
201
|
+
if 'client_id' in auth_config:
|
|
202
|
+
auth_builder.clientId(auth_config['client_id'])
|
|
203
|
+
if 'client_secret' in auth_config:
|
|
204
|
+
auth_builder.clientSecret(auth_config['client_secret'])
|
|
205
|
+
if 'private_key_jwk' in auth_config:
|
|
206
|
+
auth_builder.privateKeyJWK(auth_config['private_key_jwk'])
|
|
207
|
+
if 'use_form_for_basic_auth' in auth_config:
|
|
208
|
+
auth_builder.useFormForBasicAuth(auth_config['use_form_for_basic_auth'])
|
|
209
|
+
if 'scope' in auth_config:
|
|
210
|
+
auth_builder.scope(auth_config['scope'])
|
|
211
|
+
if 'token_expiry_tolerance' in auth_config:
|
|
212
|
+
auth_builder.tokenExpiryTolerance(auth_config['token_expiry_tolerance'])
|
|
222
213
|
|
|
223
214
|
auth_config_obj = auth_builder.build()
|
|
224
215
|
builder.withAuthConfig(auth_config_obj)
|
|
225
216
|
|
|
226
217
|
@classmethod
|
|
227
|
-
def for_system(cls, spark, *args, **kwargs) ->
|
|
218
|
+
def for_system(cls, spark, *args, **kwargs) -> 'BulkExportClient':
|
|
228
219
|
"""
|
|
229
220
|
Create a builder for a system-level export.
|
|
230
|
-
|
|
221
|
+
|
|
231
222
|
:param spark: The SparkSession instance
|
|
232
223
|
:param fhir_endpoint_url: The URL of the FHIR server to export from
|
|
233
224
|
:param output_dir: The directory to write the output files to
|
|
@@ -248,18 +239,11 @@ class BulkExportClient:
|
|
|
248
239
|
return cls(builder.build())
|
|
249
240
|
|
|
250
241
|
@classmethod
|
|
251
|
-
def for_group(
|
|
252
|
-
|
|
253
|
-
spark,
|
|
254
|
-
fhir_endpoint_url: str,
|
|
255
|
-
output_dir: str,
|
|
256
|
-
group_id: str,
|
|
257
|
-
*args,
|
|
258
|
-
**kwargs,
|
|
259
|
-
) -> "BulkExportClient":
|
|
242
|
+
def for_group(cls, spark, fhir_endpoint_url: str, output_dir: str,
|
|
243
|
+
group_id: str, *args, **kwargs) -> 'BulkExportClient':
|
|
260
244
|
"""
|
|
261
245
|
Create a builder for a group-level export.
|
|
262
|
-
|
|
246
|
+
|
|
263
247
|
:param spark: The SparkSession instance
|
|
264
248
|
:param fhir_endpoint_url: The URL of the FHIR server to export from
|
|
265
249
|
:param output_dir: The directory to write the output files to
|
|
@@ -278,24 +262,15 @@ class BulkExportClient:
|
|
|
278
262
|
"""
|
|
279
263
|
# Pass group_id directly to groupBuilder
|
|
280
264
|
builder, jvm = cls._create_builder(spark, lambda bc: bc.groupBuilder(group_id))
|
|
281
|
-
cls._configure_builder(
|
|
282
|
-
jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs
|
|
283
|
-
)
|
|
265
|
+
cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs)
|
|
284
266
|
return cls(builder.build())
|
|
285
267
|
|
|
286
268
|
@classmethod
|
|
287
|
-
def for_patient(
|
|
288
|
-
|
|
289
|
-
spark,
|
|
290
|
-
fhir_endpoint_url: str,
|
|
291
|
-
output_dir: str,
|
|
292
|
-
patients: Optional[List[str]] = None,
|
|
293
|
-
*args,
|
|
294
|
-
**kwargs,
|
|
295
|
-
) -> "BulkExportClient":
|
|
269
|
+
def for_patient(cls, spark, fhir_endpoint_url: str, output_dir: str,
|
|
270
|
+
patients: Optional[List[str]] = None, *args, **kwargs) -> 'BulkExportClient':
|
|
296
271
|
"""
|
|
297
272
|
Create a builder for a patient-level export.
|
|
298
|
-
|
|
273
|
+
|
|
299
274
|
:param spark: The SparkSession instance
|
|
300
275
|
:param fhir_endpoint_url: The URL of the FHIR server to export from
|
|
301
276
|
:param output_dir: The directory to write the output files to
|
|
@@ -317,21 +292,19 @@ class BulkExportClient:
|
|
|
317
292
|
for patient in patients:
|
|
318
293
|
ref = jvm.au.csiro.fhir.model.Reference.of(patient)
|
|
319
294
|
builder.withPatient(ref)
|
|
320
|
-
cls._configure_builder(
|
|
321
|
-
jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs
|
|
322
|
-
)
|
|
295
|
+
cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs)
|
|
323
296
|
return cls(builder.build())
|
|
324
297
|
|
|
325
298
|
@classmethod
|
|
326
|
-
def _create_builder(
|
|
327
|
-
|
|
328
|
-
|
|
299
|
+
def _create_builder(cls,
|
|
300
|
+
spark: SparkSession,
|
|
301
|
+
factory_f: Callable[[JavaObject], JavaObject]) -> Tuple[
|
|
302
|
+
JavaObject, JVMView]:
|
|
303
|
+
|
|
329
304
|
jvm: JVMView = spark._jvm
|
|
330
305
|
client_class = jvm.au.csiro.fhir.export.BulkExportClient
|
|
331
306
|
builder: JavaObject = factory_f(client_class)
|
|
332
307
|
builder = builder.withFileStoreFactory(
|
|
333
|
-
jvm.au.csiro.filestore.hdfs.HdfsFileStoreFactory(
|
|
334
|
-
spark._jsc.sc().hadoopConfiguration()
|
|
335
|
-
)
|
|
308
|
+
jvm.au.csiro.filestore.hdfs.HdfsFileStoreFactory(spark._jsc.sc().hadoopConfiguration())
|
|
336
309
|
)
|
|
337
310
|
return (builder, jvm)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -15,15 +15,15 @@
|
|
|
15
15
|
|
|
16
16
|
# noinspection PyPackageRequirements
|
|
17
17
|
|
|
18
|
-
from typing import TYPE_CHECKING, Optional, Sequence
|
|
19
|
-
|
|
20
18
|
from py4j.java_gateway import JavaObject
|
|
21
19
|
from pyspark.sql import DataFrame, SparkSession
|
|
20
|
+
from typing import Optional, Sequence, TYPE_CHECKING
|
|
22
21
|
|
|
23
22
|
from pathling._version import (
|
|
24
|
-
__delta_version__,
|
|
25
23
|
__java_version__,
|
|
26
24
|
__scala_version__,
|
|
25
|
+
__delta_version__,
|
|
26
|
+
__hadoop_version__,
|
|
27
27
|
)
|
|
28
28
|
from pathling.fhir import MimeType
|
|
29
29
|
|
|
@@ -193,7 +193,7 @@ class PathlingContext:
|
|
|
193
193
|
SparkSession.builder.config(
|
|
194
194
|
"spark.jars.packages",
|
|
195
195
|
f"au.csiro.pathling:library-runtime:{__java_version__},"
|
|
196
|
-
f"io.delta:delta-spark_{__scala_version__}:{__delta_version__},"
|
|
196
|
+
f"io.delta:delta-spark_{__scala_version__}:{__delta_version__},"
|
|
197
197
|
)
|
|
198
198
|
.config(
|
|
199
199
|
"spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"
|
|
@@ -208,9 +208,7 @@ class PathlingContext:
|
|
|
208
208
|
if enable_remote_debugging:
|
|
209
209
|
suspend_option = "y" if debug_suspend else "n"
|
|
210
210
|
debug_options = f"-agentlib:jdwp=transport=dt_socket,server=y,suspend={suspend_option},address={debug_port}"
|
|
211
|
-
spark_builder = spark_builder.config(
|
|
212
|
-
"spark.driver.extraJavaOptions", debug_options
|
|
213
|
-
)
|
|
211
|
+
spark_builder = spark_builder.config("spark.driver.extraJavaOptions", debug_options)
|
|
214
212
|
|
|
215
213
|
return spark_builder.getOrCreate()
|
|
216
214
|
|
|
@@ -372,6 +370,7 @@ class PathlingContext:
|
|
|
372
370
|
)
|
|
373
371
|
)
|
|
374
372
|
|
|
373
|
+
|
|
375
374
|
@property
|
|
376
375
|
def read(self) -> "DataSources":
|
|
377
376
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
-
from typing import Any, Callable,
|
|
16
|
+
from typing import Any, Callable, Sequence, Tuple, Optional, Union
|
|
17
17
|
|
|
18
18
|
from py4j.java_collections import SetConverter
|
|
19
19
|
from py4j.java_gateway import JavaObject
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -13,57 +13,14 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
-
from
|
|
17
|
-
from typing import TYPE_CHECKING, Callable, List, Optional
|
|
16
|
+
from typing import Callable, Optional
|
|
18
17
|
|
|
19
18
|
from pathling.core import SparkConversionsMixin, StringMapper
|
|
20
|
-
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
21
20
|
if TYPE_CHECKING:
|
|
22
21
|
from pathling.datasource import DataSource
|
|
23
22
|
|
|
24
23
|
|
|
25
|
-
@dataclass
|
|
26
|
-
class FileInformation:
|
|
27
|
-
"""
|
|
28
|
-
Information about a file created by a write operation.
|
|
29
|
-
|
|
30
|
-
:param fhir_resource_type: The FHIR resource type code for this file.
|
|
31
|
-
:param absolute_url: The absolute URL or path to the file.
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
fhir_resource_type: str
|
|
35
|
-
absolute_url: str
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
@dataclass
|
|
39
|
-
class WriteDetails:
|
|
40
|
-
"""
|
|
41
|
-
Details about files created or modified by a write operation.
|
|
42
|
-
|
|
43
|
-
:param file_infos: A list of file information objects describing each file written.
|
|
44
|
-
"""
|
|
45
|
-
|
|
46
|
-
file_infos: List[FileInformation]
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def _convert_write_details(java_result) -> WriteDetails:
|
|
50
|
-
"""
|
|
51
|
-
Convert a Java WriteDetails object to a Python WriteDetails dataclass.
|
|
52
|
-
|
|
53
|
-
:param java_result: The Java WriteDetails object from the library API.
|
|
54
|
-
:returns: A Python WriteDetails dataclass with the converted data.
|
|
55
|
-
"""
|
|
56
|
-
java_file_infos = java_result.fileInfos()
|
|
57
|
-
file_infos = [
|
|
58
|
-
FileInformation(
|
|
59
|
-
fhir_resource_type=fi.fhirResourceType(),
|
|
60
|
-
absolute_url=fi.absoluteUrl(),
|
|
61
|
-
)
|
|
62
|
-
for fi in java_file_infos
|
|
63
|
-
]
|
|
64
|
-
return WriteDetails(file_infos=file_infos)
|
|
65
|
-
|
|
66
|
-
|
|
67
24
|
class SaveMode:
|
|
68
25
|
"""
|
|
69
26
|
Constants that represent the different save modes.
|
|
@@ -100,7 +57,7 @@ class DataSinks(SparkConversionsMixin):
|
|
|
100
57
|
path: str,
|
|
101
58
|
save_mode: Optional[str] = SaveMode.ERROR,
|
|
102
59
|
file_name_mapper: Callable[[str], str] = None,
|
|
103
|
-
) ->
|
|
60
|
+
) -> None:
|
|
104
61
|
"""
|
|
105
62
|
Writes the data to a directory of NDJSON files. The files will be named using the resource
|
|
106
63
|
type and the ".ndjson" extension.
|
|
@@ -113,20 +70,16 @@ class DataSinks(SparkConversionsMixin):
|
|
|
113
70
|
- "error" will raise an error if the file already exists.
|
|
114
71
|
:param file_name_mapper: An optional function that can be used to customise the mapping of
|
|
115
72
|
the resource type to the file name.
|
|
116
|
-
:returns: Details about the files that were written.
|
|
117
73
|
"""
|
|
118
74
|
if file_name_mapper:
|
|
119
75
|
wrapped_mapper = StringMapper(
|
|
120
76
|
self.spark._jvm._gateway_client, file_name_mapper
|
|
121
77
|
)
|
|
122
|
-
|
|
78
|
+
self._datasinks.saveMode(save_mode).ndjson(path, wrapped_mapper)
|
|
123
79
|
else:
|
|
124
|
-
|
|
125
|
-
return _convert_write_details(result)
|
|
80
|
+
self._datasinks.saveMode(save_mode).ndjson(path)
|
|
126
81
|
|
|
127
|
-
def parquet(
|
|
128
|
-
self, path: str, save_mode: Optional[str] = SaveMode.ERROR
|
|
129
|
-
) -> WriteDetails:
|
|
82
|
+
def parquet(self, path: str, save_mode: Optional[str] = SaveMode.ERROR) -> None:
|
|
130
83
|
"""
|
|
131
84
|
Writes the data to a directory of Parquet files.
|
|
132
85
|
|
|
@@ -136,14 +89,12 @@ class DataSinks(SparkConversionsMixin):
|
|
|
136
89
|
- "append" will append the new data to the existing data.
|
|
137
90
|
- "ignore" will only save the data if the file does not already exist.
|
|
138
91
|
- "error" will raise an error if the file already exists.
|
|
139
|
-
:returns: Details about the files that were written.
|
|
140
92
|
"""
|
|
141
|
-
|
|
142
|
-
return _convert_write_details(result)
|
|
93
|
+
self._datasinks.saveMode(save_mode).parquet(path)
|
|
143
94
|
|
|
144
95
|
def delta(
|
|
145
96
|
self, path: str, save_mode: Optional[str] = SaveMode.OVERWRITE
|
|
146
|
-
) ->
|
|
97
|
+
) -> None:
|
|
147
98
|
"""
|
|
148
99
|
Writes the data to a directory of Delta files.
|
|
149
100
|
|
|
@@ -151,16 +102,14 @@ class DataSinks(SparkConversionsMixin):
|
|
|
151
102
|
:param save_mode: The save mode to use when writing the data - "overwrite" will
|
|
152
103
|
overwrite any existing data, "merge" will merge the new data with the existing data based
|
|
153
104
|
on resource ID.
|
|
154
|
-
:returns: Details about the files that were written.
|
|
155
105
|
"""
|
|
156
|
-
|
|
157
|
-
return _convert_write_details(result)
|
|
106
|
+
self._datasinks.saveMode(save_mode).delta(path)
|
|
158
107
|
|
|
159
108
|
def tables(
|
|
160
109
|
self,
|
|
161
110
|
schema: Optional[str] = None,
|
|
162
111
|
save_mode: Optional[str] = SaveMode.OVERWRITE,
|
|
163
|
-
) ->
|
|
112
|
+
) -> None:
|
|
164
113
|
"""
|
|
165
114
|
Writes the data to a set of tables in the Spark catalog.
|
|
166
115
|
|
|
@@ -168,10 +117,8 @@ class DataSinks(SparkConversionsMixin):
|
|
|
168
117
|
:param save_mode: The save mode to use when writing the data - "overwrite" will
|
|
169
118
|
overwrite any existing data, "merge" will merge the new data with the existing data based
|
|
170
119
|
on resource ID.
|
|
171
|
-
:returns: Details about the files that were written.
|
|
172
120
|
"""
|
|
173
121
|
if schema:
|
|
174
|
-
|
|
122
|
+
self._datasinks.saveMode(save_mode).tables(schema)
|
|
175
123
|
else:
|
|
176
|
-
|
|
177
|
-
return _convert_write_details(result)
|
|
124
|
+
self._datasinks.saveMode(save_mode).tables()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -14,15 +14,16 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
from datetime import datetime
|
|
17
|
-
from
|
|
18
|
-
from typing import
|
|
17
|
+
from typing import Dict, Sequence, Optional, Callable
|
|
18
|
+
from typing import List, TYPE_CHECKING
|
|
19
19
|
|
|
20
|
-
from
|
|
20
|
+
from json import dumps, loads
|
|
21
21
|
from py4j.java_gateway import JavaObject
|
|
22
|
+
from py4j.java_collections import SetConverter
|
|
22
23
|
from pyspark.sql import DataFrame
|
|
23
24
|
|
|
24
25
|
from pathling import PathlingContext
|
|
25
|
-
from pathling.core import
|
|
26
|
+
from pathling.core import StringToStringSetMapper, SparkConversionsMixin
|
|
26
27
|
from pathling.fhir import MimeType
|
|
27
28
|
from pathling.spark import Dfs
|
|
28
29
|
|
|
@@ -53,7 +54,7 @@ class DataSource(SparkConversionsMixin):
|
|
|
53
54
|
def resource_types(self):
|
|
54
55
|
"""
|
|
55
56
|
Returns a list of the resource types that are available in the data source.
|
|
56
|
-
|
|
57
|
+
|
|
57
58
|
:return: A list of strings representing the resource types.
|
|
58
59
|
"""
|
|
59
60
|
return list(self._jds.getResourceTypes())
|
|
@@ -65,7 +66,6 @@ class DataSource(SparkConversionsMixin):
|
|
|
65
66
|
"""
|
|
66
67
|
# Import here to avoid circular dependency
|
|
67
68
|
from pathling.datasink import DataSinks
|
|
68
|
-
|
|
69
69
|
return DataSinks(self)
|
|
70
70
|
|
|
71
71
|
def view(
|
|
@@ -240,14 +240,14 @@ class DataSources(SparkConversionsMixin):
|
|
|
240
240
|
type_filters: Optional[List[str]] = None,
|
|
241
241
|
timeout: Optional[int] = None,
|
|
242
242
|
max_concurrent_downloads: int = 10,
|
|
243
|
-
auth_config: Optional[Dict] = None
|
|
243
|
+
auth_config: Optional[Dict] = None
|
|
244
244
|
) -> DataSource:
|
|
245
245
|
"""
|
|
246
|
-
Creates a data source from a FHIR Bulk Data Access API endpoint.
|
|
246
|
+
Creates a data source from a FHIR Bulk Data Access API endpoint.
|
|
247
247
|
Currently only supports bulk export in the ndjson format.
|
|
248
|
-
|
|
248
|
+
|
|
249
249
|
:param fhir_endpoint_url: The URL of the FHIR server to export from
|
|
250
|
-
:param output_dir: The directory to write the output files to.
|
|
250
|
+
:param output_dir: The directory to write the output files to.
|
|
251
251
|
This should be a valid path in the Spark's filesystem.
|
|
252
252
|
If set to `None`, a temporary directory will be used instead.
|
|
253
253
|
:param overwrite: Whether to overwrite the output directory if it already exists. Defaults to True.
|
|
@@ -277,9 +277,7 @@ class DataSources(SparkConversionsMixin):
|
|
|
277
277
|
dfs = Dfs(self._pc.spark)
|
|
278
278
|
|
|
279
279
|
# If `output_dir` is not provided, create a temporary directory
|
|
280
|
-
output_dir = output_dir or dfs.get_temp_dir_path(
|
|
281
|
-
prefix="tmp-bulk-export", qualified=True
|
|
282
|
-
)
|
|
280
|
+
output_dir = output_dir or dfs.get_temp_dir_path(prefix="tmp-bulk-export", qualified=True)
|
|
283
281
|
# If `overwrite`, then ensure the output directory does not exist
|
|
284
282
|
if overwrite and dfs.exists(output_dir):
|
|
285
283
|
dfs.delete(output_dir, recursive=True)
|
|
@@ -303,7 +301,7 @@ class DataSources(SparkConversionsMixin):
|
|
|
303
301
|
output_extension=output_extension,
|
|
304
302
|
timeout=timeout,
|
|
305
303
|
max_concurrent_downloads=max_concurrent_downloads,
|
|
306
|
-
auth_config=auth_config
|
|
304
|
+
auth_config=auth_config
|
|
307
305
|
)
|
|
308
306
|
elif patients is not None:
|
|
309
307
|
client = BulkExportClient.for_patient(
|
|
@@ -320,7 +318,7 @@ class DataSources(SparkConversionsMixin):
|
|
|
320
318
|
output_extension=output_extension,
|
|
321
319
|
timeout=timeout,
|
|
322
320
|
max_concurrent_downloads=max_concurrent_downloads,
|
|
323
|
-
auth_config=auth_config
|
|
321
|
+
auth_config=auth_config
|
|
324
322
|
)
|
|
325
323
|
else:
|
|
326
324
|
client = BulkExportClient.for_system(
|
|
@@ -336,7 +334,7 @@ class DataSources(SparkConversionsMixin):
|
|
|
336
334
|
output_extension=output_extension,
|
|
337
335
|
timeout=timeout,
|
|
338
336
|
max_concurrent_downloads=max_concurrent_downloads,
|
|
339
|
-
auth_config=auth_config
|
|
337
|
+
auth_config=auth_config
|
|
340
338
|
)
|
|
341
339
|
|
|
342
340
|
# Perform the export
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -40,11 +40,11 @@ class Dfs:
|
|
|
40
40
|
def get_temp_dir_path(self, prefix: str = "tmp-app", qualified=True) -> str:
|
|
41
41
|
"""
|
|
42
42
|
Returns a unique path for a temporary directory in Spark's filesystem.
|
|
43
|
-
|
|
44
|
-
The path is constructed by appending a UUID to the base temporary directory,
|
|
43
|
+
|
|
44
|
+
The path is constructed by appending a UUID to the base temporary directory,
|
|
45
45
|
ensuring uniqueness for each call.
|
|
46
46
|
The directory itself is not created, only the path is returned.
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
:param prefix: String to insert between the base directory and the UUID (default: "tmp-app").
|
|
49
49
|
:param qualified: If True, returns a fully qualified Hadoop path; if False, returns a raw path string.
|
|
50
50
|
:return: String representing the unique temporary directory path.
|
|
@@ -54,14 +54,8 @@ class Dfs:
|
|
|
54
54
|
raise ValueError("`hadoop.tmp.dir` must be set in Hadoop configuration.")
|
|
55
55
|
uuid_suffix = str(uuid.uuid4())
|
|
56
56
|
base_tmp_path = self._jvm.org.apache.hadoop.fs.Path(base_tmp_dir)
|
|
57
|
-
tmp_path = self._jvm.org.apache.hadoop.fs.Path(
|
|
58
|
-
|
|
59
|
-
)
|
|
60
|
-
return (
|
|
61
|
-
self._fs.makeQualified(tmp_path).toString()
|
|
62
|
-
if qualified
|
|
63
|
-
else tmp_path.toString()
|
|
64
|
-
)
|
|
57
|
+
tmp_path = self._jvm.org.apache.hadoop.fs.Path(base_tmp_path, f"{prefix}-{uuid_suffix}")
|
|
58
|
+
return self._fs.makeQualified(tmp_path).toString() if qualified else tmp_path.toString()
|
|
65
59
|
|
|
66
60
|
def exists(self, path: str) -> bool:
|
|
67
61
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Copyright © 2018-2025 Commonwealth Scientific and Industrial Research
|
|
2
2
|
# Organisation (CSIRO) ABN 41 687 119 230.
|
|
3
|
-
#
|
|
3
|
+
#
|
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
5
|
# you may not use this file except in compliance with the License.
|
|
6
6
|
# You may obtain a copy of the License at
|
|
@@ -13,12 +13,12 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
-
from typing import Any,
|
|
16
|
+
from typing import Any, Optional, Union, Collection
|
|
17
17
|
|
|
18
18
|
from py4j.java_gateway import JavaObject
|
|
19
19
|
from pyspark import SparkContext
|
|
20
|
-
from pyspark.sql.classic.column import _to_java_column
|
|
21
20
|
from pyspark.sql.column import Column
|
|
21
|
+
from pyspark.sql.classic.column import _to_java_column
|
|
22
22
|
from pyspark.sql.functions import lit
|
|
23
23
|
|
|
24
24
|
from pathling.coding import Coding
|
|
@@ -37,7 +37,7 @@ def _coding_to_java_column(coding: Optional[CodingArg]) -> JavaObject:
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
def _ensure_collection(
|
|
40
|
-
collection_or_value: Optional[Union[Any, Collection[Any]]]
|
|
40
|
+
collection_or_value: Optional[Union[Any, Collection[Any]]]
|
|
41
41
|
) -> Optional[Collection[Any]]:
|
|
42
42
|
return (
|
|
43
43
|
collection_or_value
|
|
@@ -54,7 +54,6 @@ dev = [
|
|
|
54
54
|
"build==1.2.1",
|
|
55
55
|
"pytest-cov==5.0.0",
|
|
56
56
|
"http-server-mock==1.7",
|
|
57
|
-
"ruff>=0.8.0",
|
|
58
57
|
]
|
|
59
58
|
|
|
60
59
|
[tool.hatch.version]
|
|
@@ -77,22 +76,3 @@ include = [
|
|
|
77
76
|
"/README.md",
|
|
78
77
|
"/LICENSE",
|
|
79
78
|
]
|
|
80
|
-
|
|
81
|
-
[tool.ruff]
|
|
82
|
-
target-version = "py39"
|
|
83
|
-
line-length = 88
|
|
84
|
-
|
|
85
|
-
[tool.ruff.lint]
|
|
86
|
-
select = [
|
|
87
|
-
"E", # pycodestyle errors
|
|
88
|
-
"W", # pycodestyle warnings
|
|
89
|
-
"F", # pyflakes
|
|
90
|
-
"I", # isort (import sorting)
|
|
91
|
-
]
|
|
92
|
-
ignore = [
|
|
93
|
-
"E501", # line too long - handled by formatter where possible
|
|
94
|
-
]
|
|
95
|
-
|
|
96
|
-
[tool.ruff.format]
|
|
97
|
-
quote-style = "double"
|
|
98
|
-
indent-style = "space"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|